diff --git a/doc/ipfs.md b/doc/ipfs.md new file mode 100644 index 0000000000000000000000000000000000000000..c29e712f28b0f5d98d3f05273afc4d8b8bf30ae4 --- /dev/null +++ b/doc/ipfs.md @@ -0,0 +1,5 @@ +Migrate ipfs repo after upgrade + +```sh +docker run -it -v datapod_kubo_data:/data/ipfs -v ./emptydir:/container-init.d h30x/datapod-kubo:latest daemon --migrate=true +``` \ No newline at end of file diff --git a/src/indexer/handlers.ts b/src/indexer/handlers.ts index 2372dfa2244f8f818ff2c6760ee59f173d6697d4..18b437d8421be8cc954c1e60011d21638f419b50 100644 --- a/src/indexer/handlers.ts +++ b/src/indexer/handlers.ts @@ -29,7 +29,7 @@ export const GLOB_processQueue: Array<[CID, IndexRequest]> = [] let GLOB_lockTree: boolean = false // queue of index requests waiting to be added to the tree // these are the requests coming from other pods -const GLOB_mergeQueue: Array<[string, CID]> = [] // FIXME at the moment the key is not used +const GLOB_mergeQueue: Array<[string, CID]> = [] // queue of DiffData waiting to be processed const GLOB_diffQueue: Array<DiffData> = [] // lock to prevent multiple database edits which would fake the last root cid @@ -173,10 +173,10 @@ export async function computeDiff(fromCID: CID, toCID: CID): Promise<void> { // iterate over all index requests of diff const iterator = getDiff(fromCID, toCID) let num = 0 - for await (const leaf of iterator) { - for (let irCID of leaf) { + for await (const item of iterator) { + for (let irCID of item.leaf) { // add it to the process queue to be added in the tree - GLOB_mergeQueue.push(['key', irCID]) // FIXME get key while iterating + GLOB_mergeQueue.push([item.key, irCID]) num += 1 } // make sure that collection is triggered regularly @@ -207,7 +207,7 @@ export async function takeFromMergeQueue() { return } // sort merge queue by key to give a better chance of good batching - // mergeQueue.sort() // FIXME while key is not there, we can not sort + GLOB_mergeQueue.sort() // number of items retreived let num = 0 // item taker diff --git a/src/interface.ts b/src/interface.ts index d8097f17124fb71f8eb80d2bd613d83008df5eb1..4792a5118d4bb6892bb4262b9b467e05abbd3b43 100644 --- a/src/interface.ts +++ b/src/interface.ts @@ -5,7 +5,7 @@ import { BASE, KEYSIZE } from './consts' import assert from 'assert' // interface to easily iterate over the AMT and get all CIDs inside leaves -export async function* getAll(cid: CID): AsyncIterable<CID[]> { +export async function* getAll(cid: CID): AsyncIterable<IndexLeaf> { const result = await kubo.dag.get(cid).catch((e) => { console.log('🕰 could not retreive ' + cid) console.log(e) @@ -13,16 +13,16 @@ export async function* getAll(cid: CID): AsyncIterable<CID[]> { }) if (result == null) { // could not retreive this node, we are done iterating and return an emtpy array of CID - return [] + return { key: "", val: [] } } const node = result.value if (node.leaf) { - yield node.leaf + yield node as IndexLeaf } else { for (let b of node.children) { if (b) { - for await (const leaf of getAll(b[1])) { - yield leaf + for await (const item of getAll(b[1])) { + yield item } } } @@ -32,7 +32,7 @@ export async function* getAll(cid: CID): AsyncIterable<CID[]> { // interface to easily iterate over diff of two AMT // similar to "compareInodes" below, but focused on what's new in cid2 and iterate all the way down to the index requests // iterate over all CIDs of index requests -export async function* getDiff(cid1: CID, cid2: CID): AsyncIterable<CID[]> { +export async function* getDiff(cid1: CID, cid2: CID): AsyncIterable<IndexLeaf> { const [node1, node2] = await Promise.all([kubo.dag.get(cid1), kubo.dag.get(cid2)]) // .catch() can not compare when can not retreive const [leaf1, leaf2] = [node1.value as IndexLeaf, node2.value as IndexLeaf] const [inode1, inode2] = [node1.value as IndexInode, node2.value as IndexInode] @@ -42,7 +42,8 @@ export async function* getDiff(cid1: CID, cid2: CID): AsyncIterable<CID[]> { if (added1.length != 0) { // console.debug('ignoring missing index request ' + added1) } else { - yield added2.map(([_k, v]) => v) + // we put in leaf only the ones that were added + yield { key: leaf1.key, leaf: added2.map(([_k, v]) => v) } } } else if (inode1.children && inode2.children) { // do the inode comparison @@ -53,7 +54,7 @@ export async function* getDiff(cid1: CID, cid2: CID): AsyncIterable<CID[]> { } // same as getDiff, but operates on index internal nodes only, not cids that can represent something else -async function* getDiffInodes(inode1: IndexInode, inode2: IndexInode): AsyncIterable<CID[]> { +async function* getDiffInodes(inode1: IndexInode, inode2: IndexInode): AsyncIterable<IndexLeaf> { assert(inode1.ctx == inode2.ctx) const ctx = inode1.ctx // iterate over nodes children diff --git a/src/scripts/diff.ts b/src/scripts/diff.ts index 97c4aeb59a902ef5dd4c7ec3c338c825cf634bc6..114cf4735eb41ae5398d90615b2bafab9cc075d5 100644 --- a/src/scripts/diff.ts +++ b/src/scripts/diff.ts @@ -15,20 +15,21 @@ const fromCID = CID.parse('bafyreifqojtso7fkz2qg3d5p432jbcjgzah2bmvcvowfhbvn4dow const toCID = CID.parse('bafyreihwmnq3wtfgbe7mlbwkztasqfcr5auopxwgmw6bnmobhfccgolayu') const iterator = getDiff(fromCID, toCID) +// const iterator = getDiff(toCID, fromCID) const BATCH_SIZE = 1000 async function doit() { let num = 0 - for await (const leaf of iterator) { - for (let irCID of leaf) { + for await (const item of iterator) { + for (let irCID of item.leaf) { num += 1 if (num % BATCH_SIZE == 0) { console.log(num) } await setTimeout(1000) // wait 1 sec - console.log(irCID.toString()) + console.log(item.key, irCID.toString()) } } } diff --git a/src/scripts/getAll.ts b/src/scripts/getAll.ts index dcbf3a1dc54a0ad3bbaf2bb56cf41beccc6c8f4f..6e8077b53fc8f842f51f02d13e8f8246d8cc0b10 100644 --- a/src/scripts/getAll.ts +++ b/src/scripts/getAll.ts @@ -6,11 +6,7 @@ import { CESIUM_PLUS_PROFILE_IMPORT, CESIUM_PLUS_PROFILE_INSERT } from '../const const LOG_EVERY = 1000 -// const cid = CID.parse('bafyreifdwwsnv4p2ag7egt2hjbxne63u2mfbstvnjde4b6blfvijhpiuri') // 10 latest -// const cid = CID.parse('bafyreifsq3dtwbilnccpmjlgenqofhhgs4o5duh3mj6j5jiknmg3rdxl5a') -const cid = CID.parse('bafyreic6ozgzlzkp6kkrvhqubttz47mkix36ms6r5inanhsmd6d3mwi7cy') - - +const cid = CID.parse('bafyreidnn24efeospk67qbaj7wipug3ewkqzleniczt4tsdsevplz5ynwe') const iterator = getAll(cid) @@ -18,18 +14,16 @@ console.log('in', cid) async function doit() { let num = 0 - for await (const leaf of iterator) { - for (let item of leaf) { + for await (const item of iterator) { + for (let irCID of item.leaf) { num += 1 - // const [k, irCID] = item - const irCID = item kubo.dag .get(irCID) .then((res) => res.value) .then((r) => { const ir = r as IndexRequest if (ir.kind != CESIUM_PLUS_PROFILE_IMPORT) { - console.log(irCID.toString()) + console.log(item.key, irCID.toString()) } }) if (num % LOG_EVERY == 0) { @@ -41,3 +35,5 @@ async function doit() { } doit() + +// iterate over all nodes and show the ones that are not cesium plus imports \ No newline at end of file diff --git a/src/scripts/index-database.ts b/src/scripts/index-database.ts index 0dccb9dfaa1cbf8976e75021decc3a1fa708348b..de935051562383e92447e151886e6f8815403c79 100644 --- a/src/scripts/index-database.ts +++ b/src/scripts/index-database.ts @@ -12,11 +12,3 @@ const cid = CID.parse('bafyreiay2zpectyuxb4d5nxxkwcpdk76ivrm36qmrutpbaaxldh6yt46 // const irCID = CID.parse('bafyreifpzcrghku22vp5oqzqgj4v3hxfhewe5d23kqdkhnkfapnrjgdoje') // const ir = await kubo.dag.get(irCID) // await handleInsertRequest(irCID, ir.value) - -// // example to iterate over all -// for await (const leaf of getAll(cid)) { -// const ir: IndexRequest = (await kubo.dag.get(leaf[0])).value -// const data = (await kubo.dag.get(ir.data!)).value -// console.log(data.title) -// console.log(data.description) -// }