Skip to content
Snippets Groups Projects
Commit 8397b73a authored by Hugo Trentesaux
Browse files

use key when iterating over diff

parent f5f51fcc
Branches
No related tags found
No related merge requests found
Migrate ipfs repo after upgrade
```sh
docker run -it -v datapod_kubo_data:/data/ipfs -v ./emptydir:/container-init.d h30x/datapod-kubo:latest daemon --migrate=true
```
\ No newline at end of file
...@@ -29,7 +29,7 @@ export const GLOB_processQueue: Array<[CID, IndexRequest]> = [] ...@@ -29,7 +29,7 @@ export const GLOB_processQueue: Array<[CID, IndexRequest]> = []
let GLOB_lockTree: boolean = false let GLOB_lockTree: boolean = false
// queue of index requests waiting to be added to the tree // queue of index requests waiting to be added to the tree
// these are the requests coming from other pods // these are the requests coming from other pods
const GLOB_mergeQueue: Array<[string, CID]> = [] // FIXME at the moment the key is not used const GLOB_mergeQueue: Array<[string, CID]> = []
// queue of DiffData waiting to be processed // queue of DiffData waiting to be processed
const GLOB_diffQueue: Array<DiffData> = [] const GLOB_diffQueue: Array<DiffData> = []
// lock to prevent multiple database edits which would fake the last root cid // lock to prevent multiple database edits which would fake the last root cid
...@@ -173,10 +173,10 @@ export async function computeDiff(fromCID: CID, toCID: CID): Promise<void> { ...@@ -173,10 +173,10 @@ export async function computeDiff(fromCID: CID, toCID: CID): Promise<void> {
// iterate over all index requests of diff // iterate over all index requests of diff
const iterator = getDiff(fromCID, toCID) const iterator = getDiff(fromCID, toCID)
let num = 0 let num = 0
for await (const leaf of iterator) { for await (const item of iterator) {
for (let irCID of leaf) { for (let irCID of item.leaf) {
// add it to the process queue to be added in the tree // add it to the process queue to be added in the tree
GLOB_mergeQueue.push(['key', irCID]) // FIXME get key while iterating GLOB_mergeQueue.push([item.key, irCID])
num += 1 num += 1
} }
// make sure that collection is triggered regularly // make sure that collection is triggered regularly
...@@ -207,7 +207,7 @@ export async function takeFromMergeQueue() { ...@@ -207,7 +207,7 @@ export async function takeFromMergeQueue() {
return return
} }
// sort merge queue by key to give a better chance of good batching // sort merge queue by key to give a better chance of good batching
// mergeQueue.sort() // FIXME while key is not there, we can not sort GLOB_mergeQueue.sort()
// number of items retrieved // number of items retrieved
let num = 0 let num = 0
// item taker // item taker
......
...@@ -5,7 +5,7 @@ import { BASE, KEYSIZE } from './consts' ...@@ -5,7 +5,7 @@ import { BASE, KEYSIZE } from './consts'
import assert from 'assert' import assert from 'assert'
// interface to easily iterate over the AMT and get all CIDs inside leaves // interface to easily iterate over the AMT and get all CIDs inside leaves
export async function* getAll(cid: CID): AsyncIterable<CID[]> { export async function* getAll(cid: CID): AsyncIterable<IndexLeaf> {
const result = await kubo.dag.get(cid).catch((e) => { const result = await kubo.dag.get(cid).catch((e) => {
console.log('🕰 could not retreive ' + cid) console.log('🕰 could not retreive ' + cid)
console.log(e) console.log(e)
...@@ -13,16 +13,16 @@ export async function* getAll(cid: CID): AsyncIterable<CID[]> { ...@@ -13,16 +13,16 @@ export async function* getAll(cid: CID): AsyncIterable<CID[]> {
}) })
if (result == null) { if (result == null) {
// could not retrieve this node, we are done iterating and return an empty array of CID // could not retrieve this node, we are done iterating and return an empty array of CID
return [] return { key: "", val: [] }
} }
const node = result.value const node = result.value
if (node.leaf) { if (node.leaf) {
yield node.leaf yield node as IndexLeaf
} else { } else {
for (let b of node.children) { for (let b of node.children) {
if (b) { if (b) {
for await (const leaf of getAll(b[1])) { for await (const item of getAll(b[1])) {
yield leaf yield item
} }
} }
} }
...@@ -32,7 +32,7 @@ export async function* getAll(cid: CID): AsyncIterable<CID[]> { ...@@ -32,7 +32,7 @@ export async function* getAll(cid: CID): AsyncIterable<CID[]> {
// interface to easily iterate over diff of two AMT // interface to easily iterate over diff of two AMT
// similar to "compareInodes" below, but focused on what's new in cid2 and iterate all the way down to the index requests // similar to "compareInodes" below, but focused on what's new in cid2 and iterate all the way down to the index requests
// iterate over all CIDs of index requests // iterate over all CIDs of index requests
export async function* getDiff(cid1: CID, cid2: CID): AsyncIterable<CID[]> { export async function* getDiff(cid1: CID, cid2: CID): AsyncIterable<IndexLeaf> {
const [node1, node2] = await Promise.all([kubo.dag.get(cid1), kubo.dag.get(cid2)]) // .catch() can not compare when can not retreive const [node1, node2] = await Promise.all([kubo.dag.get(cid1), kubo.dag.get(cid2)]) // .catch() can not compare when can not retreive
const [leaf1, leaf2] = [node1.value as IndexLeaf, node2.value as IndexLeaf] const [leaf1, leaf2] = [node1.value as IndexLeaf, node2.value as IndexLeaf]
const [inode1, inode2] = [node1.value as IndexInode, node2.value as IndexInode] const [inode1, inode2] = [node1.value as IndexInode, node2.value as IndexInode]
...@@ -42,7 +42,8 @@ export async function* getDiff(cid1: CID, cid2: CID): AsyncIterable<CID[]> { ...@@ -42,7 +42,8 @@ export async function* getDiff(cid1: CID, cid2: CID): AsyncIterable<CID[]> {
if (added1.length != 0) { if (added1.length != 0) {
// console.debug('ignoring missing index request ' + added1) // console.debug('ignoring missing index request ' + added1)
} else { } else {
yield added2.map(([_k, v]) => v) // we put in leaf only the ones that were added
yield { key: leaf1.key, leaf: added2.map(([_k, v]) => v) }
} }
} else if (inode1.children && inode2.children) { } else if (inode1.children && inode2.children) {
// do the inode comparison // do the inode comparison
...@@ -53,7 +54,7 @@ export async function* getDiff(cid1: CID, cid2: CID): AsyncIterable<CID[]> { ...@@ -53,7 +54,7 @@ export async function* getDiff(cid1: CID, cid2: CID): AsyncIterable<CID[]> {
} }
// same as getDiff, but operates on index internal nodes only, not cids that can represent something else // same as getDiff, but operates on index internal nodes only, not cids that can represent something else
async function* getDiffInodes(inode1: IndexInode, inode2: IndexInode): AsyncIterable<CID[]> { async function* getDiffInodes(inode1: IndexInode, inode2: IndexInode): AsyncIterable<IndexLeaf> {
assert(inode1.ctx == inode2.ctx) assert(inode1.ctx == inode2.ctx)
const ctx = inode1.ctx const ctx = inode1.ctx
// iterate over nodes children // iterate over nodes children
......
...@@ -15,20 +15,21 @@ const fromCID = CID.parse('bafyreifqojtso7fkz2qg3d5p432jbcjgzah2bmvcvowfhbvn4dow ...@@ -15,20 +15,21 @@ const fromCID = CID.parse('bafyreifqojtso7fkz2qg3d5p432jbcjgzah2bmvcvowfhbvn4dow
const toCID = CID.parse('bafyreihwmnq3wtfgbe7mlbwkztasqfcr5auopxwgmw6bnmobhfccgolayu') const toCID = CID.parse('bafyreihwmnq3wtfgbe7mlbwkztasqfcr5auopxwgmw6bnmobhfccgolayu')
const iterator = getDiff(fromCID, toCID) const iterator = getDiff(fromCID, toCID)
// const iterator = getDiff(toCID, fromCID)
const BATCH_SIZE = 1000 const BATCH_SIZE = 1000
async function doit() { async function doit() {
let num = 0 let num = 0
for await (const leaf of iterator) { for await (const item of iterator) {
for (let irCID of leaf) { for (let irCID of item.leaf) {
num += 1 num += 1
if (num % BATCH_SIZE == 0) { if (num % BATCH_SIZE == 0) {
console.log(num) console.log(num)
} }
await setTimeout(1000) // wait 1 sec await setTimeout(1000) // wait 1 sec
console.log(irCID.toString()) console.log(item.key, irCID.toString())
} }
} }
} }
......
...@@ -6,11 +6,7 @@ import { CESIUM_PLUS_PROFILE_IMPORT, CESIUM_PLUS_PROFILE_INSERT } from '../const ...@@ -6,11 +6,7 @@ import { CESIUM_PLUS_PROFILE_IMPORT, CESIUM_PLUS_PROFILE_INSERT } from '../const
const LOG_EVERY = 1000 const LOG_EVERY = 1000
// const cid = CID.parse('bafyreifdwwsnv4p2ag7egt2hjbxne63u2mfbstvnjde4b6blfvijhpiuri') // 10 latest const cid = CID.parse('bafyreidnn24efeospk67qbaj7wipug3ewkqzleniczt4tsdsevplz5ynwe')
// const cid = CID.parse('bafyreifsq3dtwbilnccpmjlgenqofhhgs4o5duh3mj6j5jiknmg3rdxl5a')
const cid = CID.parse('bafyreic6ozgzlzkp6kkrvhqubttz47mkix36ms6r5inanhsmd6d3mwi7cy')
const iterator = getAll(cid) const iterator = getAll(cid)
...@@ -18,18 +14,16 @@ console.log('in', cid) ...@@ -18,18 +14,16 @@ console.log('in', cid)
async function doit() { async function doit() {
let num = 0 let num = 0
for await (const leaf of iterator) { for await (const item of iterator) {
for (let item of leaf) { for (let irCID of item.leaf) {
num += 1 num += 1
// const [k, irCID] = item
const irCID = item
kubo.dag kubo.dag
.get(irCID) .get(irCID)
.then((res) => res.value) .then((res) => res.value)
.then((r) => { .then((r) => {
const ir = r as IndexRequest const ir = r as IndexRequest
if (ir.kind != CESIUM_PLUS_PROFILE_IMPORT) { if (ir.kind != CESIUM_PLUS_PROFILE_IMPORT) {
console.log(irCID.toString()) console.log(item.key, irCID.toString())
} }
}) })
if (num % LOG_EVERY == 0) { if (num % LOG_EVERY == 0) {
...@@ -41,3 +35,5 @@ async function doit() { ...@@ -41,3 +35,5 @@ async function doit() {
} }
doit() doit()
// iterate over all nodes and show the ones that are not cesium plus imports
\ No newline at end of file
...@@ -12,11 +12,3 @@ const cid = CID.parse('bafyreiay2zpectyuxb4d5nxxkwcpdk76ivrm36qmrutpbaaxldh6yt46 ...@@ -12,11 +12,3 @@ const cid = CID.parse('bafyreiay2zpectyuxb4d5nxxkwcpdk76ivrm36qmrutpbaaxldh6yt46
// const irCID = CID.parse('bafyreifpzcrghku22vp5oqzqgj4v3hxfhewe5d23kqdkhnkfapnrjgdoje') // const irCID = CID.parse('bafyreifpzcrghku22vp5oqzqgj4v3hxfhewe5d23kqdkhnkfapnrjgdoje')
// const ir = await kubo.dag.get(irCID) // const ir = await kubo.dag.get(irCID)
// await handleInsertRequest(irCID, ir.value) // await handleInsertRequest(irCID, ir.value)
// // example to iterate over all
// for await (const leaf of getAll(cid)) {
// const ir: IndexRequest = (await kubo.dag.get(leaf[0])).value
// const data = (await kubo.dag.get(ir.data!)).value
// console.log(data.title)
// console.log(data.description)
// }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment