Skip to content
Snippets Groups Projects
Commit 858cdf8a authored by Hugo Trentesaux's avatar Hugo Trentesaux
Browse files

prepare merge algo for C+ import

parent 37ce4cd5
No related branches found
No related tags found
No related merge requests found
import { CID } from 'multiformats' import { CID } from 'multiformats'
import { kubo } from './kubo' import { kubo } from './kubo'
import { IPNS, BASE, IPNS_HIST, KEYSIZE } from './consts' import { IPNS, BASE, IPNS_HIST, KEYSIZE } from './consts'
import { type IndexInode, emptyInode, type IndexLeaf, type IndexRequest, emptyLeaf, type IndexHist } from './types' import {
type IndexInode,
emptyInode,
type IndexLeaf,
emptyLeaf,
type IndexVinode,
type IndexRequest,
type IndexHist
} from './types'
// Convert a timestamp to its tree key:
// the timestamp written in base BASE, left-padded with '0' up to KEYSIZE characters.
function timestampToKey(timestamp: number): string {
  const digits = timestamp.toString(BASE)
  return digits.padStart(KEYSIZE, '0')
}
// add cid to index queue // add cid to index queue
export async function addToIndexQueue(cid: CID, indexRequest: IndexRequest) { export async function addToIndexQueue(cid: CID, indexRequest: IndexRequest) {
...@@ -23,21 +36,38 @@ export async function addToIndexQueue(cid: CID, indexRequest: IndexRequest) { ...@@ -23,21 +36,38 @@ export async function addToIndexQueue(cid: CID, indexRequest: IndexRequest) {
} }
} }
// Build a process function, suitable for processInode, that simply inserts the request in a leaf.
// When there is no leaf yet (null or undefined), the request is inserted in a fresh empty leaf;
// otherwise it is inserted in the existing leaf.
function insertRequest(indexRequestCid: CID): ProcessFunction {
  return (maybeLeaf) => {
    const targetLeaf = maybeLeaf == null ? emptyLeaf() : maybeLeaf
    return processLeaf(targetLeaf, indexRequestCid)
  }
}
// simplest way to insert index request given its CID
// rootCid: root node of the AMT to add the index request in
// indexRequestCid: index request to add
// returns the new root cid
export async function insertIndexRequest(rootCid: CID, indexRequestCid: CID): Promise<CID> {
  // root node and index request are independent, fetch them in parallel
  const [rootDag, indexDag] = await Promise.all([kubo.dag.get(rootCid), kubo.dag.get(indexRequestCid)])
  // the position in the tree is derived from the timestamp of the request
  const key = timestampToKey(indexDag.value.timestamp)
  return processInode(rootDag.value, key, insertRequest(indexRequestCid))
}
// insert index request whose content is already known, so that only the root node has to be fetched
// rootCid: root node of the AMT to add the index request in
// indexRequestCid: CID of the index request to add
// indexRequest: content of the index request, its timestamp gives the key
// returns the new root cid
export async function insertKnownIndexRequest(
  rootCid: CID,
  indexRequestCid: CID,
  indexRequest: IndexRequest
): Promise<CID> {
  const key = timestampToKey(indexRequest.timestamp)
  const rootDag = await kubo.dag.get(rootCid)
  return processInode(rootDag.value, key, insertRequest(indexRequestCid))
}
async function resolveHist(): Promise<CID> { async function resolveHist(): Promise<CID> {
...@@ -64,14 +94,27 @@ function publishHistory(cid: CID) { ...@@ -64,14 +94,27 @@ function publishHistory(cid: CID) {
}) })
} }
async function processInode(node: IndexInode, key: string, val: CID): Promise<CID> { // function used to process node
// Callback given to processInode to produce the CID stored at the position of a key.
// maybeLeaf: the leaf already present at that position, or null when there is none yet
// resolves to the CID to store at that position
export interface ProcessFunction {
  (maybeLeaf: IndexLeaf | null): Promise<CID>
}
// Return the bucket index of a key: its first character parsed as a digit in base BASE.
function bucket(letter: string): number {
  const firstChar = letter[0]
  return Number.parseInt(firstChar, BASE)
}
/// process internal node
/// insert the CID returned by `func` at the position given by `key`
/// returns the CID of the resulting modified parent node
export async function processInode(node: IndexInode, key: string, func: ProcessFunction): Promise<CID> {
// console.log("key: " + key) // console.log("key: " + key)
// bucket // bucket
const b = parseInt(key[0], BASE) const b = bucket(key)
// if bucket is available, place leaf in it // if bucket is available, place leaf in it
if (node.children[b] === null) { if (node.children[b] === null) {
node.children[b] = [key, await processLeaf(emptyLeaf(), val)] node.children[b] = [key, await func(null)]
} else { } else {
// must share bucket with a node // must share bucket with a node
const [k1, cid1] = node.children[b] as [string, CID] const [k1, cid1] = node.children[b] as [string, CID]
...@@ -89,13 +132,13 @@ async function processInode(node: IndexInode, key: string, val: CID): Promise<CI ...@@ -89,13 +132,13 @@ async function processInode(node: IndexInode, key: string, val: CID): Promise<CI
// we can not enter a leaf at this stage // we can not enter a leaf at this stage
throw Error('should not enter a leaf, this should have been an end') throw Error('should not enter a leaf, this should have been an end')
} }
node.children[b] = [e.common, await processInode(enterNodeAsInode, e.nk, val)] node.children[b] = [e.common, await processInode(enterNodeAsInode, e.nk, func)]
break break
case resultType.End: case resultType.End:
console.log('end') console.log('end')
const otherLeaf = (await kubo.dag.get(cid1)).value as IndexLeaf const otherLeaf = (await kubo.dag.get(cid1)).value as IndexLeaf
node.children[b] = [k1, await processLeaf(otherLeaf, val)] node.children[b] = [k1, await func(otherLeaf)]
break break
case resultType.Diff: case resultType.Diff:
...@@ -103,7 +146,7 @@ async function processInode(node: IndexInode, key: string, val: CID): Promise<CI ...@@ -103,7 +146,7 @@ async function processInode(node: IndexInode, key: string, val: CID): Promise<CI
console.log('diff on "' + c.common + '" keys "' + c.nk1 + '" / "' + c.nk2 + '"') console.log('diff on "' + c.common + '" keys "' + c.nk1 + '" / "' + c.nk2 + '"')
const newNode = emptyInode() const newNode = emptyInode()
newNode.children[c.b1] = [c.nk1, cid1] newNode.children[c.b1] = [c.nk1, cid1]
newNode.children[c.b2] = [c.nk2, await processLeaf(emptyLeaf(), val)] newNode.children[c.b2] = [c.nk2, await func(null)]
const newNodeCid = (await kubo.dag.put(newNode)) as CID const newNodeCid = (await kubo.dag.put(newNode)) as CID
node.children[b] = [c.common, newNodeCid] node.children[b] = [c.common, newNodeCid]
break break
...@@ -115,7 +158,9 @@ async function processInode(node: IndexInode, key: string, val: CID): Promise<CI ...@@ -115,7 +158,9 @@ async function processInode(node: IndexInode, key: string, val: CID): Promise<CI
return newCid return newCid
} }
async function processLeaf(node: IndexLeaf, val: CID): Promise<CID> { /// process leaf in the tree
/// children have to be unique and ordered
export async function processLeaf(node: IndexLeaf, val: CID): Promise<CID> {
if (!node.leaf.some((c) => c.toString() == val.toString())) { if (!node.leaf.some((c) => c.toString() == val.toString())) {
// only insert if not already there (avoid duplicate) // only insert if not already there (avoid duplicate)
node.leaf.push(val) node.leaf.push(val)
...@@ -126,6 +171,74 @@ async function processLeaf(node: IndexLeaf, val: CID): Promise<CID> { ...@@ -126,6 +171,74 @@ async function processLeaf(node: IndexLeaf, val: CID): Promise<CID> {
return newCid return newCid
} }
/// merge the CIDs of two leaves: unique by string form, ordered by string form
// CIDs are objects, so a Set of CIDs would compare them by reference and keep equal CIDs
// held in different objects; we compare their string form instead, like processLeaf does
function mergeLeafCids(cidsA: CID[], cidsB: CID[]): CID[] {
  const byString = new Map<string, CID>()
  for (const cid of [...cidsA, ...cidsB]) {
    const s = cid.toString()
    if (!byString.has(s)) {
      byString.set(s, cid)
    }
  }
  // default sort on the string forms gives the same order as a default sort on the CIDs themselves
  return Array.from(byString.keys())
    .sort()
    .map((s) => byString.get(s) as CID)
}

/// merge internal nodes, sequential implementation (buckets are processed one after the other)
// useful to merge trees
// nodeA: node whose children are referenced by CID, it is modified in place
// nodeB: in-memory node (virtual inode or leaf) to merge into nodeA
// returns the CID of the merged node once it has been put in the dag
// throws if nodeB is a leaf while nodeA is not
export async function mergeInodesSync(nodeA: IndexInode | IndexLeaf, nodeB: IndexVinode | IndexLeaf): Promise<CID> {
  if ((nodeB as IndexLeaf).leaf) {
    // these are not internal nodes, but leaves, and we should merge them
    const leafA = (nodeA as unknown as IndexLeaf).leaf
    if (!leafA) {
      // explicit error instead of a failure while spreading an undefined list
      throw Error('can not merge a leaf into an internal node')
    }
    const newLeaf: IndexLeaf = {
      leaf: mergeLeafCids(leafA, (nodeB as IndexLeaf).leaf)
    }
    return kubo.dag.put(newLeaf) as Promise<CID>
  }
  // if we are not yet at the leaf level, we can safely assume that nodeA and nodeB are indeed inodes
  const noda = nodeA as IndexInode
  const nodb = nodeB as IndexVinode
  // process each bucket sequentially
  for (let b = 0; b < BASE; b++) {
    const nAcb = noda.children[b]
    const nBcb = nodb.children[b]
    if (nAcb == null && nBcb != null) {
      // bucket only used by nodeB: we can concretize it directly
      const [kB, childB] = nBcb
      noda.children[b] = [kB, await concretize(childB)]
    } else if (nAcb != null && nBcb != null) {
      // both are non null
      const [kA, childA] = nAcb
      const [kB, childB] = nBcb
      if (kA == kB) {
        // same key: fetch the child of nodeA and merge recursively
        const childAnode = (await kubo.dag.get(childA)).value
        noda.children[b] = [kA, await mergeInodesSync(childAnode, childB)]
      } else {
        // both keys must have same size since we can only merge nodes from same depth
        // then because they are different, the only result type is diffResult
        const c = compareKey(kA, kB) as diffResult
        // new intermediate node holding both children under their common prefix
        const newNode = emptyInode()
        newNode.children[c.b1] = [c.nk1, childA]
        newNode.children[c.b2] = [c.nk2, await concretize(childB)]
        const newNodeCid = (await kubo.dag.put(newNode)) as CID
        noda.children[b] = [c.common, newNodeCid]
      }
    }
    // otherwise nodeB has nothing in this bucket: keep it untouched
  }
  // now that we have the new node, we can upload it and return its cid
  return kubo.dag.put(noda) as Promise<CID>
}
/// concretize virtual node
// puts the in-memory node in the dag, children first, and returns its CID
async function concretize(node: IndexVinode | IndexLeaf): Promise<CID> {
  if ((node as unknown as IndexLeaf).leaf) {
    // a leaf can be stored as is
    return kubo.dag.put(node) as Promise<CID>
  }
  // this is a virtual inode: concretize every non null child, all children at once
  const vinode = node as unknown as IndexVinode
  const pendingChildren = vinode.children.map(async (child): Promise<null | [string, CID]> => {
    if (child == null) {
      return null
    }
    const [childKey, childNode] = child
    return [childKey, await concretize(childNode)]
  })
  const newNode: IndexInode = {
    children: await Promise.all(pendingChildren)
  }
  return kubo.dag.put(newNode) as Promise<CID>
}
export interface diffResult { export interface diffResult {
type: resultType type: resultType
common: string common: string
...@@ -168,15 +281,15 @@ export function compareKey(k1: string, k2: string): compResult { ...@@ -168,15 +281,15 @@ export function compareKey(k1: string, k2: string): compResult {
return { return {
type: resultType.Diff, type: resultType.Diff,
common, common,
b1: parseInt(c1, BASE), b1: bucket(c1),
nk1: k1.slice(common.length), nk1: k1.slice(common.length),
b2: parseInt(c2, BASE), b2: bucket(c2),
nk2: k2.slice(common.length) nk2: k2.slice(common.length)
} }
} }
} }
if (k1.length < k2.length) { if (k1.length < k2.length) {
const b = parseInt(k2[l], BASE) const b = bucket(k2[l])
// we can enter the bucket // we can enter the bucket
return { return {
type: resultType.Enter, type: resultType.Enter,
......
...@@ -22,6 +22,16 @@ export interface IndexInode { ...@@ -22,6 +22,16 @@ export interface IndexInode {
// but in real world, we will use a bitvector, likely in base16, groups of 4 bits // but in real world, we will use a bitvector, likely in base16, groups of 4 bits
} }
// Virtual internal node.
// Same as IndexInode, but mutable and living only in memory.
// Makes batch inserts possible without changing all intermediate nodes (root node included) each time.
export interface IndexVinode {
  // old CID of the node when it is already aware of it, null for a totally new index inode
  cid: CID | null
  // same as IndexInode, except that each child is an in-memory node instead of a CID
  children: Array<[string, IndexVinode | IndexLeaf] | null>
}
export function emptyInode(): IndexInode { export function emptyInode(): IndexInode {
return { return {
children: new Array(BASE).fill(null) children: new Array(BASE).fill(null)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment