diff --git a/README.md b/README.md
index 4bd14332cc8fee8016561c907395cb715bff8cb0..71be39ad452b633dabae5d2c87c54985aba94329 100644
--- a/README.md
+++ b/README.md
@@ -29,5 +29,6 @@ This will make easier to insert this data in any AMT or other data structure.
 # doMergeAMT()
 # takes about 50 seconds
 time npx tsx src/scripts/cesium-plus-import.ts
-# bafyreie23z6aayg5pjrqiowwziv2zt55b3ijzch3bjf5u4ebfbjtl5raxe
+# bafyreie23z6aayg5pjrqiowwziv2zt55b3ijzch3bjf5u4ebfbjtl5raxe (time)
+# bafyreigif3w2js2e6xb6vrfwagig3e7oiscakr7ki5xzsngukmsorxekny (time × 1000)
 ```
diff --git a/src/cesium-plus.ts b/src/cesium-plus.ts
index e245330ac304b3419395bf3fdf82143fd72a6565..e012ebd897e084fd51ae0f7619a52ecf445895e1 100644
--- a/src/cesium-plus.ts
+++ b/src/cesium-plus.ts
@@ -41,6 +41,16 @@ interface CplusProfileMore extends CplusProfile {
   avatar: Avatar | CID
 }
 
+/// C+ profile to index request
+function cplusProfileToIndexRequest(profile: CplusProfile, profileCid: CID): IndexRequest {
+  return {
+    pubkey: profile.issuer,
+    cid: profileCid,
+    timestamp: profile.time * 1000,
+    signature: '' // signature is inside document for C+ data
+  }
+}
+
 /// adds all cids by groups ot size `groupBy`
 export async function processCesiumPlusImport(profileCids: CID[], groupBy: number): Promise<CID> {
   const rootNode: Array<Promise<CID>> = []
@@ -188,30 +198,77 @@ export async function allCplusCids(cplusCID: CID): Promise<Array<[number, CID]>>
   return Promise.all(allCIDs)
 }
 
-/// import all cplus cid to AMT chunk by chunk
-// this allows to decrease maximum amount of concurrent connections
-// 183 seconds
-export async function allCplusCidsToAMTChunked(cplusCID: CID, rootNodeCid: CID): Promise<CID> {
+/// retrieve all C+ data as index requests
+// every profile is fetched with `kubo.dag.get`, converted with `cplusProfileToIndexRequest` and stored with `kubo.dag.put`
+// resolves to [key, cid] pairs: `timestampToKey` of the request timestamp and the cid returned by `kubo.dag.put`
+export async function allCplusAsIndexRequestCids(cplusrootCID: CID): Promise<Array<[string, CID]>> {
   console.log(Date.now() + ' getting all cplus data')
-  const cplusroot = await kubo.dag.get(cplusCID)
+  const allCIDs: Array<Promise<[string, CID]>> = []
+  const cplusroot = await kubo.dag.get(cplusrootCID)
   for (let chunkcid of cplusroot.value) {
-    const allCIDs: Array<Promise<[number, CID]>> = []
     const chunk = await kubo.dag.get(chunkcid)
     for (let pcid of chunk.value) {
-      const p = kubo.dag.get(pcid)
-      const profile: Promise<[number, CID]> = p.then((v) => [v.value.time * 1000, pcid])
-      allCIDs.push(profile)
+      const profileIR: Promise<[string, CID]> = kubo.dag
+        .get(pcid)
+        .then((v) => cplusProfileToIndexRequest(v.value, pcid))
+        .then((r: IndexRequest) =>
+          Promise.all([timestampToKey(r.timestamp), kubo.dag.put(r)] as [string, Promise<CID>])
+        )
+      allCIDs.push(profileIR)
     }
+  }
+  return Promise.all(allCIDs)
+}
+
+// /// import all cplus cid to AMT chunk by chunk
+// // this allows to decrease maximum amount of concurrent connections
+// // 183 seconds
+// // not optimal either
+// export async function allCplusCidsToAMTChunked(cplusCID: CID, rootNodeCid: CID): Promise<CID> {
+//   console.log(Date.now() + ' getting all cplus data')
+//   const cplusroot = await kubo.dag.get(cplusCID)
+//   for (let chunkcid of cplusroot.value) {
+//     const allCIDs: Array<Promise<[number, CID]>> = []
+//     const chunk = await kubo.dag.get(chunkcid)
+//     for (let pcid of chunk.value) {
+//       const p = kubo.dag.get(pcid)
+//       const profile: Promise<[number, CID]> = p.then((v) => [v.value.time * 1000, pcid])
+//       allCIDs.push(profile)
+//     }
+//     const rootNode = (await kubo.dag.get(rootNodeCid)).value
+//     rootNodeCid = await Promise.all(allCIDs)
+//       .then(sortCidsAndConvertKeys)
+//       .then(arrayToVinode)
+//       .then(async (inode) => {
+//         // console.log(await concretizeCid(inode))
+//         console.log(Date.now() + ' merging')
+//         return mergeInodesSync(rootNode, inode)
+//       })
+//     console.log(rootNodeCid)
+//   }
+//   return rootNodeCid
+// }
+
+/// import cplus index requests to AMT
+// about 90 seconds to get C+ data and convert to index requests
+// about 90 seconds to merge data 1000 by 1000
+// resolves to the root node cid obtained after the last merge
+// (the given `rootNodeCid` when there is no request to merge)
+export async function cplusIndexRequestsToAMT(cplusrootCID: CID, rootNodeCid: CID): Promise<CID> {
+  const chunkSize = 1000
+  console.log('getting all cplus index requests')
+  const requests = await allCplusAsIndexRequestCids(cplusrootCID)
+  // no comparator: pairs are compared through their string conversion, which starts with the key
+  requests.sort()
+  const n = requests.length
+  console.log(Date.now() + ' merging')
+  for (let i = 0; i < n / chunkSize; i++) {
+    console.log(Date.now() + ' chunk number ' + i)
+    const chunk = requests.slice(i * chunkSize, (i + 1) * chunkSize)
     const rootNode = (await kubo.dag.get(rootNodeCid)).value
-    rootNodeCid = await Promise.all(allCIDs)
-      .then(sortCidsAndConvertKeys)
-      .then(arrayToVinode)
-      .then(async (inode) => {
-        // console.log(await concretizeCid(inode))
-        console.log(Date.now() + ' merging')
-        return mergeInodesSync(rootNode, inode)
-      })
-    console.log(rootNodeCid)
+    const tree = arrayToVinode(chunk) // partial tree for this chunk
+    rootNodeCid = await mergeInodesSync(rootNode, tree)
+    console.log('new root node ' + rootNodeCid.toString())
   }
   return rootNodeCid
 }
diff --git a/src/scripts/cesium-plus-import.ts b/src/scripts/cesium-plus-import.ts
index 457ed7b717cba406ea7100a3bf4da1f078390f24..005c790bb363dafa17746a1ceb5351b0fbe0b888 100644
--- a/src/scripts/cesium-plus-import.ts
+++ b/src/scripts/cesium-plus-import.ts
@@ -5,7 +5,7 @@ import {
   allCplusCids,
   sortCidsAndConvertKeys,
   arrayToVinode,
-  allCplusCidsToAMTChunked
+  cplusIndexRequestsToAMT
 } from '../cesium-plus'
 import * as fs from 'fs/promises'
 import { kubo } from '../kubo'
@@ -77,11 +77,12 @@ async function doMergeAMT() {
 
 // doMergeAMT()
 
-async function doAllCplusCidsToAMTChunked() {
+async function doAllCplusCidsToAMT() {
   const cplusCID = CID.parse('bafyreie74jtf23zzz2tdgsz7axfrm4pidje43ypqn25v4gkdtfjbcj62km') // cesium plus import
   const rootNodeCid = CID.parse('bafyreicvlp2p65agkxpzcboedba7zit55us4zvtyyq2wesvsdedy6irwfy') // empty root cid
 
-  allCplusCidsToAMTChunked(cplusCID, rootNodeCid)
+  // awaited so that an import failure rejects the promise returned by this function
+  await cplusIndexRequestsToAMT(cplusCID, rootNodeCid)
 }
 
-doAllCplusCidsToAMTChunked()
+doAllCplusCidsToAMT()