diff --git a/src/cesium-plus.ts b/src/cesium-plus.ts
index 7e3b4fec844a23a7d2c873b9b98aa16c92818dd4..a1d0fe252500e96a5d83af450416a8ae738c6538 100644
--- a/src/cesium-plus.ts
+++ b/src/cesium-plus.ts
@@ -82,3 +82,31 @@ export async function cplusIndexRequestsToAMT(cplusrootCID: CID, rootNodeCid: CI
     console.log('new root node ' + rootNodeCid.toString())
   }
 }
+
+/// wrap a raw cplus document in an index request
+export async function cplusRawIrCID(cplus: CplusProfile, cplusRaw: string): Promise<CID> {
+  return kubo
+    .add(cplusRaw)
+    .then((added) => ({
+      pubkey: cplus.issuer,
+      kind: CESIUM_PLUS_PROFILE_IMPORT,
+      data: added.cid,
+      time: cplus.time * 1000,
+      sig: null
+    }))
+    .then((ir) => kubo.dag.put(ir))
+}
+
+/// import cplus index requests to the AMT
+export async function cplusIrToAMT(requests: Array<[string, CID]>, rootNodeCid: CID) {
+  const chunkSize = 5000
+  const n = requests.length
+  console.log(Date.now() + ' merging')
+  for (let i = 0; i < n / chunkSize; i++) {
+    console.log(Date.now() + ' chunk number ' + i)
+    const chunk = requests.slice(i * chunkSize, (i + 1) * chunkSize)
+    const tree = arrayToVinode(chunk) // build a partial tree for this chunk only
+    rootNodeCid = await mergeInodesSyncCID(rootNodeCid, tree) // then merge it into the existing AMT
+    console.log('new root node ' + rootNodeCid.toString())
+  }
+}
diff --git a/src/consts.ts b/src/consts.ts
index 425939491fecdbc259f36589019e9245b069b048..7bc9ce32783ec6fcc36bb05698c7c8c614241e15 100644
--- a/src/consts.ts
+++ b/src/consts.ts
@@ -11,16 +11,16 @@ export const BASE = 16
 export const KEYSIZE = (64 * Math.log(2)) / Math.log(BASE)
 
 // empty root cid
-export const EMPTY_NODE_CID = CID.parse('bafyreicvlp2p65agkxpzcboedba7zit55us4zvtyyq2wesvsdedy6irwfy')
+export const EMPTY_NODE_CID = 'bafyreicvlp2p65agkxpzcboedba7zit55us4zvtyyq2wesvsdedy6irwfy'
 
 // document kind of old cesium plus profile imported in the indexer
-export const CESIUM_PLUS_PROFILE_IMPORT = CID.parse('bafkreiawtammeqc55cssr2zepfpaxbmp7kquhikkagipvtefeadsw4mqvq')
-export const CESIUM_PLUS_PROFILE_INSERT = CID.parse('bafkreigi5phtqpo6a2f3tx4obaja4fzevy3nyvnl4bnkcxylyqnfeowzbm')
-export const CESIUM_PLUS_PROFILE_DELETE = CID.parse('bafkreic5bv5ytl7zv5rh5j2bd5mw6nfrn33mxhiobgmpsiu65yjw3eeduu')
+export const CESIUM_PLUS_PROFILE_IMPORT = 'cplus_raw'
+export const CESIUM_PLUS_PROFILE_INSERT = 'bafkreigi5phtqpo6a2f3tx4obaja4fzevy3nyvnl4bnkcxylyqnfeowzbm'
+export const CESIUM_PLUS_PROFILE_DELETE = 'bafkreic5bv5ytl7zv5rh5j2bd5mw6nfrn33mxhiobgmpsiu65yjw3eeduu'
 
 // document kind for transaction comment (old ones and new ones)
 export const TRANSACTION_COMMENT_V1 = 'TODO'
-export const TRANSACTION_COMMENT = CID.parse('bafkreiegjt5mrfj2hshuw6koejdfiykq57mzjeprfckxj5zpxxtqj4qzeu')
+export const TRANSACTION_COMMENT = 'bafkreiegjt5mrfj2hshuw6koejdfiykq57mzjeprfckxj5zpxxtqj4qzeu'
 
 // ==========
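Note on the consts.ts hunk above: since EMPTY_NODE_CID and the document-kind constants are now plain strings instead of CID values, call sites parse the root CID on demand and compare kinds with string equality. A minimal sketch of the adjusted usage (the helper name is illustrative, not part of this diff):

import { CID } from 'multiformats'
import { EMPTY_NODE_CID, CESIUM_PLUS_PROFILE_IMPORT } from '../consts'
import type { IndexRequest } from '../types'

// parse on demand now that the constant is a string
const emptyRoot: CID = CID.parse(EMPTY_NODE_CID)

// kind dispatch becomes plain string equality instead of CID equality
function isCplusImport(ir: IndexRequest): boolean {
  return ir.kind === CESIUM_PLUS_PROFILE_IMPORT
}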
diff --git a/src/scripts/cesium-plus-import.ts b/src/scripts/cesium-plus-import.ts
index f64ea1e7bf182e3a285f6adf3d50efe41621b0fd..99d0b3ca3e79e5ead28ac0c418c0e02b6dc98084 100644
--- a/src/scripts/cesium-plus-import.ts
+++ b/src/scripts/cesium-plus-import.ts
@@ -1,6 +1,13 @@
 import { CID } from 'multiformats'
-import { processCesiumPlusImport, processCesiumPlusProfile, cplusIndexRequestsToAMT } from '../cesium-plus'
+import {
+  processCesiumPlusImport,
+  processCesiumPlusProfile,
+  cplusIndexRequestsToAMT,
+  cplusRawIrCID,
+  cplusIrToAMT
+} from '../cesium-plus'
 import * as fs from 'fs/promises'
+import { timestampToKey } from '../processor'
 
 // profile files
 // const PROFILES = '/home/hugo/ipfs/v2s-datapod/migrate_csplus/profile_csplus.json'
@@ -47,6 +54,81 @@ async function doAllCplusCidsToAMT() {
   cplusIndexRequestsToAMT(cplusCID, rootNodeCid)
 }
 
+// fetch a raw profile by pubkey
+// fetchRawCplus('38QzVPhRLbEiqJtvCmRY6A6SraheNA6fJbomFX75b2qb').then(console.log)
+async function fetchRawCplus(id: string): Promise<string> {
+  const ENDPOINT = 'https://g1.data.e-is.pro'
+  return fetch(ENDPOINT + '/user/profile/' + id + '/_source').then((b) => b.text())
+}
+
+/// download all c+ data and add them to kubo
+async function getAllCplusIr(): Promise<Array<[string, CID]>> {
+  const SCROLL_TIME = '5m'
+  const PAGE_SIZE = 100
+  const ENDPOINT = 'https://g1.data.e-is.pro'
+
+  const cids: Array<Promise<[string, CID]>> = []
+  const URL = ENDPOINT + '/user/profile/_search?scroll=' + SCROLL_TIME
+
+  // first batch
+  let batch = await fetch(URL + '&_source=false&filter_path=took,_scroll_id,hits.hits._id,hits.total', {
+    method: 'post',
+    body: JSON.stringify({
+      query: { match_all: {} },
+      size: PAGE_SIZE
+    })
+  }).then((b) => b.json())
+  let scroll_id = batch._scroll_id
+  const total = batch.hits.total
+  let imported = 0
+  let totalimported = 0
+
+  console.log(`importing ${total} cplus profiles...`)
+
+  // process batches while some hits remain
+  // (with filter_path, the final scroll response carries no hits.hits at all)
+  while (batch.hits?.hits?.length) {
+    // wrap each profile of the batch in an index request and queue the (key, CID) pair
+    for (const hit of batch.hits.hits) {
+      cids.push(
+        fetchRawCplus(hit._id)
+          .then((cplusRaw) => [cplusRaw, JSON.parse(cplusRaw)])
+          .then(([cplusRaw, cplus]) => Promise.all([timestampToKey(cplus.time), cplusRawIrCID(cplus, cplusRaw)]))
+      )
+    }
+    imported += batch.hits.hits.length
+    totalimported += batch.hits.hits.length
+    if (imported > 100) {
+      console.log(`${totalimported.toString().padStart(5)}/${total} imported`)
+      imported = 0
+      await Promise.all(cids)
+    }
+
+    // take next batch
+    batch = await fetch(ENDPOINT + '/_search/scroll', {
+      method: 'post',
+      body: JSON.stringify({
+        scroll: SCROLL_TIME,
+        scroll_id: scroll_id
+      })
+    }).then((b) => b.json())
+
+    scroll_id = batch._scroll_id
+  }
+
+  // sort by timestamp key so index requests are merged in chronological order
+  return Promise.all(cids).then((l) => l.sort((a, b) => (a[0] < b[0] ? -1 : 1)))
+}
+
+async function importAllCplusToAMT() {
+  const rootNodeCid = CID.parse('bafyreicvlp2p65agkxpzcboedba7zit55us4zvtyyq2wesvsdedy6irwfy') // empty root cid
+  const requests = await getAllCplusIr()
+  await cplusIrToAMT(requests, rootNodeCid)
+}
+
 // TODO use command line args to choose what to do
 // doImport()
 // doAllCplusCidsToAMT()
+importAllCplusToAMT()
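The script above walks the pod with the Elasticsearch scroll API: the initial /user/profile/_search?scroll=5m request opens a scroll context, and each subsequent /_search/scroll call returns the next page until no hits remain. A possible follow-up, assuming the pod honours the standard scroll-clearing endpoint, would be to release the context when the loop finishes instead of waiting for SCROLL_TIME to expire; getAllCplusIr could call this right before returning:

// hypothetical helper, not part of this diff
async function clearScroll(scrollId: string): Promise<void> {
  const ENDPOINT = 'https://g1.data.e-is.pro'
  await fetch(ENDPOINT + '/_search/scroll', {
    method: 'DELETE',
    body: JSON.stringify({ scroll_id: scrollId })
  })
}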
diff --git a/src/types.ts b/src/types.ts
index a182eafbe131efd1edc369b15f52e825196b3e96..e2373df082b398d3d185738617bbdad5f75c637e 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -11,24 +11,24 @@ export interface Pointer<T> {
 // and indexer could add it to their database
 export interface IndexRequest {
   /// pubkey of the author of the document
-  // used to verify signature
+  // used to verify the signature, should be in a format handled by the datapod (e.g. ss58 address, base58 pubkey...)
   pubkey: string
   /// timestamp of the document
   // used as u64 in the signature
   // note that the user can send whatever timestamp it wants
   // but the collector can choose to reject document if the timestamp differs too much from his local time
   time: number
-  /// kind of the document
-  // the CID points to something describing the kind of the document
-  // this allows to filter documents based on kinds of interest
-  // TODO allow number for more compact known kinds
-  kind: CID
-  /// data coming with the index request
-  // only nullable field, useful for deletion document for example
+  /// kind of the document, an arbitrary string interpreted by the datapod
+  // can be a CID of the document format description
+  // allows filtering documents based on kinds of interest
+  kind: string
+  /// data coming with the index request, stored separately from it
+  // when null, there is no data, for example in the case of a document deletion
   data: CID | null
   /// signature of the following byte payload :
   /// ["dd" prefix for 'duniter datapods' | timestamp as u64 bytes | kind bytes | data bytes or 0x00]
-  sig: string
+  // can be null when the signature is carried inside the data (e.g. C+ profile formats)
+  sig: string | null
 }
 
 // internal node
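For reference, the byte payload documented on sig could be assembled as follows; this is a sketch rather than repository code, and the big-endian u64 encoding of the timestamp is an assumption the comment does not pin down:

// sketch: ["dd" prefix | timestamp as u64 bytes | kind bytes | data bytes or 0x00]
function signaturePayload(time: number, kind: string, data: Uint8Array | null): Uint8Array {
  const prefix = new TextEncoder().encode('dd') // 'duniter datapods'
  const ts = new Uint8Array(8)
  new DataView(ts.buffer).setBigUint64(0, BigInt(time)) // u64; big-endian is an assumption
  const kindBytes = new TextEncoder().encode(kind)
  const dataBytes = data ?? new Uint8Array([0x00]) // 0x00 marks the absence of data
  const payload = new Uint8Array(prefix.length + ts.length + kindBytes.length + dataBytes.length)
  payload.set(prefix, 0)
  payload.set(ts, prefix.length)
  payload.set(kindBytes, prefix.length + ts.length)
  payload.set(dataBytes, prefix.length + ts.length + kindBytes.length)
  return payload
}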