Skip to content
Snippets Groups Projects
Commit 728dfdac authored by Hugo Trentesaux's avatar Hugo Trentesaux
Browse files

wip raw cplus import

parent d36aa2f9
No related branches found
No related tags found
No related merge requests found
...@@ -82,3 +82,31 @@ export async function cplusIndexRequestsToAMT(cplusrootCID: CID, rootNodeCid: CI ...@@ -82,3 +82,31 @@ export async function cplusIndexRequestsToAMT(cplusrootCID: CID, rootNodeCid: CI
console.log('new root node ' + rootNodeCid.toString()) console.log('new root node ' + rootNodeCid.toString())
} }
} }
/// wrap cplus raw document in index request
// The raw document is handed to `kubo.add`; the value it resolves to becomes the `data` field
// of an index request of kind CESIUM_PLUS_PROFILE_IMPORT, authored by `cplus.issuer`.
// The index request itself is then stored with `kubo.dag.put`, whose result is returned.
export async function cplusRawIrCID(cplus: CplusProfile, cplusRaw: string): Promise<CID> {
  // NOTE(review): the value resolved by kubo.add is used as-is for `data` — confirm it is a bare CID
  const added = await kubo.add(cplusRaw)
  const indexRequest = {
    pubkey: cplus.issuer,
    kind: CESIUM_PLUS_PROFILE_IMPORT,
    data: added,
    // presumably converts a timestamp in seconds to milliseconds — TODO confirm unit of cplus.time
    time: cplus.time * 1000,
    // no separate signature is attached to the wrapping index request
    sig: null
  }
  return kubo.dag.put(indexRequest)
}
/// import cplus index requests to AMT
// The requests are merged chunk by chunk: each slice of at most `chunkSize` entries is turned
// into a partial tree (arrayToVinode) and merged into the current root (mergeInodesSyncCID).
// The root obtained after each merge is only logged; nothing is returned.
export async function cplusIrToAMT(requests: Array<[string, CID]>, rootNodeCid: CID) {
  const chunkSize = 5000
  const total = requests.length
  let currentRoot = rootNodeCid
  console.log(Date.now() + ' merging')
  for (let chunkIndex = 0, start = 0; start < total; chunkIndex++, start += chunkSize) {
    console.log(Date.now() + ' chunk number ' + chunkIndex)
    // partial tree built from this slice only
    const partialTree = arrayToVinode(requests.slice(start, start + chunkSize))
    // merges are sequential: each one starts from the root produced by the previous one
    currentRoot = await mergeInodesSyncCID(currentRoot, partialTree)
    console.log('new root node ' + currentRoot.toString())
  }
}
...@@ -11,16 +11,16 @@ export const BASE = 16 ...@@ -11,16 +11,16 @@ export const BASE = 16
export const KEYSIZE = (64 * Math.log(2)) / Math.log(BASE) export const KEYSIZE = (64 * Math.log(2)) / Math.log(BASE)
// empty root cid // empty root cid
export const EMPTY_NODE_CID = CID.parse('bafyreicvlp2p65agkxpzcboedba7zit55us4zvtyyq2wesvsdedy6irwfy') export const EMPTY_NODE_CID = 'bafyreicvlp2p65agkxpzcboedba7zit55us4zvtyyq2wesvsdedy6irwfy'
// document kind of old cesium plus profile imported in the indexer // document kind of old cesium plus profile imported in the indexer
export const CESIUM_PLUS_PROFILE_IMPORT = CID.parse('bafkreiawtammeqc55cssr2zepfpaxbmp7kquhikkagipvtefeadsw4mqvq') export const CESIUM_PLUS_PROFILE_IMPORT = 'cplus_raw'
export const CESIUM_PLUS_PROFILE_INSERT = CID.parse('bafkreigi5phtqpo6a2f3tx4obaja4fzevy3nyvnl4bnkcxylyqnfeowzbm') export const CESIUM_PLUS_PROFILE_INSERT = 'bafkreigi5phtqpo6a2f3tx4obaja4fzevy3nyvnl4bnkcxylyqnfeowzbm'
export const CESIUM_PLUS_PROFILE_DELETE = CID.parse('bafkreic5bv5ytl7zv5rh5j2bd5mw6nfrn33mxhiobgmpsiu65yjw3eeduu') export const CESIUM_PLUS_PROFILE_DELETE = 'bafkreic5bv5ytl7zv5rh5j2bd5mw6nfrn33mxhiobgmpsiu65yjw3eeduu'
// document kind for transaction comment (old ones and new ones) // document kind for transaction comment (old ones and new ones)
export const TRANSACTION_COMMENT_V1 = 'TODO' export const TRANSACTION_COMMENT_V1 = 'TODO'
export const TRANSACTION_COMMENT = CID.parse('bafkreiegjt5mrfj2hshuw6koejdfiykq57mzjeprfckxj5zpxxtqj4qzeu') export const TRANSACTION_COMMENT = 'bafkreiegjt5mrfj2hshuw6koejdfiykq57mzjeprfckxj5zpxxtqj4qzeu'
// ========== // ==========
......
import { CID } from 'multiformats' import { CID } from 'multiformats'
import { processCesiumPlusImport, processCesiumPlusProfile, cplusIndexRequestsToAMT } from '../cesium-plus' import {
processCesiumPlusImport,
processCesiumPlusProfile,
cplusIndexRequestsToAMT,
cplusRawIrCID,
cplusIrToAMT
} from '../cesium-plus'
import * as fs from 'fs/promises' import * as fs from 'fs/promises'
import { timestampToKey } from '../processor'
// profile files // profile files
// const PROFILES = '/home/hugo/ipfs/v2s-datapod/migrate_csplus/profile_csplus.json' // const PROFILES = '/home/hugo/ipfs/v2s-datapod/migrate_csplus/profile_csplus.json'
...@@ -47,6 +54,81 @@ async function doAllCplusCidsToAMT() { ...@@ -47,6 +54,81 @@ async function doAllCplusCidsToAMT() {
cplusIndexRequestsToAMT(cplusCID, rootNodeCid) cplusIndexRequestsToAMT(cplusCID, rootNodeCid)
} }
// fetch raw profile
// fetchRawCplus('38QzVPhRLbEiqJtvCmRY6A6SraheNA6fJbomFX75b2qb').then(console.log)
/// Fetch the `_source` of one profile document and return the response body as text.
// `id` is the document id of the profile; it is URL-encoded before being put in the path.
// Rejects with an Error when the server answers with a non-2xx status, so that an error
// payload is never mistaken for a profile by the caller.
async function fetchRawCplus(id: string): Promise<string> {
  const ENDPOINT = 'https://g1.data.e-is.pro'
  const response = await fetch(ENDPOINT + '/user/profile/' + encodeURIComponent(id) + '/_source')
  if (!response.ok) {
    throw new Error(`failed to fetch cplus profile ${id}: HTTP ${response.status} ${response.statusText}`)
  }
  return response.text()
}
/// download all c+ data and add them to kubo
// Scrolls through all profile ids exposed by the search endpoint, fetches each raw profile
// (fetchRawCplus), wraps it in an index request (cplusRawIrCID) and resolves to the list of
// [key, index request CID] pairs, sorted by key (ties broken on the CID string).
// Rejects if any HTTP request fails or if any profile cannot be fetched / parsed / stored.
async function getAllCplusIr(): Promise<Array<[string, CID]>> {
  type ScrollPage = {
    _scroll_id?: string
    hits?: { total?: number; hits?: Array<{ _id: string }> }
  }
  const SCROLL_TIME = '5m'
  const PAGE_SIZE = 100
  const ENDPOINT = 'https://g1.data.e-is.pro'
  // number of newly queued profiles after which progress is logged and pending work is awaited
  const FLUSH_THRESHOLD = 100
  const searchUrl = ENDPOINT + '/user/profile/_search?scroll=' + SCROLL_TIME
  const cids: Array<Promise<[string, CID]>> = []

  // POST a JSON body and decode the JSON answer, failing loudly on HTTP errors
  const postJson = async (url: string, body: unknown): Promise<ScrollPage> => {
    const response = await fetch(url, { method: 'post', body: JSON.stringify(body) })
    if (!response.ok) {
      throw new Error(`request to ${url} failed: HTTP ${response.status} ${response.statusText}`)
    }
    return response.json()
  }

  // fetch one raw profile and turn it into a [key, index request CID] pair
  const toRequest = async (id: string): Promise<[string, CID]> => {
    const cplusRaw = await fetchRawCplus(id)
    const cplus = JSON.parse(cplusRaw)
    const [key, cid] = await Promise.all([timestampToKey(cplus.time), cplusRawIrCID(cplus, cplusRaw)])
    return [key, cid]
  }

  // first batch
  let batch = await postJson(searchUrl + '&_source=false&filter_path=took,_scroll_id,hits.hits._id,hits.total', {
    query: { match_all: {} },
    size: PAGE_SIZE
  })
  let scroll_id = batch._scroll_id
  const total = batch.hits?.total ?? 0
  let imported = 0
  let totalimported = 0
  console.log(`importing ${total} cplus profiles...`)

  // process batches while available
  // The end of the scroll is detected by an empty page. `took` must not be used for this:
  // it is the query duration reported by the server, not a number of hits.
  // `hits.hits` can be absent from a filtered response, hence the fallback to an empty list.
  let hits = batch.hits?.hits ?? []
  while (hits.length > 0) {
    // add to the list
    for (const hit of hits) {
      cids.push(toRequest(hit._id))
    }
    imported += hits.length
    totalimported += hits.length
    if (imported > FLUSH_THRESHOLD) {
      console.log(`${totalimported.toString().padStart(5)}/${total} imported`)
      imported = 0
      // wait for the queued work before asking for more, to bound the number of in-flight requests
      await Promise.all(cids)
    }
    // take next batch
    batch = await postJson(ENDPOINT + '/_search/scroll', {
      scroll: SCROLL_TIME,
      scroll_id: scroll_id
    })
    // keep the previous id if the server does not send a new one
    scroll_id = batch._scroll_id ?? scroll_id
    hits = batch.hits?.hits ?? []
  }

  const requests = await Promise.all(cids)
  // explicit comparator instead of the default sort, which stringifies each tuple
  // assumes timestampToKey yields fixed-width keys so that string order is the intended order — TODO confirm
  return requests.sort((a, b) => {
    if (a[0] !== b[0]) return a[0] < b[0] ? -1 : 1
    const cidA = a[1].toString()
    const cidB = b[1].toString()
    if (cidA === cidB) return 0
    return cidA < cidB ? -1 : 1
  })
}
/// Download every cplus profile as index requests, then merge them into a tree
// starting from the empty root node.
async function importAllCplusToAMT() {
  // empty root cid
  const emptyRoot = CID.parse('bafyreicvlp2p65agkxpzcboedba7zit55us4zvtyyq2wesvsdedy6irwfy')
  const indexRequests = await getAllCplusIr()
  await cplusIrToAMT(indexRequests, emptyRoot)
}
// TODO use command line args to choose what to do // TODO use command line args to choose what to do
// doImport() // doImport()
// doAllCplusCidsToAMT() // doAllCplusCidsToAMT()
// run the import; a failure is reported with context and turned into a failing exit code
// instead of being left as a floating promise
importAllCplusToAMT().catch((e: unknown) => {
  console.error('cplus import failed:', e)
  process.exitCode = 1
})
...@@ -11,24 +11,24 @@ export interface Pointer<T> { ...@@ -11,24 +11,24 @@ export interface Pointer<T> {
// and indexer could add it to their database // and indexer could add it to their database
export interface IndexRequest { export interface IndexRequest {
/// pubkey of the author of the document /// pubkey of the author of the document
// used to verify signature // used to verify signature, should be in a format handled by the datapod (ex: ss58 address, base58 pubkey...)
pubkey: string pubkey: string
/// timestamp of the document /// timestamp of the document
// used as u64 in the signature // used as u64 in the signature
// note that the user can send whatever timestamp it wants // note that the user can send whatever timestamp it wants
// but the collector can choose to reject document if the timestamp differs too much from his local time // but the collector can choose to reject document if the timestamp differs too much from his local time
time: number time: number
/// kind of the document /// kind of the document, arbitrary string interpreted by the datapod
// the CID points to something describing the kind of the document // can be a CID of the document format description
// this allows to filter documents based on kinds of interest // allows to filter documents based on kinds of interest
// TODO allow number for more compact known kinds kind: string
kind: CID /// data coming with the index request is separated from it
/// data coming with the index request // when null, it means there is no data, for example in the case of a document deletion
// only nullable field, useful for deletion document for example
data: CID | null data: CID | null
/// signature of the following byte payload : /// signature of the following byte payload :
/// ["dd" prefix for 'duniter datapods' | timestamp as u64 bytes | kind bytes | data bytes or 0x00] /// ["dd" prefix for 'duniter datapods' | timestamp as u64 bytes | kind bytes | data bytes or 0x00]
sig: string // can be null in the case where the signature is inside the data (C+ profile formats for example)
sig: string | null
} }
// internal node // internal node
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment