From c47c57102051925eeec586c141e5d64c5632ba23 Mon Sep 17 00:00:00 2001 From: Hugo Trentesaux <hugo@trentesaux.fr> Date: Wed, 10 Apr 2024 19:16:23 +0200 Subject: [PATCH] update index request format --- src/cesium-plus.ts | 29 ++++++++++++----------------- src/collector.ts | 15 ++++++++------- src/components/FeedRow.vue | 8 ++++---- src/consts.ts | 3 +++ src/main.ts | 7 ++++--- src/processor.ts | 10 ++++++---- src/scripts/start-indexer.ts | 12 ++++++++---- src/types.ts | 26 ++++++++++++++++++++++---- 8 files changed, 67 insertions(+), 43 deletions(-) diff --git a/src/cesium-plus.ts b/src/cesium-plus.ts index 9dc91d0..2a8fd05 100644 --- a/src/cesium-plus.ts +++ b/src/cesium-plus.ts @@ -3,6 +3,7 @@ import { kubo } from './kubo' import { Buffer } from 'buffer' import { timestampToKey, mergeInodesSync, arrayToVinode } from './processor' import { type IndexRequest } from './types' +import { CESIUM_PLUS_PROFILE_IMPORT } from './consts' // ========================= types @@ -16,6 +17,9 @@ export interface CplusProfile { issuer: string hash: string signature: string + socials?: Social[] + tags?: string[] + avatar?: Avatar | CID } // social @@ -30,22 +34,16 @@ interface Avatar { _content: string // base64 encoded } -// optional fields -interface CplusProfileMore extends CplusProfile { - socials: Social[] - tags: string[] - avatar: Avatar | CID -} - // ========================= import functions /// C+ profile to index request function cplusProfileToIndexRequest(profile: CplusProfile, profileCid: CID): IndexRequest { return { - pubkey: profile.issuer, - cid: profileCid, - timestamp: profile.time * 1000, - signature: '' // signature is inside document for C+ data + pubk: profile.issuer, + kind: CESIUM_PLUS_PROFILE_IMPORT, + data: profileCid, + time: profile.time * 1000, + sig: '' // signature is inside document for old C+ data } } @@ -62,8 +60,7 @@ export async function processCesiumPlusImport(profileCids: CID[], groupBy: numbe /// if avatar is present, upload it as a separate 
file instead export async function processCesiumPlusProfile(obj: CplusProfile): Promise<CID> { - const profile = obj as CplusProfileMore - const { avatar, ...profileWithoutAvatar } = profile + const { avatar, ...profileWithoutAvatar } = obj if (avatar != undefined && (avatar as Avatar)._content != undefined) { const buffer = Buffer.from((avatar as Avatar)._content, 'base64') const fileCandidate = { content: new Uint8Array(buffer) } @@ -71,7 +68,7 @@ export async function processCesiumPlusProfile(obj: CplusProfile): Promise<CID> .add(fileCandidate) .then((result) => kubo.dag.put({ ...profileWithoutAvatar, avatar: result.cid as CID }) as Promise<CID>) } else { - return kubo.dag.put(profile) as Promise<CID> + return kubo.dag.put(obj) as Promise<CID> } } @@ -88,9 +85,7 @@ async function allCplusAsIndexRequestCids(cplusrootCID: CID): Promise<Array<[str const profileIR: Promise<[string, CID]> = kubo.dag .get(pcid) .then((v) => cplusProfileToIndexRequest(v.value, pcid)) - .then((r: IndexRequest) => - Promise.all([timestampToKey(r.timestamp), kubo.dag.put(r)] as [string, Promise<CID>]) - ) + .then((r: IndexRequest) => Promise.all([timestampToKey(r.time), kubo.dag.put(r)] as [string, Promise<CID>])) allCIDs.push(profileIR) } } diff --git a/src/collector.ts b/src/collector.ts index d421efb..a2e90f2 100644 --- a/src/collector.ts +++ b/src/collector.ts @@ -5,13 +5,14 @@ import { CID } from 'multiformats' import { kubo } from './kubo' import { bigintToUint8Array } from './utils' import type { IndexRequest } from './types' -import { addToIndexQueue } from './processor' +import { addToIndex } from './processor' // build payload to sign -export function buildBytesPayload(timestamp: number, cid: CID) { +export function buildBytesPayload(ir: IndexRequest) { const prefix = Uint8Array.from([100, 100]) // "dd" for duniter datapods - const u64timestamp = bigintToUint8Array(BigInt(timestamp)) - const bytesPayload = Uint8Array.from([...prefix, ...u64timestamp, ...cid.bytes]) + const 
u64timestamp = bigintToUint8Array(BigInt(ir.time)) + const databytes = ir.data ? ir.data.bytes : Uint8Array.from([0, 0]) + const bytesPayload = Uint8Array.from([...prefix, ...u64timestamp, ...ir.kind.bytes, ...databytes]) return bytesPayload } @@ -41,8 +42,8 @@ function handleMessage(message: CustomEvent<Message>) { // TODO some validation on dag to ensure it is exact format // TODO some validation on timestamp to prevent too much control on the key // example: 0 < Date.now() - timestamp < 1 minute - const bytesPayload = buildBytesPayload(dag.timestamp, dag.cid) - const isValid = isValidSignature(bytesPayload, dag.signature, dag.pubkey) + const bytesPayload = buildBytesPayload(dag) + const isValid = isValidSignature(bytesPayload, dag.sig, dag.pubk) if (isValid) { // at this point we can apply different treatment based on the key // we could for example have a trust list published in ipfs @@ -53,7 +54,7 @@ function handleMessage(message: CustomEvent<Message>) { // high trust => common timestamp AMT // low trust => sandboxed HAMT - addToIndexQueue(cid, dag) + addToIndex(cid, dag) // this is all that this indexer does // the rest (autopinning, postgres indexing... 
will be managed by an other part of the indexer) diff --git a/src/components/FeedRow.vue b/src/components/FeedRow.vue index 54ca04e..fb96ff8 100644 --- a/src/components/FeedRow.vue +++ b/src/components/FeedRow.vue @@ -14,10 +14,10 @@ const props = defineProps<{ }>() props.dag.then((r) => { - pubkey.value = r.pubkey.toString() - timestamp.value = r.timestamp.toString() - target_cid.value = r.cid.toString() - signature.value = r.signature.toString() + pubkey.value = r.pubk.toString() + timestamp.value = r.time.toString() + target_cid.value = r.data?.toString() || '[none]' + signature.value = r.sig.toString() }) </script> diff --git a/src/consts.ts b/src/consts.ts index efb887e..f7d9dd7 100644 --- a/src/consts.ts +++ b/src/consts.ts @@ -12,3 +12,6 @@ export const KEYSIZE = (64 * Math.log(2)) / Math.log(BASE) // empty root cid export const EMPTY_NODE_CID = CID.parse('bafyreicvlp2p65agkxpzcboedba7zit55us4zvtyyq2wesvsdedy6irwfy') + +// document kind of old cesium plus profile imported in the indexer +export const CESIUM_PLUS_PROFILE_IMPORT = CID.parse('bafkreiawtammeqc55cssr2zepfpaxbmp7kquhikkagipvtefeadsw4mqvq') diff --git a/src/main.ts b/src/main.ts index 4800718..5179324 100644 --- a/src/main.ts +++ b/src/main.ts @@ -13,6 +13,7 @@ import { CID } from 'multiformats' import { kubo } from './kubo' import type { IndexRequest } from './types' import { buildBytesPayload, isValidSignature } from './collector' +import { addToIndex } from './processor' // start libp2p // there is no need to do that if we have a kubo node other than demo purpose @@ -32,11 +33,11 @@ pubsub.addEventListener('message', (message) => { feed.value.push(msg) kubo.dag.get(cid).then(function (d) { const dag = d.value as IndexRequest - const bytesPayload = buildBytesPayload(dag.timestamp, dag.cid) - const isValid = isValidSignature(bytesPayload, dag.signature, dag.pubkey) + const bytesPayload = buildBytesPayload(dag) + const isValid = isValidSignature(bytesPayload, dag.sig, dag.pubk) if (isValid) { 
// here we would do the processing - // addToIndexQueue(cid, dag) + addToIndex(cid, dag) } else { feed.value.push('[invalid sig] ' + msg) } diff --git a/src/processor.ts b/src/processor.ts index 549fab3..535da3b 100644 --- a/src/processor.ts +++ b/src/processor.ts @@ -43,10 +43,10 @@ export function publishHistory(cid: CID) { // ===================== main API ===================== -// add cid to index queue -export async function addToIndexQueue(cid: CID, indexRequest: IndexRequest) { +// add cid to index +export async function addToIndex(cid: CID, indexRequest: IndexRequest) { // at this point we might want to add a queue to avoid recomputing the whole tree at each new entry - // but for the moment we will not group entries and re-compute the whole tree each time + // but for the moment we will not group entries and do re-compute the whole tree each time // // see this presentation for more info about the merkle tree // https://polkadot-blockchain-academy.github.io/pba-book/substrate/storage/slides.html#/ @@ -56,6 +56,8 @@ export async function addToIndexQueue(cid: CID, indexRequest: IndexRequest) { const rootCID = CID.parse(name.slice(6)) // insert request into it const newRootCID = await insertKnownIndexRequest(rootCID, cid, indexRequest) + // pin new root CID recursively to be sure to have all data and to meet the "pinned only" reprovide strategy + kubo.pin.add(newRootCID, { recursive: true }) // publish the new root CID to IPNS await kubo.name.publish(newRootCID, { ttl: '1s' }) // also update history @@ -79,7 +81,7 @@ export async function insertKnownIndexRequest( indexRequestCid: CID, indexRequest: IndexRequest ): Promise<CID> { - const key = timestampToKey(indexRequest.timestamp) + const key = timestampToKey(indexRequest.time) const rootDag = await kubo.dag.get(rootCid) return processInode(rootDag.value, key, insertRequest(indexRequestCid)) } diff --git a/src/scripts/start-indexer.ts b/src/scripts/start-indexer.ts index 910d065..877709e 100644 --- 
a/src/scripts/start-indexer.ts +++ b/src/scripts/start-indexer.ts @@ -93,8 +93,8 @@ function handleMessage(message: any) { // TODO some validation on dag to ensure it is exact format // TODO some validation on timestamp to prevent too much control on the key // example: 0 < Date.now() - timestamp < 1 minute - const bytesPayload = buildBytesPayload(dag.timestamp, dag.cid) - const isValid = isValidSignature(bytesPayload, dag.signature, dag.pubkey) + const bytesPayload = buildBytesPayload(dag) + const isValid = isValidSignature(bytesPayload, dag.sig, dag.pubk) if (isValid) { // at this point we can apply different treatment based on the key // we could for example have a trust list published in ipfs @@ -133,7 +133,7 @@ function handleBatch() { items.push(i) } // convert it to a list of [key, cid] for batch insert (merge) - const requests = items.map(([cid, dag]) => [timestampToKey(dag.timestamp), cid]).sort() as Array<[string, CID]> + const requests = items.map(([cid, dag]) => [timestampToKey(dag.time), cid]).sort() as Array<[string, CID]> const tree = arrayToVinode(requests) // insert them @@ -141,10 +141,14 @@ function handleBatch() { .get(rootCID) .then((v) => mergeInodesSync(v.value, tree)) .then((cid) => { - // update CID and schedule publishing of new CID in history + // - update CID + // - pin recursively (for "pinned only" reprovide strategy) + // - publish new CID + // - publish history TODO limit rate at lower frequency (e.g. 
1/minute) const oldCID = rootCID rootCID = cid console.log(`new root CID ${cid}`) + kubo.pin.add(rootCID, { recursive: true }) kubo.name.publish(rootCID, { ttl: '1s' }) publishHistory(rootCID) isProcessingQueue = false diff --git a/src/types.ts b/src/types.ts index fbf19dd..8942d05 100644 --- a/src/types.ts +++ b/src/types.ts @@ -5,11 +5,29 @@ export interface Pointer<T> { value: T } +/// index request
// the CID of this document is sent through pubsub
// so that collectors could add it to their AMT
// and indexer could add it to their database export interface IndexRequest { - pubkey: string - cid: CID - timestamp: number - signature: string + /// pubkey of the author of the document + // used to verify signature + pubk: string + /// timestamp of the document + // used as u64 in the signature + // note that the user can send whatever timestamp it wants + // but the collector can choose to reject document if the timestamp differs too much from his local time + time: number + /// kind of the document + // the CID points to something describing the kind of the document + // this allows to filter documents based on kinds of interest + kind: CID + /// data coming with the index request + // only nullable field, useful for deletion document for example + data: CID | null + /// signature of the following byte payload : + /// ["dd" prefix for 'duniter datapods' | timestamp as u64 bytes | kind bytes | data bytes or 0x0000 (two zero bytes) when data is null] + sig: string } // internal node -- GitLab