From 1e8ea641eae9276ddbfb2cdfe3cc6584b4c8d08a Mon Sep 17 00:00:00 2001 From: Kevin Jahns Date: Thu, 24 Apr 2025 00:03:00 +0200 Subject: [PATCH] add another example and create a writeup for attributing content. --- attributing-content.md | 131 +++++++++++++++++++++++++++++++++++++++++ src/index.js | 6 +- src/utils/IdSet.js | 31 ++++++++++ tests/y-text.tests.js | 33 +++++++++++ 4 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 attributing-content.md diff --git a/attributing-content.md b/attributing-content.md new file mode 100644 index 00000000..78040133 --- /dev/null +++ b/attributing-content.md @@ -0,0 +1,131 @@ +# IdSets and IdMaps + +`IdSet` is a data structure (formerly `DeleteSet`) that allows us to efficiently +represent ranges of ids in Yjs (all content is identifiable by ids). + +`IdMap` is a new data structure that allows us to efficiently map ids to +attributes. It can be efficiently encoded. + +We can perform all usual set operations on `IdMap`s and `IdSet`s: diff, merge, +intersect. + +# Attribution of content + +In order to implement a Google Docs-like versioning feature, we want to be able +to attribute content with additional information (who created the change, +when was this change created, ...). + +When we click on a version in Google Docs, we might get annotated changes like +this: + +``` +# E.g. If Bob appends "world" to the previous version "hello " +[{ insert: 'hello' }, { insert: 'world', color: 'blue', creator: 'Bob', when: 'yesterday' }] +# E.g. If Bob deletes "world" from the previous version "hello world" +[{ insert: 'hello' }, { insert: 'world', backgroundColor: 'red', creator: 'Bob', when: 'yesterday' }] +``` + +In Yjs, we can now "attribute" changes with additional information. When we +render content using methods like `toString()` or `getDelta()`, Yjs will render +the unattributed content as-is, but it will render the attributed content with +the additional information. 
As all changes in Yjs are identifiable by Ids, we +can use `IdMap`s to map changes to "attributions". For example, we could +attribute deletions and insertions of a change and render them: + +```js +// We create some initial content "Hello World!". Then we create another +// document that will have a bunch of changes (make "Hell" italic, replace "World" +// with "attributions"). +const ydocVersion0 = new Y.Doc({ gc: false }) +ydocVersion0.getText().insert(0, 'Hello World!') +const ydoc = new Y.Doc({ gc: false }) +Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(ydocVersion0)) +const ytext = ydoc.getText() +ytext.applyDelta([{ retain: 4, attributes: { italic: true } }, { retain: 2 }, { delete: 5 }, { insert: 'attributions' }]) +// this represents all insertions of ydoc +const insertionSet = Y.createInsertionSetFromStructStore(ydoc.store) +const deleteSet = Y.createDeleteSetFromStructStore(ydoc.store) +// exclude the changes from `ydocVersion0` +const insertionSetDiff = Y.diffIdSet(insertionSet, Y.createInsertionSetFromStructStore(ydocVersion0.store)) +const deleteSetDiff = Y.diffIdSet(deleteSet, Y.createDeleteSetFromStructStore(ydocVersion0.store)) +// assign attributes to the diff +const attributedInsertions = createIdMapFromIdSet(insertionSetDiff, [new Y.Attribution('insert', 'Bob')]) +const attributedDeletions = createIdMapFromIdSet(deleteSetDiff, [new Y.Attribution('delete', 'Bob')]) +// now we can define an attribution manager that maps these changes to output. 
One of the +// implementations is the TwosetAttributionManager +const attributionManager = new TwosetAttributionManager(attributedInsertions, attributedDeletions) +// we render the attributed content with the attributionManager +let attributedContent = ytext.getContent(attributionManager) +console.log(JSON.stringify(attributedContent.toJSON().ops, null, 2)) +let expectedContent = delta.create().insert('Hell', { italic: true }, { attributes: { italic: ['Bob'] } }).insert('o ').insert('World', {}, { delete: ['Bob'] }).insert('attributions', {}, { insert: ['Bob'] }).insert('!') +t.assert(attributedContent.equals(expectedContent)) + +// this is what the output would look like +const output = [ + { + "insert": "Hell", + "attributes": { + "italic": true + }, + "attribution": { + "attributes": { + "italic": [ + "Bob" + ] + } + } + }, + { + "insert": "o " + }, + { + "insert": "World", + "attribution": { + "delete": [ + "Bob" + ] + } + }, + { + "insert": "attributions", + "attribution": { + "insert": [ + "Bob" + ] + } + }, + { + "insert": "!" + } +] +``` + +We get a similar output to Google Docs: Insertions, Deletions, and changes to +formatting (attributes) are clearly associated with users. It will be the job of +the editor to render those changes with background-color etc. + +Of course, we could also associate changes with multiple users like this: + +```js +const attributedDeletions = createIdMapFromIdSet(deleteSetDiff, [new Y.Attribution('insert', 'Bob'), new Y.Attribution('insert', 'OpenAI o3')]) +``` + +You could use the same output to calculate a real diff as well (consisting of +deletions and insertions only, without Attributions). + +`AttributionManager` is an abstract class for mapping attributions. It is +possible to highlight arbitrary content with this approach. + +The next steps are to: + +- finish the implementation for Y.Map and Y.Xml* (which should be easy, compared +to Y.Text). 
+- Implement an AttributionManager-CRDT for the backend that sits there and +associates changes with users. +- use `getContent(attributionManager)` instead of `toDelta` in y-prosemirror. +Would like to make the attribution part of y-prosemirror, however Nick can also +use this approach to custom-render the changes in ProseMirror. + +The AttributionManager encodes very efficiently. The ids are encoded using +run-length encoding and the Attributes are de-duplicated and only encoded once. +The above example encodes in 20 bytes. diff --git a/src/index.js b/src/index.js index 325092a2..6aea1c52 100644 --- a/src/index.js +++ b/src/index.js @@ -105,7 +105,11 @@ export { IdMap, createIdMap, createAttribution, - Attribution + createInsertionSetFromStructStore, + diffIdMap, + diffIdSet, + Attribution, + encodeIdMap } from './internals.js' const glo = /** @type {any} */ (typeof globalThis !== 'undefined' diff --git a/src/utils/IdSet.js b/src/utils/IdSet.js index 4a344e4c..6c1b805e 100644 --- a/src/utils/IdSet.js +++ b/src/utils/IdSet.js @@ -355,6 +355,37 @@ export const createDeleteSetFromStructStore = ss => { return ds } +/** + * @param {import('../internals.js').StructStore} ss + */ +export const createInsertionSetFromStructStore = ss => { + const idset = createIdSet() + ss.clients.forEach((structs, client) => { + /** + * @type {Array} + */ + const iditems = [] + for (let i = 0; i < structs.length; i++) { + const struct = structs[i] + if (!struct.deleted) { + const clock = struct.id.clock + let len = struct.length + if (i + 1 < structs.length) { + for (let next = structs[i + 1]; i + 1 < structs.length && !next.deleted; next = structs[++i + 1]) { + len += next.length + } + } + iditems.push(new IdRange(clock, len)) + } + } + if (iditems.length > 0) { + idset.clients.set(client, new IdRanges(iditems)) + } + }) + return idset +} + + /** * @param {IdSetEncoderV1 | IdSetEncoderV2} encoder * @param {IdSet} idSet diff --git a/tests/y-text.tests.js b/tests/y-text.tests.js index 
409a2b07..f1b278f1 100644 --- a/tests/y-text.tests.js +++ b/tests/y-text.tests.js @@ -2330,6 +2330,38 @@ export const testAttributedContent = _tc => { }) } +/** + * @param {t.TestCase} _tc + */ +export const testAttributedDiffing = _tc => { + const ydocVersion0 = new Y.Doc({ gc: false }) + ydocVersion0.clientID = 0 + ydocVersion0.getText().insert(0, 'Hello World!') + const ydoc = new Y.Doc({ gc: false }) + ydoc.clientID = 1 + Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(ydocVersion0)) + const ytext = ydoc.getText() + ytext.applyDelta([{ retain: 4, attributes: { italic: true } }, { retain: 2 }, { delete: 5 }, { insert: 'attributions' }]) + // this represents to all insertions of ydoc + const insertionSet = Y.createInsertionSetFromStructStore(ydoc.store) + const deleteSet = Y.createDeleteSetFromStructStore(ydoc.store) + // exclude the changes from `ydocVersion0` + const insertionSetDiff = Y.diffIdSet(insertionSet, Y.createInsertionSetFromStructStore(ydocVersion0.store)) + const deleteSetDiff = Y.diffIdSet(deleteSet, Y.createDeleteSetFromStructStore(ydocVersion0.store)) + // assign attributes to the diff + const attributedInsertions = createIdMapFromIdSet(insertionSetDiff, [new Y.Attribution('insert', 'Bob')]) + const attributedDeletions = createIdMapFromIdSet(deleteSetDiff, [new Y.Attribution('delete', 'Bob')]) + // now we can define an attribution manager that maps these changes to output. 
One of the + // implementations is the TwosetAttributionManager + const attributionManager = new TwosetAttributionManager(attributedInsertions, attributedDeletions) + // we render the attributed content with the attributionManager + let attributedContent = ytext.getContent(attributionManager) + console.log(JSON.stringify(attributedContent.toJSON().ops, null, 2)) + let expectedContent = delta.create().insert('Hell', { italic: true }, { attributes: { italic: ['Bob'] } }).insert('o ').insert('World', {}, { delete: ['Bob'] }).insert('attributions', {}, { insert: ['Bob'] }).insert('!') + t.assert(attributedContent.equals(expectedContent)) + console.log(Y.encodeIdMap(attributedInsertions).length) +} + // RANDOM TESTS let charCounter = 0 @@ -2556,6 +2588,7 @@ const checkResult = result => { */ const typeToObject = d => d.insert instanceof Y.AbstractType ? d.insert.toJSON() : d + t.info('length of text = ' + result.users[i-1].getText('text').length) t.measureTime('original toDelta perf', () => { result.users[i-1].getText('text').toDelta().map(typeToObject) })