From fb1b4e20df12373e3e2919f267054f3a8b4dbdb2 Mon Sep 17 00:00:00 2001
From: David Bosschaert
Date: Fri, 5 Sep 2025 14:11:57 +0100
Subject: [PATCH 1/3] fix: use document GUID as prosemirror reference to avoid
updating deleted documents
Previously the prosemirror document would be stored in the 'prosemirror' attribute
on the YDoc. This changes that to be prosemirror-${guid}.
This prevents editors that were accidentally left open from re-creating a deleted document.
---
src/collab.js | 18 ++--
src/edge.js | 4 +-
src/shareddoc.js | 115 +++++++++++++++++++---
test/collab.test.js | 98 +++++++++----------
test/collab2.test.js | 4 +-
test/shareddoc.test.js | 216 +++++++++++++++++++++++++++++++++++++----
6 files changed, 362 insertions(+), 93 deletions(-)
diff --git a/src/collab.js b/src/collab.js
index dfbfc66..265877e 100644
--- a/src/collab.js
+++ b/src/collab.js
@@ -10,9 +10,9 @@
* governing permissions and limitations under the License.
*/
import {
- prosemirrorToYXmlFragment, yDocToProsemirror,
+ prosemirrorToYXmlFragment, yDocToProsemirrorJSON,
} from 'y-prosemirror';
-import { DOMParser, DOMSerializer } from 'prosemirror-model';
+import { DOMParser, DOMSerializer, Node } from 'prosemirror-model';
import { fromHtml } from 'hast-util-from-html';
import { matches } from 'hast-util-select';
import { getSchema } from './schema.js';
@@ -142,7 +142,7 @@ function removeComments(node) {
export const EMPTY_DOC = '';
-export function aem2doc(html, ydoc) {
+export function aem2doc(html, ydoc, guid) {
if (!html) {
// eslint-disable-next-line no-param-reassign
html = EMPTY_DOC;
@@ -280,7 +280,7 @@ export function aem2doc(html, ydoc) {
};
const json = DOMParser.fromSchema(getSchema()).parse(new Proxy(main || tree, handler2));
- prosemirrorToYXmlFragment(json, ydoc.getXmlFragment('prosemirror'));
+ prosemirrorToYXmlFragment(json, ydoc.getXmlFragment(`prosemirror-${guid}`));
}
const getAttrString = (attributes) => Object.entries(attributes).map(([key, value]) => ` ${key}="${value}"`).join('');
@@ -366,9 +366,15 @@ export function tableToBlock(child, fragment) {
});
}
-export function doc2aem(ydoc) {
+export function doc2aem(ydoc, guid) {
+ if (!guid) {
+ // this is a brand new document
+ return EMPTY_DOC;
+ }
+
const schema = getSchema();
- const json = yDocToProsemirror(schema, ydoc);
+ const state = yDocToProsemirrorJSON(ydoc, `prosemirror-${guid}`);
+ const json = Node.fromJSON(schema, state);
const fragment = { type: 'div', children: [], attributes: {} };
const handler3 = {
diff --git a/src/edge.js b/src/edge.js
index fe9c1fb..e716172 100644
--- a/src/edge.js
+++ b/src/edge.js
@@ -9,7 +9,7 @@
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
-import { invalidateFromAdmin, setupWSConnection } from './shareddoc.js';
+import { deleteFromAdmin, invalidateFromAdmin, setupWSConnection } from './shareddoc.js';
// This is the Edge Worker, built using Durable Objects!
@@ -243,7 +243,7 @@ export class DocRoom {
const api = url.searchParams.get('api');
switch (api) {
case 'deleteAdmin':
- if (await invalidateFromAdmin(baseURL)) {
+ if (await deleteFromAdmin(baseURL)) {
return new Response(null, { status: 204 });
} else {
return new Response('Not Found', { status: 404 });
diff --git a/src/shareddoc.js b/src/shareddoc.js
index b7c67d2..862f7d2 100644
--- a/src/shareddoc.js
+++ b/src/shareddoc.js
@@ -189,7 +189,7 @@ export const persistence = {
* @param {string} docName - The document name
* @param {string} auth - The authorization header
* @param {object} daadmin - The da-admin worker service binding
- * @returns {Promise} - The content of the document
+ * @returns {Promise<object|null>} - { text, guid } of the document, or null on 404.
*/
get: async (docName, auth, daadmin) => {
const initalOpts = {};
@@ -198,7 +198,7 @@ export const persistence = {
}
const initialReq = await daadmin.fetch(docName, initalOpts);
if (initialReq.ok) {
- return initialReq.text();
+ return { text: await initialReq.text(), guid: initialReq.headers.get('X-da-id') };
} else if (initialReq.status === 404) {
return null;
} else {
@@ -215,11 +215,12 @@ export const persistence = {
* @param {string} content - The content to store
* @returns {object} The response from da-admin.
*/
- put: async (ydoc, content) => {
+ put: async (ydoc, content, guid) => {
const blob = new Blob([content], { type: 'text/html' });
const formData = new FormData();
formData.append('data', blob);
+ formData.append('guid', guid);
const opts = { method: 'PUT', body: formData };
const keys = Array.from(ydoc.conns.keys());
@@ -258,16 +259,64 @@ export const persistence = {
* @param {WSSharedDoc} ydoc - the ydoc that has been updated.
* @param {string} current - the current content of the document previously
* obtained from da-admin
+ * @param {object} guidHolder - an object containing the guid of the document.
+ * If the document exists, it will hold its guid. If the document does not yet
+ * exist, this method will set the guid on the holder so that it's known
+ * for subsequent calls.
* @returns {string} - the new content of the document in da-admin.
*/
- update: async (ydoc, current) => {
+ update: async (ydoc, current, guidHolder) => {
let closeAll = false;
try {
- const content = doc2aem(ydoc);
+ const { guid } = guidHolder;
+
+ // The guid array contains the known guids. We sort it by timestamp so that we
+ // can find the latest one. Any other guids are considered stale.
+ // Objects on the guid array may also contain a newDoc flag, which is set to true
+ // when the document is just opened in the browser.
+ const guidArray = ydoc.getArray('prosemirror-guids');
+ const copy = [...guidArray];
+ if (copy.length === 0) {
+ // eslint-disable-next-line no-console
+ console.log('No guid array found in update. Ignoring.');
+ return current;
+ }
+ copy.sort((a, b) => a.ts - b.ts);
+ const { newDoc, guid: curGuid, ts: createdTS } = copy.pop();
+
+ if (guid && curGuid !== guid) {
+ // Guid mismatch, need to update the editor
+ ydoc.transact(() => {
+ guidArray.delete(0, guidArray.length); // Delete the entire array
+ guidArray.push([{ guid, ts: createdTS + 1 }]);
+ });
+ return current;
+ }
+
+ if (!newDoc && !guid) {
+ // Someone is still editing a document in the browser that has since been deleted
+ // we know it's deleted because guid from da-admin is not set.
+ // eslint-disable-next-line no-console
+ console.log('Document GUID mismatch, da-admin guid:', guid, 'edited guid:', curGuid);
+ showError(ydoc, { message: 'This document has since been deleted, your edits are not persisted' });
+ return current;
+ }
+
+ const content = doc2aem(ydoc, curGuid);
if (current !== content) {
// Only store the document if it was actually changed.
- const { ok, status, statusText } = await persistence.put(ydoc, content);
+ const { ok, status, statusText } = await persistence.put(ydoc, content, curGuid);
+ if (newDoc) {
+ // Update the guid in the guidHolder so that in subsequent calls we know what it is
+ // eslint-disable-next-line no-param-reassign
+ guidHolder.guid = curGuid;
+ // Remove the stale guids, and set the array to the current
+ ydoc.transact(() => {
+ guidArray.delete(0, guidArray.length); // Delete the entire array
+ guidArray.push([{ guid: curGuid, ts: createdTS }]);
+ });
+ }
if (!ok) {
closeAll = (status === 401 || status === 403);
throw new Error(`${status} - ${statusText}`);
@@ -300,11 +349,18 @@ export const persistence = {
let timingDaAdminGetDuration;
let current;
+ let guid;
let restored = false; // True if restored from worker storage
try {
let newDoc = false;
const timingBeforeDaAdminGet = Date.now();
- current = await persistence.get(docName, conn.auth, ydoc.daadmin);
+ const cur = await persistence.get(docName, conn.auth, ydoc.daadmin);
+ if (cur === null) {
+ current = null;
+ } else {
+ current = cur?.text;
+ guid = cur?.guid;
+ }
timingDaAdminGetDuration = Date.now() - timingBeforeDaAdminGet;
const timingBeforeReadState = Date.now();
@@ -327,7 +383,7 @@ export const persistence = {
// Check if the state from the worker storage is the same as the current state in da-admin.
// So for example if da-admin doesn't have the doc any more, or if it has been altered in
// another way, we don't use the state of the worker storage.
- const fromStorage = doc2aem(ydoc);
+ const fromStorage = doc2aem(ydoc, guid);
if (fromStorage === current) {
restored = true;
@@ -347,21 +403,25 @@ export const persistence = {
showError(ydoc, error);
}
- if (!restored) {
+ if (!restored && guid) {
// The doc was not restored from worker persistence, so read it from da-admin,
- // but do this async to give the ydoc some time to get synced up first. Without
- // this timeout, the ydoc can get confused which may result in duplicated content.
+ // but only if the doc actually exists in da-admin (guid has a value).
+ // If it's a brand new document, subsequent update() calls will set it in
+ // da-admin and provide the guid to use.
+
+ // Do this async to give the ydoc some time to get synced up first. Without this
+ // timeout, the ydoc can get confused which may result in duplicated content.
// eslint-disable-next-line no-console
console.log('Could not be restored, trying to restore from da-admin', docName);
setTimeout(() => {
if (ydoc === docs.get(docName)) {
- const rootType = ydoc.getXmlFragment('prosemirror');
+ const rootType = ydoc.getXmlFragment(`prosemirror-${guid}`);
ydoc.transact(() => {
try {
// clear document
rootType.delete(0, rootType.length);
// restore from da-admin
- aem2doc(current, ydoc);
+ aem2doc(current, ydoc, guid);
// eslint-disable-next-line no-console
console.log('Restored from da-admin', docName);
@@ -382,11 +442,15 @@ export const persistence = {
}
});
+ // Use a holder for the guid. This is needed in case the guid is not known yet
+ // for a new document so that it can be updated later once it's known.
+ const guidHolder = { guid };
+
ydoc.on('update', debounce(async () => {
// If we receive an update on the document, store it in da-admin, but debounce it
// to avoid excessive da-admin calls.
if (ydoc === docs.get(docName)) {
- current = await persistence.update(ydoc, current);
+ current = await persistence.update(ydoc, current, guidHolder);
}
}, 2000, { maxWait: 10000 }));
@@ -544,6 +608,29 @@ export const messageListener = (conn, doc, message) => {
}
};
+export const deleteFromAdmin = async (docName) => {
+ // eslint-disable-next-line no-console
+ console.log('Delete from Admin received', docName);
+ const ydoc = docs.get(docName);
+ if (ydoc) {
+ // empty out all known docs, should normally just be one
+ for (const { guid } of ydoc.getArray('prosemirror-guids')) {
+ ydoc.transact(() => {
+ const rootType = ydoc.getXmlFragment(`prosemirror-${guid}`);
+ rootType.delete(0, rootType.length);
+ });
+ }
+
+ // Reset the connections to flush the guids
+ ydoc.conns.forEach((_, c) => closeConn(ydoc, c));
+ return true;
+ } else {
+ // eslint-disable-next-line no-console
+ console.log('Document not found', docName);
+ }
+ return false;
+};
+
/**
* Invalidate the worker storage for the document, which will ensure that when accessed
* the worker will fetch the latest version of the document from the da-admin.
diff --git a/test/collab.test.js b/test/collab.test.js
index 5155196..44ddca6 100644
--- a/test/collab.test.js
+++ b/test/collab.test.js
@@ -38,8 +38,8 @@ describe('Parsing test suite', () => {
html = collapseWhitespace(html);
const yDoc = new Y.Doc();
- aem2doc(html, yDoc);
- const result = doc2aem(yDoc);
+ aem2doc(html, yDoc, 'test-guid');
+ const result = doc2aem(yDoc, 'test-guid');
assert.equal(collapseWhitespace(result), html);
});
@@ -64,8 +64,8 @@ describe('Parsing test suite', () => {
html = collapseWhitespace(html);
const yDoc = new Y.Doc();
- aem2doc(html, yDoc);
- const result = doc2aem(yDoc);
+ aem2doc(html, yDoc, 'some-guid');
+ const result = doc2aem(yDoc, 'some-guid');
assert.equal(collapseWhitespace(result), html);
});
@@ -79,8 +79,8 @@ describe('Parsing test suite', () => {
+
+
+
+
+
+ From daadmin
+
+
From daadmin
-