mirror of
https://github.com/streetwriters/notesnook.git
synced 2025-12-16 11:47:54 +01:00
core: improve content conflict detection using proper HTML diffing (#1183)
Since HTML is a tree-like language, it is futile to compare it character for character: `html1 === html2` is almost always false. This commit introduces a simple diffing algorithm that only checks the text inside the HTML, plus a few other attributes, to decide whether the two HTML documents are actually different or not. This is obviously not foolproof, and it will ignore everything aesthetic (b, em, strong tags etc.). This is actually desirable because in our case only a text difference should warrant a conflict; everything else can easily be brought back. Similarly, this also ignores whitespace differences surrounding the tags. All in all, it provides a more reliable alternative to MD5-hashing the two HTML documents.
This commit is contained in:
@@ -35,7 +35,7 @@ export default function MigrationDialog(props: MigrationDialogProps) {
|
||||
isOpen={true}
|
||||
title={"Database migration required"}
|
||||
description={
|
||||
"Due to new features we need to migrate your data to a newer version. This is NOT a destructive operation."
|
||||
"Due to new features we need to migrate your data to a newer version."
|
||||
}
|
||||
positiveButton={{
|
||||
text: "Backup and migrate",
|
||||
@@ -62,18 +62,7 @@ export default function MigrationDialog(props: MigrationDialogProps) {
|
||||
your data.
|
||||
</Text>
|
||||
<Text as="li" variant={"body"}>
|
||||
Some <b>merge conflicts</b> are expected in your notes after a
|
||||
migration. It is <b>recommended</b> that you resolve them carefully.
|
||||
<Text as="ol" sx={{ paddingInlineStart: 20 }}>
|
||||
<Text as="li" variant={"body"}>
|
||||
<b>But if you are feeling reckless enough</b> to risk losing some
|
||||
data, you can logout & log back in.
|
||||
</Text>
|
||||
</Text>
|
||||
</Text>
|
||||
<Text as="li" variant={"body"}>
|
||||
If you face any other issues or if you are unsure about what to do,
|
||||
feel free to reach out to us via{" "}
|
||||
If you face any issues feel free to reach out to us via{" "}
|
||||
<a href="https://discord.com/invite/zQBK97EE22">Discord</a> or email
|
||||
us at{" "}
|
||||
<a href="mailto:support@streetwriters.co">support@streetwriters.co</a>
|
||||
|
||||
@@ -18,9 +18,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import { migrateItem } from "../../migrations";
|
||||
import SparkMD5 from "spark-md5";
|
||||
import setManipulator from "../../utils/set";
|
||||
import { logger } from "../../logger";
|
||||
import { isHTMLEqual } from "../../utils/html-diff";
|
||||
|
||||
class Merger {
|
||||
/**
|
||||
@@ -69,7 +69,7 @@ class Merger {
|
||||
!local.data ||
|
||||
!remote.data ||
|
||||
remote.data === "undefined" || //TODO not sure about this
|
||||
SparkMD5.hash(local.data) === SparkMD5.hash(remote.data))
|
||||
isHTMLEqual(local.data, remote.data))
|
||||
)
|
||||
return;
|
||||
|
||||
|
||||
21
packages/core/package-lock.json
generated
21
packages/core/package-lock.json
generated
@@ -16,7 +16,8 @@
|
||||
"dayjs": "^1.11.3",
|
||||
"entities": "^4.3.1",
|
||||
"fflate": "^0.7.3",
|
||||
"linkedom": "^0.14.12",
|
||||
"htmlparser2": "^8.0.1",
|
||||
"linkedom": "^0.14.17",
|
||||
"liqe": "^1.13.0",
|
||||
"qclone": "^1.1.0",
|
||||
"showdown": "github:thecodrr/showdown",
|
||||
@@ -7315,8 +7316,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/linkedom": {
|
||||
"version": "0.14.12",
|
||||
"license": "ISC",
|
||||
"version": "0.14.17",
|
||||
"resolved": "https://registry.npmjs.org/linkedom/-/linkedom-0.14.17.tgz",
|
||||
"integrity": "sha512-PD6GQKvZ4s6Ai4/WkpyHc8MhiZdCz4hWmMOWJk+MO3/kl1QvPUbo4nQWS9+VHO7lRBk1ucIa9ONS9qzCUcBAmQ==",
|
||||
"dependencies": {
|
||||
"css-select": "^5.1.0",
|
||||
"cssom": "^0.5.0",
|
||||
@@ -13261,7 +13263,8 @@
|
||||
},
|
||||
"jest-pnp-resolver": {
|
||||
"version": "1.2.2",
|
||||
"dev": true
|
||||
"dev": true,
|
||||
"requires": {}
|
||||
},
|
||||
"jest-regex-util": {
|
||||
"version": "28.0.2",
|
||||
@@ -13804,7 +13807,8 @@
|
||||
"dependencies": {
|
||||
"ws": {
|
||||
"version": "8.8.1",
|
||||
"dev": true
|
||||
"dev": true,
|
||||
"requires": {}
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -13841,7 +13845,9 @@
|
||||
"dev": true
|
||||
},
|
||||
"linkedom": {
|
||||
"version": "0.14.12",
|
||||
"version": "0.14.17",
|
||||
"resolved": "https://registry.npmjs.org/linkedom/-/linkedom-0.14.17.tgz",
|
||||
"integrity": "sha512-PD6GQKvZ4s6Ai4/WkpyHc8MhiZdCz4hWmMOWJk+MO3/kl1QvPUbo4nQWS9+VHO7lRBk1ucIa9ONS9qzCUcBAmQ==",
|
||||
"requires": {
|
||||
"css-select": "^5.1.0",
|
||||
"cssom": "^0.5.0",
|
||||
@@ -14725,7 +14731,8 @@
|
||||
}
|
||||
},
|
||||
"ws": {
|
||||
"version": "7.5.9"
|
||||
"version": "7.5.9",
|
||||
"requires": {}
|
||||
},
|
||||
"xml-name-validator": {
|
||||
"version": "4.0.0",
|
||||
|
||||
@@ -41,7 +41,8 @@
|
||||
"dayjs": "^1.11.3",
|
||||
"entities": "^4.3.1",
|
||||
"fflate": "^0.7.3",
|
||||
"linkedom": "^0.14.12",
|
||||
"htmlparser2": "^8.0.1",
|
||||
"linkedom": "^0.14.17",
|
||||
"liqe": "^1.13.0",
|
||||
"qclone": "^1.1.0",
|
||||
"showdown": "github:thecodrr/showdown",
|
||||
|
||||
81
packages/core/utils/__tests__/html-diff.test.js
Normal file
81
packages/core/utils/__tests__/html-diff.test.js
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
This file is part of the Notesnook project (https://notesnook.com/)
|
||||
|
||||
Copyright (C) 2022 Streetwriters (Private) Limited
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import { isHTMLEqual } from "../html-diff";
|
||||
|
||||
// Fixture rows of the form [test title, first HTML, second HTML].
// Every pair listed here is expected to make isHTMLEqual return true.
const equalPairs = [
  [
    "ignore whitespace difference",
    `<div>hello \n\n\n\n\n</div>\n\n\n\n\n\n`,
    `<div>hello</div>`
  ],

  // Formatting-only markup and ignored attributes.
  ["ignore html structure", `<p><b>hello</b>world</p>`, "<p>helloworld</p>"],
  [
    "ignore attributes",
    `<p id="ignored"><b id="ignored">hello</b>world</p>`,
    "<p>helloworld</p>"
  ],

  // Elements that carry no text.
  ["ignore empty tags", "<div>helloworld</div><p></p>", "<div>helloworld</div>"],
  ["ignore br", "<p>hello<br/>world</p><p><br/><br/></p>", "<p>helloworld</p>"],

  // src/href are compared; other attributes on the same element are not.
  [
    "image with same src",
    `<img src="./img.jpeg" />`,
    `<img id="hello" class="diff" src="./img.jpeg" />`
  ],
  [
    "link with same href",
    `<a href="google.com" />`,
    `<a id="hello" class="diff" href="google.com" />`
  ]
];
|
||||
|
||||
// Runs one test per row of equalPairs; the row's first element is the title.
describe("pairs should be equal", function () {
  test.each(equalPairs)("%s", function (_title, left, right) {
    const areEqual = isHTMLEqual(left, right);
    expect(areEqual).toBe(true);
  });
});
|
||||
|
||||
// Fixture rows of the form [test title, first value, second value].
// Every pair listed here is expected to make isHTMLEqual return false.
const inequalPairs = [
  // Extra text on one side, even when separated only by whitespace.
  [
    "textual difference",
    `<div>hello \n\n\n\n\nworld</div>\n\n\n\n\n\n`,
    `<div>hello</div>`
  ],

  // Differences in compared attributes (src/href).
  [
    "image with different src",
    `<img src="./img.jpeg" />`,
    `<img id="hello" class="diff" src="./img.png" />`
  ],
  [
    "link with different href",
    `<a href="brave.com" />`,
    `<a id="hello" class="diff" href="google.com" />`
  ],

  // Inputs that are not strings are never reported as equal.
  ["non-string", {}, {}]
];
|
||||
|
||||
// Runs one test per row of inequalPairs; the row's first element is the title.
describe("pairs should not be equal", function () {
  test.each(inequalPairs)("%s", function (_title, left, right) {
    const areEqual = isHTMLEqual(left, right);
    expect(areEqual).toBe(false);
  });
});
|
||||
41
packages/core/utils/html-diff.js
Normal file
41
packages/core/utils/html-diff.js
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
This file is part of the Notesnook project (https://notesnook.com/)
|
||||
|
||||
Copyright (C) 2022 Streetwriters (Private) Limited
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import { Parser } from "htmlparser2";
|
||||
|
||||
const ALLOWED_ATTRIBUTES = ["href", "src"];
|
||||
|
||||
/**
 * Decides whether two HTML strings are equal for diffing purposes by
 * comparing the output of toDiffable for each of them.
 *
 * @param {*} one first HTML string
 * @param {*} two second HTML string
 * @returns {boolean} true when both arguments are strings and their diffable
 * forms are identical; false otherwise (including when either argument is
 * not a string).
 */
export function isHTMLEqual(one, two) {
  const bothAreStrings = typeof one === "string" && typeof two === "string";
  if (!bothAreStrings) {
    return false;
  }

  const first = toDiffable(one);
  const second = toDiffable(two);
  return first === second;
}
|
||||
|
||||
/**
 * Reduces an HTML string to a flat string suitable for equality comparison.
 *
 * The result is built, in document order, from:
 *  - every text chunk reported by the parser, trimmed of surrounding
 *    whitespace, and
 *  - every attribute whose name is listed in ALLOWED_ATTRIBUTES, emitted as a
 *    delimited `name=value` token (value trimmed).
 *
 * Tag names and all other attributes contribute nothing.
 *
 * @param {string} html HTML source to reduce
 * @returns {string} the diffable representation of `html`
 */
function toDiffable(html) {
  // Attribute tokens are wrapped in a control character that is unlikely to
  // occur in note content. Without the delimiter and the attribute name,
  // attribute values would be indistinguishable from adjacent text (and from
  // each other): `<a href="a">b</a>` and `<a href="ab"></a>` would both reduce
  // to "ab", and an image's src would compare equal to the same plain text.
  const ATTRIBUTE_MARK = "\u0000";
  const parts = [];

  const parser = new Parser({
    ontext: (data) => {
      parts.push(data.trim());
    },
    onattribute: (name, value) => {
      if (!ALLOWED_ATTRIBUTES.includes(name)) return;
      parts.push(`${ATTRIBUTE_MARK}${name}=${value.trim()}${ATTRIBUTE_MARK}`);
    }
  });

  parser.write(html);
  parser.end();

  // Text chunks are joined without a separator on purpose: text split by
  // formatting tags (e.g. `<b>hello</b>world`) must equal the unsplit text.
  return parts.join("");
}
|
||||
Reference in New Issue
Block a user