Files
notesnook/packages/core/utils/html-diff.js
Abdullah Atta e1fc116994 core: improve content conflict detection using proper HTML diffing (#1183)
Since HTML is a tree-like language it is futile to compare it character
for character. `html1 === html2` is almost always false. This commit
introduces a simple diffing algorithm that only checks the text inside
the html + a few other attributes to decide whether the 2 HTMLs are
actually different or not. This is obviously not foolproof and it will
ignore everything aesthetic (b, em, strong tags etc.). This is actually
desirable because in our case only the text difference should
warrant a conflict. Everything else can easily be brought back.
Similarly, this also ignores whitespace differences surrounding the
tags.

All in all it'll provide a more reliable alternative to MD5 hashing the
2 HTMLs.
2022-10-13 19:22:32 +05:00

42 lines
1.2 KiB
JavaScript

/*
This file is part of the Notesnook project (https://notesnook.com/)
Copyright (C) 2022 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { Parser } from "htmlparser2";
/**
 * Names of the attributes whose values take part in the HTML comparison;
 * every other attribute is ignored.
 */
const ALLOWED_ATTRIBUTES = [
  "href",
  "src"
];
/**
 * Checks whether two HTML strings are equal for diffing purposes by
 * comparing the strings `toDiffable` derives from each of them rather
 * than the raw markup.
 *
 * @param {unknown} one - First HTML string.
 * @param {unknown} two - Second HTML string.
 * @returns {boolean} `false` when either argument is not a string;
 *   otherwise whether both inputs reduce to the same diffable string.
 */
export function isHTMLEqual(one, two) {
  const bothAreStrings = typeof one === "string" && typeof two === "string";
  if (!bothAreStrings) {
    return false;
  }

  const left = toDiffable(one);
  const right = toDiffable(two);
  return left === right;
}
/**
 * Reduces an HTML string to a canonical string that is only meant to be
 * compared for equality with the result of another call.
 *
 * Tags are dropped and every text chunk reported by the parser is trimmed,
 * so formatting tags and whitespace around tags do not influence the result.
 * Values of the attributes listed in ALLOWED_ATTRIBUTES are kept as well.
 *
 * @param {string} html - The HTML to reduce.
 * @returns {string} JSON-encoded list of alternating text runs and
 *   `name=value` attribute tokens, in document order.
 */
function toDiffable(html) {
  const tokens = [];
  let pendingText = "";

  const parser = new Parser({
    ontext: (data) => {
      // A single text node may be reported in several chunks, so chunks are
      // merged into one run instead of becoming separate tokens.
      pendingText += data.trim();
    },
    onattribute: (name, value) => {
      if (!ALLOWED_ATTRIBUTES.includes(name)) return;
      // Attribute values get their own token. Appending them straight to the
      // text would make `<a href="ab">c</a>` and `<a href="a">bc</a>` (or an
      // href and a src with the same value) indistinguishable.
      tokens.push(pendingText, `${name}=${value.trim()}`);
      pendingText = "";
    }
  });
  parser.write(html);
  parser.end();

  tokens.push(pendingText);
  // JSON encoding keeps the token boundaries unambiguous whatever characters
  // the text or the attribute values contain.
  return JSON.stringify(tokens);
}