// mirror of https://github.com/streetwriters/notesnook.git
// synced 2025-12-25 16:09:42 +01:00
// 893 lines, 28 KiB, JavaScript
/* eslint-disable no-restricted-globals */
/*
 * htmldiff.js is a library that compares HTML content. It creates a diff between two
 * HTML documents by combining the two documents and wrapping the differences with
 * <span class="diff-ins"> and <span class="diff-del"> tags. Here is a high-level
 * overview of how the diff works.
 *
 * 1. Tokenize the before and after HTML with html_to_tokens.
 * 2. Generate a list of operations that convert the before list of tokens to the after
 *    list of tokens with calculate_operations, which does the following:
 *      a. Find all the matching blocks of tokens between the before and after lists of
 *         tokens with find_matching_blocks. This is done by finding the single longest
 *         matching block with find_match, then recursively finding the next longest
 *         matching blocks that precede and follow the longest matching block with
 *         recursively_find_matching_blocks.
 *      b. Determine insertions, deletions, and replacements from the matching blocks.
 *         This is done in calculate_operations.
 * 3. Render the list of operations by wrapping tokens with diff spans where
 *    appropriate with render_operations.
 *
 * Example usage:
 *
 *   var htmldiff = require('htmldiff.js');
 *
 *   htmldiff('<p>this is some text</p>', '<p>this is some more text</p>')
 *   == '<p>this is some <span class="diff-ins ">more </span>text</p>'
 *
 *   htmldiff('<p>this is some text</p>', '<p>this is some more text</p>', 'diff-class')
 *   == '<p>this is some <span class="diff-ins diff-class">more </span>text</p>'
 */

/*
 * Checks whether a character opens an HTML tag.
 *
 * @param {string} char The single character to test.
 *
 * @return {boolean} True if the character is "<", false otherwise.
 */
function is_start_of_tag(char) {
  return char === "<";
}
const IS_CLOSE_TAG_REGEX = /^\s*<\s*\/[^>]+>\s*$/;

/*
 * Checks whether a token is a closing tag such as "</p>". Whitespace around
 * the tag and between "<" and "/" is tolerated.
 *
 * @param {string} tag The token to check.
 *
 * @return {boolean} True if the token is a closing tag, false otherwise.
 */
function is_close_tag(tag) {
  return IS_CLOSE_TAG_REGEX.test(tag);
}
const IS_TAG_REGEX = /^\s*<[^>]+>\s*$/;

/*
 * Checks whether a token consists of exactly one HTML tag (opening, closing
 * or self-closing), optionally surrounded by whitespace.
 *
 * @param {string} token The token to check.
 *
 * @return {boolean} True if the token is a single tag, false otherwise.
 */
function is_tag(token) {
  return IS_TAG_REGEX.test(token);
}
/*
 * Negation of is_tag, usable directly as a predicate.
 *
 * @param {string} token The token to check.
 *
 * @return {boolean} True if the token is not a single HTML tag.
 */
function isnt_tag(token) {
  return !is_tag(token);
}
/*
 * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
 * child nodes should not be compared - the entire tag should be treated as one token. This
 * is useful for tags where it does not make sense to insert diff wrappers.
 *
 * @param {string} word The characters of the current token read so far.
 *
 * @return {string|null} The name of the atomic tag if the word will be an atomic tag,
 *    null otherwise.
 */
const IS_START_ATOMIC_TAG = /^<(iframe|object|math|svg|script)/;
function is_start_of_atomic_tag(word) {
  const result = IS_START_ATOMIC_TAG.exec(word);
  // Capture group 1 holds the tag name; `exec` yields null when nothing matched.
  return result ? result[1] : null;
}
/*
 * Checks if a tag is a void tag. Only the self-closing syntax ("<br/>",
 * "<img ... />") is recognised; a bare "<br>" is not.
 *
 * @param {string} token The token to check.
 *
 * @return {boolean} True if the token is a void tag, false otherwise.
 */
const IS_VOID_TAG_REGEX = /^\s*<[^>]+\/>\s*$/;
function is_void_tag(token) {
  return IS_VOID_TAG_REGEX.test(token);
}
/*
 * Checks if a token can be wrapped inside a tag.
 *
 * The result is deliberately NOT coerced to a boolean: for the opening tag of
 * an atomic element it is the tag name (a string). consecutive_where compares
 * answers with `=== true` / `=== false`, so such a token neither extends nor
 * ends a run on its own.
 *
 * @param {string} token The token to check.
 *
 * @return {boolean|string} True if the token is text or a void tag, the atomic
 *    tag name for the start of an atomic tag, false otherwise.
 */
function is_wrappable(token) {
  return isnt_tag(token) || is_start_of_atomic_tag(token) || is_void_tag(token);
}
/*
 * A Match stores the information of a matching block. A matching block is a list of
 * consecutive tokens that appear in both the before and after lists of tokens.
 *
 * Besides the constructor arguments, the instance exposes end_in_before and
 * end_in_after: the (inclusive) index of the last token of the block in each
 * list, i.e. start + length - 1.
 *
 * @param {number} start_in_before The index of the first token in the list of before tokens.
 * @param {number} start_in_after The index of the first token in the list of after tokens.
 * @param {number} length The number of consecutive matching tokens in this block.
 */
function Match(start_in_before, start_in_after, length) {
  this.start_in_before = start_in_before;
  this.start_in_after = start_in_after;
  this.length = length;
  this.end_in_before = start_in_before + length - 1;
  this.end_in_after = start_in_after + length - 1;
}
/*
 * Builds the result object of a dual pane diff.
 *
 * @param {string} before The rendering for the "before" pane.
 * @param {string} after The rendering for the "after" pane.
 *
 * @return {{before: string, after: string}} Both renderings in one object.
 */
function return_dual_pane(before, after) {
  return { before, after };
}
/*
 * Merges the tokens of a component into a single token. A component starts at
 * the first token matched by find_regex and ends when the tag that opened it
 * is closed again; everything in between (nested tags included) is joined
 * into one string.
 *
 * @param {RegExp} find_regex Dictates which tokens start a component to merge.
 * @param {Array.<string>} words Tokens as produced by html_to_tokens.
 *
 * @return {Array.<string>} The list of tokens with each matched component
 *    collapsed into one token.
 */
function merge_html_by_class(find_regex, words) {
  const merged_words = [];
  let pending = "";
  let open_tag_count = 0;

  for (const tag_part of words) {
    let in_component = open_tag_count > 0;
    if (!in_component) {
      // `test` is stateful on global/sticky regexes; always match from the start.
      find_regex.lastIndex = 0;
      in_component = find_regex.test(tag_part);
    }

    if (!in_component) {
      // Tokens outside of any component pass through untouched.
      merged_words.push(tag_part);
      continue;
    }

    // Track nesting depth; void and atomic tags never need a closing tag.
    if (
      is_start_of_tag(tag_part.slice(0, 1)) &&
      !is_void_tag(tag_part) &&
      !is_start_of_atomic_tag(tag_part)
    ) {
      // Clamp at zero so a stray closing tag cannot leave the counter negative.
      open_tag_count = Math.max(
        0,
        open_tag_count + (is_close_tag(tag_part) ? -1 : 1)
      );
    }

    pending += tag_part;

    // Once the opening tag is balanced, emit the whole component as one token.
    if (open_tag_count === 0) {
      merged_words.push(pending);
      pending = "";
    }
  }

  // An unclosed component must not swallow its content: emit what was collected.
  if (pending !== "") {
    merged_words.push(pending);
  }
  return merged_words;
}
/*
 * Tokenizes a string of HTML. Tokens are: a complete tag ("<...>"), a run of
 * whitespace, a character reference ("&...;") or a run of any other
 * characters. Joining the tokens yields the input again.
 *
 * @param {string} html The string to tokenize.
 * @param {RegExp} components_search_rule (Optional) When given, the tokens of
 *    every component matched by this rule are merged with merge_html_by_class.
 *
 * @return {Array.<string>} The list of tokens.
 */
function html_to_tokens(html, components_search_rule) {
  const words = [];
  let start = 0; // index at which the token currently being read begins
  let tagOpen = false;
  let whitespaceOpen = false;
  let ampersandOpen = false;

  for (let i = 0; i < html.length; ++i) {
    const s = html[i];

    if (s === " " || s === "\n" || s === "\r" || s === "\t") {
      // Whitespace inside a tag belongs to the tag; a whitespace run is one token.
      if (tagOpen || whitespaceOpen) continue;

      if (start !== i) words.push(html.slice(start, i));

      whitespaceOpen = true;
      // A character reference cannot span whitespace; forget an unterminated "&".
      ampersandOpen = false;
      start = i;
    } else if (whitespaceOpen) {
      whitespaceOpen = false;
      words.push(html.slice(start, i));
      start = i;
    }

    if (s === "<") {
      tagOpen = true;
      ampersandOpen = false;

      if (start !== i) {
        words.push(html.slice(start, i));
      }

      start = i;
    } else if (s === ">") {
      tagOpen = false;
      words.push(html.slice(start, i + 1));
      start = i + 1;
    } else if (tagOpen) {
      // "&" and ";" in attribute values (e.g. query strings) must not split the tag.
      continue;
    } else if (s === "&" && !ampersandOpen) {
      ampersandOpen = true;
      if (start !== i) words.push(html.slice(start, i));
      start = i;
    } else if (s === ";" && ampersandOpen) {
      ampersandOpen = false;
      words.push(html.slice(start, i + 1));
      start = i + 1;
    }
  }

  // Whatever is left after the last delimiter is the final token.
  if (start !== html.length) {
    words.push(html.slice(start, html.length));
  }

  return components_search_rule
    ? merge_html_by_class(components_search_rule, words)
    : words;
}
/*
 * Creates a key that should be used to match tokens. This is useful, for example, if we want
 * to consider two open tag tokens as equal, even if they don't have the same attributes. We
 * use a key instead of overwriting the token because we may want to render the original string
 * without losing the attributes.
 *
 * Tags are reduced to their lower-cased name ("<p>", "</p>"); in any other token
 * every whitespace run and every non-breaking-space entity becomes a single space.
 *
 * @param {string} token The token to create the key for.
 *
 * @return {string} The identifying key that should be used to match before and after tokens.
 */
function get_key_for_token(token) {
  const tag_name = /<([^\s>]+)[\s>]/.exec(token);
  if (tag_name) {
    return "<" + tag_name[1].toLowerCase() + ">";
  }
  if (token) {
    // The tokenizer emits "&nbsp;" / "&#160;" as tokens of their own; they must
    // compare equal to ordinary whitespace.
    return token.replace(/(\s+|&nbsp;|&#160;)/g, " ");
  }
  return token;
}
/*
 * Finds the matching block with the most consecutive tokens within the given range in the
 * before and after lists of tokens. Both ranges are half-open: the start index is
 * included, the end index is not.
 *
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 * @param {Object} index_of_before_locations_in_after_tokens The index that is used to search
 *    for tokens in the after list.
 * @param {number} start_in_before The beginning of the range in the list of before tokens.
 * @param {number} end_in_before The end of the range in the list of before tokens.
 * @param {number} start_in_after The beginning of the range in the list of after tokens.
 * @param {number} end_in_after The end of the range in the list of after tokens.
 *
 * @return {Match|null} A Match that describes the best matching block in the given range,
 *    or null when the ranges share no token.
 */
function find_match(
  before_tokens,
  after_tokens,
  index_of_before_locations_in_after_tokens,
  start_in_before,
  end_in_before,
  start_in_after,
  end_in_after
) {
  let best_match_in_before = start_in_before;
  let best_match_in_after = start_in_after;
  let best_match_length = 0;
  // match_length_at[j]: length of the common run ending at the previous before
  // token and at after token j.
  let match_length_at = {};

  for (
    let index_in_before = start_in_before;
    index_in_before < end_in_before;
    index_in_before++
  ) {
    const new_match_length_at = {};
    const looking_for = get_key_for_token(before_tokens[index_in_before]);
    // A key missing from the index simply has no locations.
    const locations_in_after =
      index_of_before_locations_in_after_tokens[looking_for] || [];

    for (const index_in_after of locations_in_after) {
      if (index_in_after < start_in_after) continue;
      // Locations are stored in ascending order, nothing further can be in range.
      if (index_in_after >= end_in_after) break;

      const new_match_length = (match_length_at[index_in_after - 1] || 0) + 1;
      new_match_length_at[index_in_after] = new_match_length;

      // Strictly greater: of several equally long blocks the first one wins.
      if (new_match_length > best_match_length) {
        best_match_in_before = index_in_before - new_match_length + 1;
        best_match_in_after = index_in_after - new_match_length + 1;
        best_match_length = new_match_length;
      }
    }

    match_length_at = new_match_length_at;
  }

  if (best_match_length === 0) {
    return null;
  }
  return new Match(best_match_in_before, best_match_in_after, best_match_length);
}
/*
 * Finds all the matching blocks within the given range in the before and after lists of
 * tokens. This function is called recursively to find the next best matches that precede
 * and follow the first best match. Blocks are appended to matching_blocks in document
 * order (left part, best match, right part).
 *
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 * @param {Object} index_of_before_locations_in_after_tokens The index that is used to search
 *    for tokens in the after list.
 * @param {number} start_in_before The beginning of the range in the list of before tokens.
 * @param {number} end_in_before The end (exclusive) of the range in the list of before tokens.
 * @param {number} start_in_after The beginning of the range in the list of after tokens.
 * @param {number} end_in_after The end (exclusive) of the range in the list of after tokens.
 * @param {Array.<Match>} matching_blocks The list of matching blocks found so far.
 *
 * @return {Array.<Match>} The list of matching blocks in this range.
 */
function recursively_find_matching_blocks(
  before_tokens,
  after_tokens,
  index_of_before_locations_in_after_tokens,
  start_in_before,
  end_in_before,
  start_in_after,
  end_in_after,
  matching_blocks
) {
  const match = find_match(
    before_tokens,
    after_tokens,
    index_of_before_locations_in_after_tokens,
    start_in_before,
    end_in_before,
    start_in_after,
    end_in_after
  );
  if (!match) {
    return matching_blocks;
  }

  // Tokens remain to the left of the match in both lists: search there first.
  if (
    start_in_before < match.start_in_before &&
    start_in_after < match.start_in_after
  ) {
    recursively_find_matching_blocks(
      before_tokens,
      after_tokens,
      index_of_before_locations_in_after_tokens,
      start_in_before,
      match.start_in_before,
      start_in_after,
      match.start_in_after,
      matching_blocks
    );
  }

  matching_blocks.push(match);

  // Tokens remain to the right of the match in both lists. An empty range can
  // never contain a match, so skipping it does not change the result.
  if (
    match.end_in_before + 1 < end_in_before &&
    match.end_in_after + 1 < end_in_after
  ) {
    recursively_find_matching_blocks(
      before_tokens,
      after_tokens,
      index_of_before_locations_in_after_tokens,
      match.end_in_before + 1,
      end_in_before,
      match.end_in_after + 1,
      end_in_after,
      matching_blocks
    );
  }

  return matching_blocks;
}
/*
 * Creates an index (A.K.A. hash table) that will be used to match the list of before
 * tokens with the list of after tokens. Every key of find_these maps to the ascending
 * list of positions at which that key occurs in in_these (possibly empty); keys that
 * only occur in in_these are not part of the index.
 *
 * The index has no prototype, so tokens such as "constructor" or "toString"
 * are ordinary keys and can never collide with inherited properties.
 *
 * @param {Object} options An object with the following:
 *    - {Array.<string>} find_these The list of tokens that will be used to search.
 *    - {Array.<string>} in_these The list of tokens that will be returned.
 *
 * @return {Object} An index that can be used to search for tokens.
 *
 * @throws {Error} If find_these or in_these is missing.
 */
function create_index(options) {
  if (!options.find_these) {
    throw new Error("params must have find_these key");
  }
  if (!options.in_these) {
    throw new Error("params must have in_these key");
  }

  const index = Object.create(null);
  for (const token of options.find_these) {
    index[get_key_for_token(token)] = [];
  }

  options.in_these.forEach((token, position) => {
    const locations = index[get_key_for_token(token)];
    if (locations) locations.push(position);
  });
  return index;
}
/*
 * Finds all the matching blocks in the before and after lists of tokens. This function
 * is a wrapper for the recursive function recursively_find_matching_blocks: it builds
 * the lookup index once and starts the search over the complete lists.
 *
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 *
 * @return {Array.<Match>} The list of matching blocks.
 */
function find_matching_blocks(before_tokens, after_tokens) {
  const index_of_before_locations_in_after_tokens = create_index({
    find_these: before_tokens,
    in_these: after_tokens,
  });

  return recursively_find_matching_blocks(
    before_tokens,
    after_tokens,
    index_of_before_locations_in_after_tokens,
    0,
    before_tokens.length,
    0,
    after_tokens.length,
    []
  );
}
/*
 * Gets a list of operations required to transform the before list of tokens into the
 * after list of tokens. An operation describes whether a particular list of consecutive
 * tokens are equal, replaced, inserted, or deleted.
 *
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 *
 * @return {Array.<Object>} The list of operations to transform the before list of
 *    tokens into the after list of tokens, where each operation has the following
 *    keys:
 *    - {string} action One of {'replace', 'insert', 'delete', 'equal'}.
 *    - {number} start_in_before The beginning of the range in the list of before tokens.
 *    - {number} end_in_before The end (inclusive) of the range in the list of before
 *      tokens; undefined for 'insert'.
 *    - {number} start_in_after The beginning of the range in the list of after tokens.
 *    - {number} end_in_after The end (inclusive) of the range in the list of after
 *      tokens; undefined for 'delete'.
 *
 * @throws {Error} If either token list is missing.
 */
function calculate_operations(before_tokens, after_tokens) {
  if (!before_tokens) {
    throw new Error("before_tokens?");
  }
  if (!after_tokens) {
    throw new Error("after_tokens?");
  }

  // Keyed by "<match starts at current before position>,<... after position>".
  const action_map = {
    "false,false": "replace",
    "true,false": "insert",
    "false,true": "delete",
    "true,true": "none",
  };

  const matches = find_matching_blocks(before_tokens, after_tokens);
  // Zero-length sentinel so that trailing unmatched tokens produce an operation.
  matches.push(new Match(before_tokens.length, after_tokens.length, 0));

  const operations = [];
  let position_in_before = 0;
  let position_in_after = 0;

  for (const match of matches) {
    const action_up_to_match_positions =
      action_map[
        [
          position_in_before === match.start_in_before,
          position_in_after === match.start_in_after,
        ].toString()
      ];

    // Everything between the current positions and the match is a change.
    if (action_up_to_match_positions !== "none") {
      operations.push({
        action: action_up_to_match_positions,
        start_in_before: position_in_before,
        end_in_before:
          action_up_to_match_positions !== "insert"
            ? match.start_in_before - 1
            : undefined,
        start_in_after: position_in_after,
        end_in_after:
          action_up_to_match_positions !== "delete"
            ? match.start_in_after - 1
            : undefined,
      });
    }

    if (match.length !== 0) {
      operations.push({
        action: "equal",
        start_in_before: match.start_in_before,
        end_in_before: match.end_in_before,
        start_in_after: match.start_in_after,
        end_in_after: match.end_in_after,
      });
    }

    position_in_before = match.end_in_before + 1;
    position_in_after = match.end_in_after + 1;
  }

  // True for an 'equal' operation that covers exactly one single-character
  // whitespace token.
  const is_single_whitespace = (op) =>
    op.action === "equal" &&
    op.end_in_before - op.start_in_before === 0 &&
    /^\s$/.test(before_tokens[op.start_in_before]);

  // Post-processing: "replace, <single space>, replace" reads better as one
  // replacement, so such whitespace and following replaces are folded into
  // the preceding replace operation.
  const post_processed = [];
  let last_op = { action: "none" };

  for (const op of operations) {
    const extends_last_replace =
      last_op.action === "replace" &&
      (is_single_whitespace(op) || op.action === "replace");

    if (extends_last_replace) {
      last_op.end_in_before = op.end_in_before;
      last_op.end_in_after = op.end_in_after;
    } else {
      post_processed.push(op);
      last_op = op;
    }
  }
  return post_processed;
}
/*
 * Returns a list of tokens of a particular type starting at a given index.
 *
 * The scan stops at the first token for which the predicate returns exactly
 * `false`. The result ends with the last token for which it returned exactly
 * `true`; any other answer (e.g. a string or null) neither ends the run nor
 * extends it on its own.
 *
 * @param {number} start The index of first token to test.
 * @param {Array.<string>} content The list of tokens.
 * @param {function} predicate A function that returns true if a token is of
 *    a particular type, false otherwise. It should accept the following
 *    parameters:
 *    - {string} The token to test.
 *
 * @return {Array.<string>} The run of tokens beginning at start; empty when
 *    no token qualified.
 */
function consecutive_where(start, content, predicate) {
  // Exclusive end of the run; scanning in place avoids copying the remainder
  // of the list on every call.
  let end = start;

  for (let index = start; index < content.length; index++) {
    const answer = predicate(content[index]);

    if (answer === true) {
      end = index + 1;
    } else if (answer === false) {
      break;
    }
  }

  return content.slice(start, end);
}
/*
 * Wraps and concatenates a list of tokens with a diff span
 * (<span class="diff-TAG CLASS_NAME">). Does not wrap tag tokens, unless they
 * are wrappable (see is_wrappable). A run of wrappable tokens that is
 * whitespace only is left out of the rendering altogether.
 *
 * @param {string} tag The kind of change; becomes the "diff-" class suffix (e.g. "ins").
 * @param {Array.<string>} content The list of tokens to wrap.
 * @param {string} class_name (Optional) The class name to include in the wrapper tag.
 *
 * @return {string} The rendering of the tokens.
 */
function wrap(tag, content, class_name) {
  const length = content.length;
  let rendering = "";
  let position = 0;

  while (position < length) {
    // First the run of wrappable tokens ...
    const non_tags = consecutive_where(position, content, is_wrappable);
    position += non_tags.length;
    if (non_tags.length !== 0) {
      const val = non_tags.join("");
      if (val.trim()) {
        rendering += `<span class="diff-${tag} ${
          class_name || ""
        }">${val}</span>`;
      }
    }

    if (position >= length) break;

    // ... then the run of tags that follows it, emitted as is.
    const tags = consecutive_where(position, content, is_tag);
    position += tags.length;
    rendering += tags.join("");

    // Defensive: should a token fit neither run, emit it untouched rather
    // than looping forever on the same position.
    if (non_tags.length === 0 && tags.length === 0) {
      rendering += content[position];
      position += 1;
    }
  }
  return rendering;
}
/*
 * op_map.equal/insert/delete/replace are functions that render an operation into
 * HTML content.
 *
 * @param {Object} op The operation that applies to a particular list of tokens. Has the
 *    following keys:
 *    - {string} action One of {'replace', 'insert', 'delete', 'equal'}.
 *    - {number} start_in_before The beginning of the range in the list of before tokens.
 *    - {number} end_in_before The end of the range in the list of before tokens.
 *    - {number} start_in_after The beginning of the range in the list of after tokens.
 *    - {number} end_in_after The end of the range in the list of after tokens.
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 * @param {string} class_name (Optional) The class name to include in the wrapper tag.
 *
 * @return {string|Array.<string>} The rendering of that operation. 'replace' returns a
 *    pair: [rendering of the deletion, rendering of the insertion].
 */
const op_map = {
  // Unchanged tokens are taken from the after list, untouched.
  equal(op, before_tokens, after_tokens, class_name) {
    return after_tokens.slice(op.start_in_after, op.end_in_after + 1).join("");
  },

  insert(op, before_tokens, after_tokens, class_name) {
    const val = after_tokens.slice(op.start_in_after, op.end_in_after + 1);
    return wrap("ins", val, class_name);
  },

  delete(op, before_tokens, after_tokens, class_name) {
    const val = before_tokens.slice(op.start_in_before, op.end_in_before + 1);
    return wrap("del", val, class_name);
  },

  // A replacement is a deletion followed by an insertion; both parts are
  // returned separately so callers can place them independently.
  replace(op, before_tokens, after_tokens, class_name) {
    return [
      op_map.delete(op, before_tokens, after_tokens, class_name),
      op_map.insert(op, before_tokens, after_tokens, class_name),
    ];
  },
};
/*
 * Renders a list of operations into HTML content. The result is the combined version
 * of the before and after tokens with the differences wrapped in tags.
 *
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 * @param {Array.<Object>} operations The list of operations to transform the before
 *    list of tokens into the after list of tokens, where each operation has the
 *    following keys:
 *    - {string} action One of {'replace', 'insert', 'delete', 'equal'}.
 *    - {number} start_in_before The beginning of the range in the list of before tokens.
 *    - {number} end_in_before The end of the range in the list of before tokens.
 *    - {number} start_in_after The beginning of the range in the list of after tokens.
 *    - {number} end_in_after The end of the range in the list of after tokens.
 * @param {string} class_name (Optional) The class name to include in the wrapper tag.
 *
 * @return {string} The rendering of the list of operations.
 */
function render_operations(
  before_tokens,
  after_tokens,
  operations,
  class_name
) {
  let rendering = "";
  for (const op of operations) {
    const result = op_map[op.action](
      op,
      before_tokens,
      after_tokens,
      class_name
    );
    // 'replace' yields [deleted, inserted]; in a single pane they are adjacent.
    rendering += op.action === "replace" ? result[0] + result[1] : result;
  }
  return rendering;
}
/**
 * htmldiff thinks that atomic or component objects are equal if their tag names are the
 * same, because of it <div class="slideshow"> === <div class="here"> for example.
 * This function compares such tokens inside 'equal' operations and replaces the
 * corresponding entry of `after` IN PLACE with a rendered diff:
 *  - same component (capture group 1 of the rule is equal, ignoring case):
 *    the two tokens are diffed against each other;
 *  - different components: the before token is wrapped in <del>, the after
 *    token in <ins>.
 *
 * @param {Array.<string>} before The before list of tokens.
 * @param {Array.<string>} after The after list of tokens (modified in place).
 * @param {Array.<Object>} ops The operations produced by calculate_operations.
 * @param {string} class_name (Optional) The class name to include in the wrapper tags.
 * @param {RegExp} components_search_rule The rule that identifies components; it must
 *    expose the component identity as capture group 1.
 */
function render_equal_components(
  before,
  after,
  ops,
  class_name,
  components_search_rule
) {
  for (const op of ops) {
    if (op.action !== "equal") continue;

    const amount = op.end_in_before - op.start_in_before + 1; // +1 since the range is inclusive
    for (let j = 0; j < amount; j++) {
      const before_token = before[op.start_in_before + j];
      const after_token = after[op.start_in_after + j];
      if (before_token === after_token) continue;

      // `exec` is stateful on global/sticky regexes; always match from the start.
      components_search_rule.lastIndex = 0;
      const before_token_match = components_search_rule.exec(before_token);
      components_search_rule.lastIndex = 0;
      const after_token_match = components_search_rule.exec(after_token);
      if (!before_token_match || !after_token_match) continue;

      if (
        before_token_match[1].toLowerCase() ===
        after_token_match[1].toLowerCase()
      ) {
        // compare content of these two same components
        after[op.start_in_after + j] = diff(
          before_token,
          after_token,
          class_name
        );
      } else {
        // components are different, wrap before by 'del' and after by 'ins'
        const attrs = class_name ? ' class="' + class_name + '"' : "";
        after[op.start_in_after + j] =
          "<del" +
          attrs +
          ">" +
          before_token +
          "</del>" +
          "<ins" +
          attrs +
          ">" +
          after_token +
          "</ins>";
      }
    }
  }
}
/*
 * Compares two pieces of HTML content and returns the combined content with differences
 * wrapped in <span class="diff-ins ..."> and <span class="diff-del ..."> tags.
 *
 * @param {string} before The HTML content before the changes.
 * @param {string} after The HTML content after the changes.
 * @param {string} class_name (Optional) An additional class to include in the wrapper tags.
 * @param {RegExp} components_search_rule (Optional) Rule identifying components whose
 *    tokens are merged and compared as a whole.
 *
 * @return {string} The combined HTML content with the differences wrapped; `before`
 *    itself when both inputs are identical.
 */
function diff(before, after, class_name, components_search_rule) {
  if (before === after) return before;

  const before_tokens = html_to_tokens(before, components_search_rule);
  const after_tokens = html_to_tokens(after, components_search_rule);
  const ops = calculate_operations(before_tokens, after_tokens);

  if (components_search_rule) {
    // Components matched by tag name only may still differ; this rewrites the
    // affected entries of after_tokens before rendering.
    render_equal_components(
      before_tokens,
      after_tokens,
      ops,
      class_name,
      components_search_rule
    );
  }

  return render_operations(before_tokens, after_tokens, ops, class_name);
}
/*
 * Renders a list of operations into two HTML strings: one for the "before"
 * pane and one for the "after" pane. Deletions only show up in the before
 * pane, insertions only in the after pane, replacements contribute their
 * deleted part to the before pane and their inserted part to the after pane.
 *
 * Unchanged runs are rendered from each pane's OWN tokens. Tokens are matched
 * by key (tag name only), so an 'equal' tag may still carry different
 * attributes on either side; the before pane must keep the original ones.
 *
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 * @param {Array.<Object>} operations The operations from calculate_operations.
 *
 * @return {{before: string, after: string}} The rendering of each pane.
 */
function render_operations_dual_pane(before_tokens, after_tokens, operations) {
  let before_render = "";
  let after_render = "";

  for (const op of operations) {
    switch (op.action) {
      case "equal":
        before_render += before_tokens
          .slice(op.start_in_before, op.end_in_before + 1)
          .join("");
        after_render += op_map.equal(op, before_tokens, after_tokens);
        break;
      case "insert":
        after_render += op_map.insert(op, before_tokens, after_tokens);
        break;
      case "delete":
        before_render += op_map["delete"](op, before_tokens, after_tokens);
        break;
      case "replace": {
        const next_block = op_map.replace(op, before_tokens, after_tokens);
        before_render += next_block[0];
        after_render += next_block[1];
        break;
      }
      default:
        continue;
    }
  }
  return return_dual_pane(before_render, after_render);
}
/*
 * Compares two pieces of HTML content and renders the result for a side by
 * side view: deletions are marked in the before pane, insertions in the after
 * pane.
 *
 * @param {string} before The HTML content before the changes.
 * @param {string} after The HTML content after the changes.
 *
 * @return {{before: string, after: string}} The rendering of each pane. The
 *    inputs are returned unchanged when either of them is empty/missing or
 *    when both are identical.
 */
function diff_dual_pane(before, after) {
  // Nothing to compare against, or nothing changed: no markup is added.
  if (!before || !after || before === after) {
    return return_dual_pane(before, after);
  }

  const before_tokens = html_to_tokens(before);
  const after_tokens = html_to_tokens(after);
  const ops = calculate_operations(before_tokens, after_tokens);
  return render_operations_dual_pane(before_tokens, after_tokens, ops);
}
// Expose the building blocks as properties of `diff` (and the matching helpers
// as properties of `find_matching_blocks`) so that they are reachable through
// the single exported function.
diff.html_to_tokens = html_to_tokens;
diff.merge_html_by_class = merge_html_by_class;
diff.find_matching_blocks = find_matching_blocks;

find_matching_blocks.find_match = find_match;
find_matching_blocks.create_index = create_index;
find_matching_blocks.get_key_for_token = get_key_for_token;

diff.calculate_operations = calculate_operations;
diff.render_operations = render_operations;
diff.diff_dual_pane = diff_dual_pane;
/*
 * Removes the diff wrappers (<span class="diff-...">) from rendered HTML
 * while keeping their content.
 *
 * @param {string} html HTML that may contain diff wrapper spans.
 *
 * @return {string} The HTML without the wrapper spans.
 */
diff.clean = function (html) {
  // One entry per <span> opened since the outermost diff wrapper:
  // true = diff wrapper (removed), false = ordinary span (kept). Tracking the
  // nesting makes sure each wrapper loses its own closing tag, not the one of
  // a span nested inside it.
  const openSpans = [];

  return html_to_tokens(html).reduce((result, token) => {
    if (token.includes(`span class="diff-`)) {
      openSpans.push(true);
      return result;
    }
    if (openSpans.length > 0) {
      if (/^<span[\s>]/.test(token)) {
        openSpans.push(false);
      } else if (token.includes("</span>") && openSpans.pop()) {
        // Closing tag of a diff wrapper: drop it.
        return result;
      }
    }
    return result + token;
  }, "");
};
// With a document available (regular page context) the diff function is
// published as `self.htmlDiff`; otherwise the script listens for "message"
// events and answers them through onMessage.
if (self.document) {
  self.htmlDiff = diff;
} else {
  self.addEventListener("message", onMessage);
}
/*
 * Handles a request posted to the worker and replies through sendMessage
 * using the same type and messageId.
 *
 * Supported request types:
 *  - "generate": data = { before, after }; replies with diff.diff_dual_pane(before, after).
 *  - "clean":    data = { html };          replies with diff.clean(html).
 * Any other type is ignored. If handling throws, the error is logged and
 * { error: <message> } is sent back instead of a result.
 *
 * @param {MessageEvent} ev The message event; ev.data = { type, data, messageId }.
 */
function onMessage(ev) {
  const { type, data, messageId } = ev.data;
  try {
    switch (type) {
      case "generate": {
        const { before, after } = data;
        const result = diff.diff_dual_pane(before, after);
        sendMessage("generate", result, messageId);
        break;
      }
      case "clean": {
        const { html } = data;
        const cleanHtml = diff.clean(html);
        sendMessage("clean", cleanHtml, messageId);
        break;
      }
      default:
        return;
    }
  } catch (error) {
    console.error("HTML diff worker error:", error, messageId, type);
    sendMessage(type, { error: error.message }, messageId);
  }
}
/*
 * Posts a reply from the worker.
 *
 * @param {string} type The message type, echoed from the request.
 * @param {*} data The payload of the reply.
 * @param {*} messageId The id of the request this reply belongs to.
 */
function sendMessage(type, data, messageId) {
  postMessage({ type, data, messageId });
}