/* eslint-disable no-restricted-globals */
/*
 * htmldiff.js is a library that compares HTML content. It creates a diff between two
 * HTML documents by combining the two documents and wrapping the differences with
 * <ins> and <del> tags. Here is a high-level overview of how the diff works.
 *
 * 1. Tokenize the before and after HTML with html_to_tokens.
 * 2. Generate a list of operations that convert the before list of tokens to the after
 *    list of tokens with calculate_operations, which does the following:
 *      a. Find all the matching blocks of tokens between the before and after lists of
 *         tokens with find_matching_blocks. This is done by finding the single longest
 *         matching block with find_match, then recursively finding the next longest
 *         matching blocks that precede and follow the longest matching block with
 *         recursively_find_matching_blocks.
 *      b. Determine insertions, deletions, and replacements from the matching blocks.
 *         This is done in calculate_operations.
 * 3. Render the list of operations by wrapping tokens with <ins> and <del> tags where
 *    appropriate with render_operations.
 *
 * Example usage:
 *
 *   var htmldiff = require('htmldiff.js');
 *
 *   htmldiff('<p>this is some text</p>', '<p>this is some more text</p>')
 *   == '<p>this is some <ins>more </ins>text</p>'
 *
 *   htmldiff('<p>this is some text</p>', '<p>this is some more text</p>', 'diff-class')
 *   == '<p>this is some <ins class="diff-class">more </ins>text</p>
' */ function is_start_of_tag(char) { return char === "<"; } const IS_CLOSE_TAG_REGEX = /^\s*<\s*\/[^>]+>\s*$/; function is_close_tag(tag) { return IS_CLOSE_TAG_REGEX.test(tag); } const IS_TAG_REGEX = /^\s*<[^>]+>\s*$/; function is_tag(token) { return IS_TAG_REGEX.test(token); } function isnt_tag(token) { return !is_tag(token); } /* * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose * child nodes should not be compared - the entire tag should be treated as one token. This * is useful for tags where it does not make sense to insert and tags. * * @param {string} word The characters of the current token read so far. * * @return {string|null} The name of the atomic tag if the word will be an atomic tag, * null otherwise */ const IS_START_ATOMIC_TAG = /^<(iframe|object|math|svg|script)/; function is_start_of_atomic_tag(word) { var result = IS_START_ATOMIC_TAG.exec(word); if (result) { result = result[1]; } return result; } /* * Checks if a tag is a void tag. * * @param {string} token The token to check. * * @return {boolean} True if the token is a void tag, false otherwise. */ const IS_VOID_TAG_REGEX = /^\s*<[^>]+\/>\s*$/; function is_void_tag(token) { return IS_VOID_TAG_REGEX.test(token); } /* * Checks if a token can be wrapped inside a tag. * * @param {string} token The token to check. * * @return {boolean} True if the token can be wrapped inside a tag, false otherwise. */ function is_wrappable(token) { return isnt_tag(token) || is_start_of_atomic_tag(token) || is_void_tag(token); } /* * A Match stores the information of a matching block. A matching block is a list of * consecutive tokens that appear in both the before and after lists of tokens. * * @param {number} start_in_before The index of the first token in the list of before tokens. * @param {number} start_in_after The index of the first token in the list of after tokens. * @param {number} length The number of consecutive matching tokens in this block. 
*/ function Match(start_in_before, start_in_after, length) { this.start_in_before = start_in_before; this.start_in_after = start_in_after; this.length = length; this.end_in_before = this.start_in_before + this.length - 1; this.end_in_after = this.start_in_after + this.length - 1; } function return_dual_pane(before, after) { return { before: before, after: after, }; } /* * Merge tokens in component by regex rule. * * @param {find_regex} dictate witch tag we want to merge. * @param {words Array.} input data already parsed array(html tags) by html_tokens method. * @return {Array.} The list of tokens with merged components requested by component_search_role. */ function merge_html_by_class(find_regex, words) { var merged_words_array = [], merged_words_to_string = "", count_open_tags = 0; for (var i = 0; i < words.length; i++) { var tag_part = words[i]; //merge html tags until parent tag is not closed (also ignore self closed html tags) if (count_open_tags > 0 || find_regex.test(tag_part)) { if ( is_start_of_tag(tag_part.slice(0, 1)) && !is_void_tag(tag_part) && !is_start_of_atomic_tag(tag_part) ) { count_open_tags += is_close_tag(tag_part) ? -1 : 1; } //combine all html string from html tokens while all condition are true merged_words_to_string += tag_part; //after parent tag is closed push as one html token if (count_open_tags === 0) { merged_words_array.push(merged_words_to_string); merged_words_to_string = ""; } } else { //all tags that are not part of components we wont to merge just ignore merged_words_array.push(tag_part); } } return merged_words_array; } /* * Tokenizes a string of HTML. * * @param {string} html The string to tokenize. * * @return {Array.} The list of tokens. 
*/ function html_to_tokens(html, components_search_rule) { let words = []; const range = { start: 0, end: 0, tagOpen: false, whitespaceOpen: false, ampersandOpen: false, }; for (var i = 0; i < html.length; ++i) { const s = html[i]; if (s === " " || s === "\n" || s === "\r" || s === "\t") { if (range.tagOpen || range.whitespaceOpen) continue; //range.end = i; if (range.start !== i) words.push(html.slice(range.start, i)); range.whitespaceOpen = true; range.start = i; } else if (range.whitespaceOpen) { range.whitespaceOpen = false; // range.end = i; words.push(html.slice(range.start, i)); range.start = i; } if (s === "<") { range.tagOpen = true; if (range.start !== i) { words.push(html.slice(range.start, i)); } range.start = i; } else if (s === ">") { range.tagOpen = false; words.push(html.slice(range.start, i + 1)); range.start = i + 1; } else if (!range.ampersandOpen && s === "&") { range.ampersandOpen = true; if (range.start !== i) words.push(html.slice(range.start, i)); range.start = i; } else if (range.ampersandOpen && s === ";") { range.ampersandOpen = false; words.push(html.slice(range.start, i + 1)); range.start = i + 1; } } if (range.start !== html.length) { words.push(html.slice(range.start, html.length)); } return components_search_rule ? merge_html_by_class(components_search_rule, words) : words; } /* * Creates a key that should be used to match tokens. This is useful, for example, if we want * to consider two open tag tokens as equal, even if they don't have the same attributes. We * use a key instead of overwriting the token because we may want to render the original string * without losing the attributes. * * @param {string} token The token to create the key for. * * @return {string} The identifying key that should be used to match before and after tokens. 
*/ function get_key_for_token(token) { var tag_name = /<([^\s>]+)[\s>]/.exec(token); if (tag_name) { return "<" + tag_name[1].toLowerCase() + ">"; } if (token) { return token.replace(/(\s+| | )/g, " "); } return token; } /* * Finds the matching block with the most consecutive tokens within the given range in the * before and after lists of tokens. * * @param {Array.} before_tokens The before list of tokens. * @param {Array.} after_tokens The after list of tokens. * @param {Object} index_of_before_locations_in_after_tokens The index that is used to search * for tokens in the after list. * @param {number} start_in_before The beginning of the range in the list of before tokens. * @param {number} end_in_before The end of the range in the list of before tokens. * @param {number} start_in_after The beginning of the range in the list of after tokens. * @param {number} end_in_after The end of the range in the list of after tokens. * * @return {Match} A Match that describes the best matching block in the given range. 
*/ function find_match( before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after ) { var best_match_in_before = start_in_before; var best_match_in_after = start_in_after; var best_match_length = 0; var match_length_at = {}; for ( var index_in_before = start_in_before; index_in_before < end_in_before; index_in_before++ ) { var new_match_length_at = {}; var looking_for = get_key_for_token(before_tokens[index_in_before]); var locations_in_after = index_of_before_locations_in_after_tokens[looking_for]; for (var i = 0; i < locations_in_after.length; i++) { var index_in_after = locations_in_after[i]; if (index_in_after < start_in_after) continue; if (index_in_after >= end_in_after) break; if (!match_length_at[index_in_after - 1]) { match_length_at[index_in_after - 1] = 0; } var new_match_length = match_length_at[index_in_after - 1] + 1; new_match_length_at[index_in_after] = new_match_length; if (new_match_length > best_match_length) { best_match_in_before = index_in_before - new_match_length + 1; best_match_in_after = index_in_after - new_match_length + 1; best_match_length = new_match_length; } } match_length_at = new_match_length_at; } if (best_match_length !== 0) { return new Match( best_match_in_before, best_match_in_after, best_match_length ); } return null; } /* * Finds all the matching blocks within the given range in the before and after lists of * tokens. This function is called recursively to find the next best matches that precede * and follow the first best match. * * @param {Array.} before_tokens The before list of tokens. * @param {Array.} after_tokens The after list of tokens. * @param {Object} index_of_before_locations_in_after_tokens The index that is used to search * for tokens in the after list. * @param {number} start_in_before The beginning of the range in the list of before tokens. * @param {number} end_in_before The end of the range in the list of before tokens. 
* @param {number} start_in_after The beginning of the range in the list of after tokens. * @param {number} end_in_after The end of the range in the list of after tokens. * @param {Array.} matching_blocks The list of matching blocks found so far. * * @return {Array.} The list of matching blocks in this range. */ function recursively_find_matching_blocks( before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after, matching_blocks ) { var match = find_match( before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after ); if (match) { if ( start_in_before < match.start_in_before && start_in_after < match.start_in_after ) { recursively_find_matching_blocks( before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, match.start_in_before, start_in_after, match.start_in_after, matching_blocks ); } matching_blocks.push(match); if ( match.end_in_before <= end_in_before && match.end_in_after <= end_in_after ) { recursively_find_matching_blocks( before_tokens, after_tokens, index_of_before_locations_in_after_tokens, match.end_in_before + 1, end_in_before, match.end_in_after + 1, end_in_after, matching_blocks ); } } return matching_blocks; } /* * Creates an index (A.K.A. hash table) that will be used to match the list of before * tokens with the list of after tokens. * * @param {Object} options An object with the following: * - {Array.} find_these The list of tokens that will be used to search. * - {Array.} in_these The list of tokens that will be returned. * * @return {Object} An index that can be used to search for tokens. 
*/ function create_index(options) { if (!options.find_these) { throw new Error("params must have find_these key"); } if (!options.in_these) { throw new Error("params must have in_these key"); } var queries = options.find_these.map(function (token) { return get_key_for_token(token); }); var results = options.in_these.map(function (token) { return get_key_for_token(token); }); var index = {}; for (var i = 0; i < queries.length; i++) { var query = queries[i]; index[query] = []; } results.forEach((item, i) => { const array = index[item]; if (array) array.push(i); }); return index; } /* * Finds all the matching blocks in the before and after lists of tokens. This function * is a wrapper for the recursive function recursively_find_matching_blocks. * * @param {Array.} before_tokens The before list of tokens. * @param {Array.} after_tokens The after list of tokens. * * @return {Array.} The list of matching blocks. */ function find_matching_blocks(before_tokens, after_tokens) { var matching_blocks = []; var index_of_before_locations_in_after_tokens = create_index({ find_these: before_tokens, in_these: after_tokens, }); return recursively_find_matching_blocks( before_tokens, after_tokens, index_of_before_locations_in_after_tokens, 0, before_tokens.length, 0, after_tokens.length, matching_blocks ); } /* * Gets a list of operations required to transform the before list of tokens into the * after list of tokens. An operation describes whether a particular list of consecutive * tokens are equal, replaced, inserted, or deleted. * * @param {Array.} before_tokens The before list of tokens. * @param {Array.} after_tokens The after list of tokens. * * @return {Array.