notesnook/apps/web/public/diff.worker.js

/* eslint-disable no-restricted-globals */
/*
 * htmldiff.js is a library that compares HTML content. It creates a diff between two
 * HTML documents by combining the two documents and wrapping the differences with
 * <ins> and <del> tags. Here is a high-level overview of how the diff works.
 *
 * 1. Tokenize the before and after HTML with html_to_tokens.
 * 2. Generate a list of operations that convert the before list of tokens to the after
 *    list of tokens with calculate_operations, which does the following:
 *      a. Find all the matching blocks of tokens between the before and after lists of
 *         tokens with find_matching_blocks. This is done by finding the single longest
 *         matching block with find_match, then recursively finding the next longest
 *         matching block that precede and follow the longest matching block with
 *         recursively_find_matching_blocks.
 *      b. Determine insertions, deletions, and replacements from the matching blocks.
 *         This is done in calculate_operations.
 * 3. Render the list of operations by wrapping tokens with <ins> and <del> tags where
 *    appropriate with render_operations.
 *
 * Example usage:
 *
 *   var htmldiff = require('htmldiff.js');
 *
 *   htmldiff('<p>this is some text</p>', '<p>this is some more text</p>')
 *   == '<p>this is some <ins>more </ins>text</p>'
 *
 *   htmldiff('<p>this is some text</p>', '<p>this is some more text</p>', 'diff-class')
 *   == '<p>this is some <ins class="diff-class">more </ins>text</p>'
 */

function is_start_of_tag(char) {
  return char === "<";
}
const IS_CLOSE_TAG_REGEX = /^\s*<\s*\/[^>]+>\s*$/;
function is_close_tag(tag) {
  return IS_CLOSE_TAG_REGEX.test(tag);
}

const IS_TAG_REGEX = /^\s*<[^>]+>\s*$/;
function is_tag(token) {
  return IS_TAG_REGEX.test(token);
}

function isnt_tag(token) {
  return !is_tag(token);
}

/*
 * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
 * child nodes should not be compared - the entire tag should be treated as one token. This
 * is useful for tags where it does not make sense to insert <ins> and <del> tags.
 *
 * @param {string} word The characters of the current token read so far.
 *
 * @return {string|null} The name of the atomic tag if the word will be an atomic tag,
 *    null otherwise
 */
const IS_START_ATOMIC_TAG = /^<(iframe|object|math|svg|script)/;
function is_start_of_atomic_tag(word) {
  var result = IS_START_ATOMIC_TAG.exec(word);
  if (result) {
    result = result[1];
  }
  return result;
}

/*
 * Checks if a tag is a void tag.
 *
 * @param {string} token The token to check.
 *
 * @return {boolean} True if the token is a void tag, false otherwise.
 */
const IS_VOID_TAG_REGEX = /^\s*<[^>]+\/>\s*$/;
function is_void_tag(token) {
  return IS_VOID_TAG_REGEX.test(token);
}

/*
 * Checks if a token can be wrapped inside a tag.
 *
 * @param {string} token The token to check.
 *
 * @return {boolean} True if the token can be wrapped inside a tag, false otherwise.
 */
function is_wrappable(token) {
  return isnt_tag(token) || is_start_of_atomic_tag(token) || is_void_tag(token);
}

/*
 * A Match stores the information of a matching block. A matching block is a list of
 * consecutive tokens that appear in both the before and after lists of tokens.
 *
 * @param {number} start_in_before The index of the first token in the list of before tokens.
 * @param {number} start_in_after The index of the first token in the list of after tokens.
 * @param {number} length The number of consecutive matching tokens in this block.
 */
function Match(start_in_before, start_in_after, length) {
  this.start_in_before = start_in_before;
  this.start_in_after = start_in_after;
  this.length = length;
  this.end_in_before = this.start_in_before + this.length - 1;
  this.end_in_after = this.start_in_after + this.length - 1;
}

function return_dual_pane(before, after) {
  return {
    before: before,
    after: after,
  };
}

/*
 * Merge tokens in component by regex rule.
 *
 * @param {find_regex} dictate witch tag we want to merge.
 * @param {words Array.<string>} input data already  parsed array(html tags) by html_tokens method.
 * @return {Array.<string>} The list of tokens with merged components requested by component_search_role.
 */
function merge_html_by_class(find_regex, words) {
  var merged_words_array = [],
    merged_words_to_string = "",
    count_open_tags = 0;

  for (var i = 0; i < words.length; i++) {
    var tag_part = words[i];
    //merge html tags until parent tag is not closed (also ignore self closed html tags)
    if (count_open_tags > 0 || find_regex.test(tag_part)) {
      if (
        is_start_of_tag(tag_part.slice(0, 1)) &&
        !is_void_tag(tag_part) &&
        !is_start_of_atomic_tag(tag_part)
      ) {
        count_open_tags += is_close_tag(tag_part) ? -1 : 1;
      }
      //combine all html string from html tokens while all condition are true
      merged_words_to_string += tag_part;
      //after parent tag is closed push as one html token
      if (count_open_tags === 0) {
        merged_words_array.push(merged_words_to_string);
        merged_words_to_string = "";
      }
    } else {
      //all tags that are not part of components we wont to merge just ignore
      merged_words_array.push(tag_part);
    }
  }
  return merged_words_array;
}

/*
 * Tokenizes a string of HTML.
 *
 * @param {string} html The string to tokenize.
 *
 * @return {Array.<string>} The list of tokens.
 */

function html_to_tokens(html, components_search_rule) {
  let words = [];
  const range = {
    start: 0,
    end: 0,
    tagOpen: false,
    whitespaceOpen: false,
    ampersandOpen: false,
  };
  for (var i = 0; i < html.length; ++i) {
    const s = html[i];

    if (s === " " || s === "\n" || s === "\r" || s === "\t") {
      if (range.tagOpen || range.whitespaceOpen) continue;

      //range.end = i;
      if (range.start !== i) words.push(html.slice(range.start, i));

      range.whitespaceOpen = true;
      range.start = i;
    } else if (range.whitespaceOpen) {
      range.whitespaceOpen = false;
      // range.end = i;
      words.push(html.slice(range.start, i));
      range.start = i;
    }

    if (s === "<") {
      range.tagOpen = true;

      if (range.start !== i) {
        words.push(html.slice(range.start, i));
      }

      range.start = i;
    } else if (s === ">") {
      range.tagOpen = false;
      words.push(html.slice(range.start, i + 1));
      range.start = i + 1;
    } else if (!range.ampersandOpen && s === "&") {
      range.ampersandOpen = true;
      if (range.start !== i) words.push(html.slice(range.start, i));
      range.start = i;
    } else if (range.ampersandOpen && s === ";") {
      range.ampersandOpen = false;
      words.push(html.slice(range.start, i + 1));
      range.start = i + 1;
    }
  }
  if (range.start !== html.length) {
    words.push(html.slice(range.start, html.length));
  }

  return components_search_rule
    ? merge_html_by_class(components_search_rule, words)
    : words;
}

/*
 * Creates a key that should be used to match tokens. This is useful, for example, if we want
 * to consider two open tag tokens as equal, even if they don't have the same attributes. We
 * use a key instead of overwriting the token because we may want to render the original string
 * without losing the attributes.
 *
 * @param {string} token The token to create the key for.
 *
 * @return {string} The identifying key that should be used to match before and after tokens.
 */
function get_key_for_token(token) {
  var tag_name = /<([^\s>]+)[\s>]/.exec(token);
  if (tag_name) {
    return "<" + tag_name[1].toLowerCase() + ">";
  }
  if (token) {
    return token.replace(/(\s+|&nbsp;|&#160;)/g, " ");
  }
  return token;
}

/*
 * Finds the matching block with the most consecutive tokens within the given range in the
 * before and after lists of tokens.
 *
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 * @param {Object} index_of_before_locations_in_after_tokens The index that is used to search
 *      for tokens in the after list.
 * @param {number} start_in_before The beginning of the range in the list of before tokens.
 * @param {number} end_in_before The end of the range in the list of before tokens.
 * @param {number} start_in_after The beginning of the range in the list of after tokens.
 * @param {number} end_in_after The end of the range in the list of after tokens.
 *
 * @return {Match} A Match that describes the best matching block in the given range.
 */

function find_match(
  before_tokens,
  after_tokens,
  index_of_before_locations_in_after_tokens,
  start_in_before,
  end_in_before,
  start_in_after,
  end_in_after
) {
  var best_match_in_before = start_in_before;
  var best_match_in_after = start_in_after;
  var best_match_length = 0;
  var match_length_at = {};

  for (
    var index_in_before = start_in_before;
    index_in_before < end_in_before;
    index_in_before++
  ) {
    var new_match_length_at = {};
    var looking_for = get_key_for_token(before_tokens[index_in_before]);

    var locations_in_after =
      index_of_before_locations_in_after_tokens[looking_for];

    for (var i = 0; i < locations_in_after.length; i++) {
      var index_in_after = locations_in_after[i];
      if (index_in_after < start_in_after) continue;
      if (index_in_after >= end_in_after) break;

      if (!match_length_at[index_in_after - 1]) {
        match_length_at[index_in_after - 1] = 0;
      }
      var new_match_length = match_length_at[index_in_after - 1] + 1;
      new_match_length_at[index_in_after] = new_match_length;

      if (new_match_length > best_match_length) {
        best_match_in_before = index_in_before - new_match_length + 1;
        best_match_in_after = index_in_after - new_match_length + 1;
        best_match_length = new_match_length;
      }
    }

    match_length_at = new_match_length_at;
  }
  if (best_match_length !== 0) {
    return new Match(
      best_match_in_before,
      best_match_in_after,
      best_match_length
    );
  }
  return null;
}

/*
 * Finds all the matching blocks within the given range in the before and after lists of
 * tokens. This function is called recursively to find the next best matches that precede
 * and follow the first best match.
 *
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 * @param {Object} index_of_before_locations_in_after_tokens The index that is used to search
 *      for tokens in the after list.
 * @param {number} start_in_before The beginning of the range in the list of before tokens.
 * @param {number} end_in_before The end of the range in the list of before tokens.
 * @param {number} start_in_after The beginning of the range in the list of after tokens.
 * @param {number} end_in_after The end of the range in the list of after tokens.
 * @param {Array.<Match>} matching_blocks The list of matching blocks found so far.
 *
 * @return {Array.<Match>} The list of matching blocks in this range.
 */
function recursively_find_matching_blocks(
  before_tokens,
  after_tokens,
  index_of_before_locations_in_after_tokens,
  start_in_before,
  end_in_before,
  start_in_after,
  end_in_after,
  matching_blocks
) {
  var match = find_match(
    before_tokens,
    after_tokens,
    index_of_before_locations_in_after_tokens,
    start_in_before,
    end_in_before,
    start_in_after,
    end_in_after
  );
  if (match) {
    if (
      start_in_before < match.start_in_before &&
      start_in_after < match.start_in_after
    ) {
      recursively_find_matching_blocks(
        before_tokens,
        after_tokens,
        index_of_before_locations_in_after_tokens,
        start_in_before,
        match.start_in_before,
        start_in_after,
        match.start_in_after,
        matching_blocks
      );
    }
    matching_blocks.push(match);
    if (
      match.end_in_before <= end_in_before &&
      match.end_in_after <= end_in_after
    ) {
      recursively_find_matching_blocks(
        before_tokens,
        after_tokens,
        index_of_before_locations_in_after_tokens,
        match.end_in_before + 1,
        end_in_before,
        match.end_in_after + 1,
        end_in_after,
        matching_blocks
      );
    }
  }
  return matching_blocks;
}

/*
 * Creates an index (A.K.A. hash table) that will be used to match the list of before
 * tokens with the list of after tokens.
 *
 * @param {Object} options An object with the following:
 *    - {Array.<string>} find_these The list of tokens that will be used to search.
 *    - {Array.<string>} in_these The list of tokens that will be returned.
 *
 * @return {Object} An index that can be used to search for tokens.
 */
function create_index(options) {
  if (!options.find_these) {
    throw new Error("params must have find_these key");
  }
  if (!options.in_these) {
    throw new Error("params must have in_these key");
  }
  var queries = options.find_these.map(function (token) {
    return get_key_for_token(token);
  });
  var results = options.in_these.map(function (token) {
    return get_key_for_token(token);
  });
  var index = {};
  for (var i = 0; i < queries.length; i++) {
    var query = queries[i];
    index[query] = [];
  }

  results.forEach((item, i) => {
    const array = index[item];
    if (array) array.push(i);
  });
  return index;
}

/*
 * Finds all the matching blocks in the before and after lists of tokens. This function
 * is a wrapper for the recursive function recursively_find_matching_blocks.
 *
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 *
 * @return {Array.<Match>} The list of matching blocks.
 */
function find_matching_blocks(before_tokens, after_tokens) {
  var matching_blocks = [];

  var index_of_before_locations_in_after_tokens = create_index({
    find_these: before_tokens,
    in_these: after_tokens,
  });

  return recursively_find_matching_blocks(
    before_tokens,
    after_tokens,
    index_of_before_locations_in_after_tokens,
    0,
    before_tokens.length,
    0,
    after_tokens.length,
    matching_blocks
  );
}

/*
 * Gets a list of operations required to transform the before list of tokens into the
 * after list of tokens. An operation describes whether a particular list of consecutive
 * tokens are equal, replaced, inserted, or deleted.
 *
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 *
 * @return {Array.<Object>} The list of operations to transform the before list of
 *      tokens into the after list of tokens, where each operation has the following
 *      keys:
 *      - {string} action One of {'replace', 'insert', 'delete', 'equal'}.
 *      - {number} start_in_before The beginning of the range in the list of before tokens.
 *      - {number} end_in_before The end of the range in the list of before tokens.
 *      - {number} start_in_after The beginning of the range in the list of after tokens.
 *      - {number} end_in_after The end of the range in the list of after tokens.
 */
function calculate_operations(before_tokens, after_tokens) {
  if (!before_tokens) {
    throw new Error("before_tokens?");
  }
  if (!after_tokens) {
    throw new Error("after_tokens?");
  }

  var position_in_before = 0;
  var position_in_after = 0;
  var operations = [];
  var action_map = {
    "false,false": "replace",
    "true,false": "insert",
    "false,true": "delete",
    "true,true": "none",
  };
  var matches = find_matching_blocks(before_tokens, after_tokens);
  matches.push(new Match(before_tokens.length, after_tokens.length, 0));

  for (var index = 0; index < matches.length; index++) {
    var match = matches[index];
    var match_starts_at_current_position_in_before =
      position_in_before === match.start_in_before;
    var match_starts_at_current_position_in_after =
      position_in_after === match.start_in_after;
    var action_up_to_match_positions =
      action_map[
        [
          match_starts_at_current_position_in_before,
          match_starts_at_current_position_in_after,
        ].toString()
      ];
    if (action_up_to_match_positions !== "none") {
      operations.push({
        action: action_up_to_match_positions,
        start_in_before: position_in_before,
        end_in_before:
          action_up_to_match_positions !== "insert"
            ? match.start_in_before - 1
            : void 0,
        start_in_after: position_in_after,
        end_in_after:
          action_up_to_match_positions !== "delete"
            ? match.start_in_after - 1
            : void 0,
      });
    }
    if (match.length !== 0) {
      operations.push({
        action: "equal",
        start_in_before: match.start_in_before,
        end_in_before: match.end_in_before,
        start_in_after: match.start_in_after,
        end_in_after: match.end_in_after,
      });
    }
    position_in_before = match.end_in_before + 1;
    position_in_after = match.end_in_after + 1;
  }

  var post_processed = [];
  var last_op = {
    action: "none",
  };

  function is_single_whitespace(op) {
    if (op.action !== "equal") {
      return false;
    }
    if (op.end_in_before - op.start_in_before !== 0) {
      return false;
    }
    return /^\s$/.test(
      before_tokens.slice(op.start_in_before, op.end_in_before + 1)
    );
  }

  for (var i = 0; i < operations.length; i++) {
    var op = operations[i];

    if (
      (is_single_whitespace(op) && last_op.action === "replace") ||
      (op.action === "replace" && last_op.action === "replace")
    ) {
      last_op.end_in_before = op.end_in_before;
      last_op.end_in_after = op.end_in_after;
    } else {
      post_processed.push(op);
      last_op = op;
    }
  }
  return post_processed;
}

/*
 * Returns a list of tokens of a particular type starting at a given index.
 *
 * @param {number} start The index of first token to test.
 * @param {Array.<string>} content The list of tokens.
 * @param {function} predicate A function that returns true if a token is of
 *      a particular type, false otherwise. It should accept the following
 *      parameters:
 *      - {string} The token to test.
 */
function consecutive_where(start, content, predicate) {
  content = content.slice(start, content.length + 1);
  var last_matching_index = null;

  for (var index = 0; index < content.length; index++) {
    var token = content[index];
    var answer = predicate(token);

    if (answer === true) {
      last_matching_index = index;
    }
    if (answer === false) {
      break;
    }
  }

  if (last_matching_index !== null) {
    return content.slice(0, last_matching_index + 1);
  }
  return [];
}

/*
 * Wraps and concatenates a list of tokens with a tag. Does not wrap tag tokens,
 * unless they are wrappable (i.e. void and atomic tags).
 *
 * @param {sting} tag The tag name of the wrapper tags.
 * @param {Array.<string>} content The list of tokens to wrap.
 * @param {string} class_name (Optional) The class name to include in the wrapper tag.
 */
function wrap(tag, content, class_name) {
  var rendering = "";
  var position = 0;
  var length = content.length;

  while (true) {
    if (position >= length) break;
    var non_tags = consecutive_where(position, content, is_wrappable);
    position += non_tags.length;
    if (non_tags.length !== 0) {
      var val = non_tags.join("");
      if (val.trim()) {
        rendering += `<span class="diff-${tag} ${
          class_name || ""
        }">${val}</span>`;
      }
    }

    if (position >= length) break;
    var tags = consecutive_where(position, content, is_tag);
    position += tags.length;
    rendering += tags.join("");
  }
  return rendering;
}

/*
 * op_map.equal/insert/delete/replace are functions that render an operation into
 * HTML content.
 *
 * @param {Object} op The operation that applies to a prticular list of tokens. Has the
 *      following keys:
 *      - {string} action One of {'replace', 'insert', 'delete', 'equal'}.
 *      - {number} start_in_before The beginning of the range in the list of before tokens.
 *      - {number} end_in_before The end of the range in the list of before tokens.
 *      - {number} start_in_after The beginning of the range in the list of after tokens.
 *      - {number} end_in_after The end of the range in the list of after tokens.
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 * @param {string} class_name (Optional) The class name to include in the wrapper tag.
 *
 * @return {string} The rendering of that operation.
 */
var op_map = {
  equal: function (op, before_tokens, after_tokens, class_name) {
    return after_tokens.slice(op.start_in_after, op.end_in_after + 1).join("");
  },
  insert: function (op, before_tokens, after_tokens, class_name) {
    var val = after_tokens.slice(op.start_in_after, op.end_in_after + 1);
    return wrap("ins", val, class_name);
  },
  delete: function (op, before_tokens, after_tokens, class_name) {
    var val = before_tokens.slice(op.start_in_before, op.end_in_before + 1);
    return wrap("del", val, class_name);
  },
};

op_map.replace = function (op, before_tokens, after_tokens, class_name) {
  return [
    op_map["delete"](op, before_tokens, after_tokens, class_name),
    op_map.insert(op, before_tokens, after_tokens, class_name),
  ];
};

/*
 * Renders a list of operations into HTML content. The result is the combined version
 * of the before and after tokens with the differences wrapped in tags.
 *
 * @param {Array.<string>} before_tokens The before list of tokens.
 * @param {Array.<string>} after_tokens The after list of tokens.
 * @param {Array.<Object>} operations The list of operations to transform the before
 *      list of tokens into the after list of tokens, where each operation has the
 *      following keys:
 *      - {string} action One of {'replace', 'insert', 'delete', 'equal'}.
 *      - {number} start_in_before The beginning of the range in the list of before tokens.
 *      - {number} end_in_before The end of the range in the list of before tokens.
 *      - {number} start_in_after The beginning of the range in the list of after tokens.
 *      - {number} end_in_after The end of the range in the list of after tokens.
 * @param {string} class_name (Optional) The class name to include in the wrapper tag.
 *
 * @return {string} The rendering of the list of operations.
 */
function render_operations(
  before_tokens,
  after_tokens,
  operations,
  class_name
) {
  var rendering = "";
  for (var i = 0; i < operations.length; i++) {
    var op = operations[i];
    const result = op_map[op.action](
      op,
      before_tokens,
      after_tokens,
      class_name
    );
    if (op.action === "replace") {
      rendering += result[0] + result[1];
    } else {
      rendering += result;
    }
  }
  return rendering;
}

/**
 * htmldiff thinks that atomic or component objects are equal if their tag names are the same, because of it <div class="slideshow"> === <div class="here"> for example
 * this function compares them
 */
function render_equal_components(
  before,
  after,
  ops,
  class_name,
  components_search_rule
) {
  var i, j, r, attrs;
  var op;
  var amount;
  var before_token, after_token;
  var before_token_match, after_token_match;
  for (i = 0; i < ops.length; i++) {
    op = ops[i];
    if (op.action === "equal") {
      amount = op.end_in_before - op.start_in_before + 1; // +1 since the array is 0 based
      for (j = 0; j < amount; j++) {
        before_token = before[op.start_in_before + j];
        after_token = after[op.start_in_after + j];
        if (before_token !== after_token) {
          before_token_match = components_search_rule.exec(before_token);
          after_token_match = components_search_rule.exec(after_token);
          if (before_token_match && after_token_match) {
            if (
              before_token_match[1].toLowerCase() ===
              after_token_match[1].toLowerCase()
            ) {
              // compare content of these two same components
              r = diff(before_token, after_token, class_name);
              after[op.start_in_after + j] = r;
            } else {
              // components are different, wrap before by 'del' and after by 'ins'
              attrs = class_name ? ' class="' + class_name + '"' : "";
              r =
                "<del" +
                attrs +
                ">" +
                before_token +
                "</del>" +
                "<ins" +
                attrs +
                ">" +
                after_token +
                "</ins>";
              after[op.start_in_after + j] = r;
            }
          }
        }
      }
    }
  }
}

/*
 * Compares two pieces of HTML content and returns the combined content with differences
 * wrapped in <ins> and <del> tags.
 *
 * @param {string} before The HTML content before the changes.
 * @param {string} after The HTML content after the changes.
 * @param {string} class_name (Optional) The class attribute to include in <ins> and <del> tags.
 *
 * @return {string} The combined HTML content with differences wrapped in <ins> and <del> tags.
 */
function diff(before, after, class_name, components_search_rule) {
  if (before === after) return before;
  before = html_to_tokens(before, components_search_rule);
  after = html_to_tokens(after, components_search_rule);
  var ops = calculate_operations(before, after);

  if (components_search_rule) {
    render_equal_components(
      before,
      after,
      ops,
      class_name,
      components_search_rule
    );
  }

  return render_operations(before, after, ops, class_name);
}

function render_operations_dual_pane(before_tokens, after_tokens, operations) {
  var after_render, before_render, next_block, op, _i, _len;
  before_render = "";
  after_render = "";
  for (_i = 0, _len = operations.length; _i < _len; _i++) {
    op = operations[_i];
    next_block = op_map[op.action](op, before_tokens, after_tokens);
    switch (op.action) {
      case "equal":
        before_render += next_block;
        after_render += next_block;
        break;
      case "insert":
        after_render += next_block;
        break;
      case "delete":
        before_render += next_block;
        break;
      case "replace":
        before_render += next_block[0];
        after_render += next_block[1];
        break;
      default:
        continue;
    }
  }
  return return_dual_pane(before_render, after_render);
}

function diff_dual_pane(before, after) {
  if (!before || !after) return { before, after };

  var ops;
  if (before === after) {
    return return_dual_pane(before, after);
  }
  before = html_to_tokens(before);
  after = html_to_tokens(after);
  ops = calculate_operations(before, after);
  return render_operations_dual_pane(before, after, ops);
}

diff.html_to_tokens = html_to_tokens;
diff.merge_html_by_class = merge_html_by_class;
diff.find_matching_blocks = find_matching_blocks;

find_matching_blocks.find_match = find_match;
find_matching_blocks.create_index = create_index;
find_matching_blocks.get_key_for_token = get_key_for_token;

diff.calculate_operations = calculate_operations;
diff.render_operations = render_operations;
diff.diff_dual_pane = diff_dual_pane;

diff.clean = function (html) {
  const tokens = html_to_tokens(html);
  let isDiffSpanOpen = false;
  return tokens.reduce((result, token) => {
    if (token.includes(`span class="diff-`)) {
      isDiffSpanOpen = true;
      return result;
    } else if (isDiffSpanOpen && token.includes("</span>")) {
      isDiffSpanOpen = false;
      return result;
    }
    return result + token;
  }, "");
};

if (self.document) {
  self.htmlDiff = diff;
} else {
  self.addEventListener("message", onMessage);
}

function onMessage(ev) {
  const { type, data, messageId } = ev.data;
  try {
    switch (type) {
      case "generate": {
        const { before, after } = data;
        const result = diff.diff_dual_pane(before, after);
        sendMessage("generate", result, messageId);
        break;
      }
      case "clean": {
        const { html } = data;
        const cleanHtml = diff.clean(html);
        sendMessage("clean", cleanHtml, messageId);
        break;
      }
      default:
        return;
    }
  } catch (error) {
    console.error("Crypto worker error:", error, messageId, type);
    sendMessage(type, { error: error.message }, messageId);
  }
}

function sendMessage(type, data, messageId) {
  postMessage({ type, data, messageId });
}