/**
 * Collapses every run of two or more whitespace characters into a single
 * space and removes leading/trailing whitespace.
 *
 * A lone whitespace character in the middle of the string (including a single
 * tab or newline) is left untouched.
 */
const unspace = (str: string) => {
  const collapsed = str.replace(/\s\s+/g, " ");
  return collapsed.trim();
};

/**
 * Smallest slice length makeNGrams takes when generating n-grams; inputs
 * shorter than this are only indexed as a whole string.
 */
export const NGRAM_MIN_LENGTH = 3;
/**
 * Largest slice length makeNGrams takes when generating n-grams; searchPrepare
 * truncates ordinary queries to this many characters.
 */
export const NGRAM_MAX_LENGTH = 10;

/**
 * Builds the de-duplicated list of lower-cased search n-grams for one or more
 * strings.
 *
 * Two kinds of entries are produced:
 *  1. The whole of every non-empty input (punctuation removed, whitespace
 *     collapsed), regardless of its length, so that very short values such as
 *     the pass code "C" remain searchable.
 *  2. For every input of at least NGRAM_MIN_LENGTH characters: prefixes of
 *     NGRAM_MIN_LENGTH..NGRAM_MAX_LENGTH characters taken from each
 *     word-start position. Each input is processed in two variants, one with
 *     punctuation removed and one with punctuation replaced by a space.
 *     Trailing non-word characters are stripped from every prefix, so a prefix
 *     that ended on a space is stored as the shorter word before it.
 *
 * @param inputs Strings to index. Empty (or otherwise falsy) entries are ignored.
 * @returns The unique n-grams in insertion order: whole strings first, then prefixes.
 */
export const makeNGrams = (...inputs: string[]) => {
  const withoutPunctuation = (text: string) =>
    unspace(text.replace(/[^\w\s]+/g, ""));
  const punctuationAsSpace = (text: string) =>
    unspace(text.replace(/[^\w\s]+/g, " "));

  const ngrams = new Set<string>();

  // Pass 1: index each whole string, even when it is shorter than
  // NGRAM_MIN_LENGTH.
  for (const input of inputs) {
    if (!input) {
      continue;
    }
    const whole = withoutPunctuation(input.toLowerCase());
    // An input made only of punctuation/whitespace normalises to "", which is
    // not a meaningful search key, so it is not indexed.
    if (whole.length > 0) {
      ngrams.add(whole);
    }
  }

  // Pass 2: prefixes starting at every word boundary.
  for (const input of inputs) {
    if (!input || input.length < NGRAM_MIN_LENGTH) {
      continue;
    }
    const lowered = input.toLowerCase();
    // A Set so that the two variants are processed once when they coincide.
    const variants = new Set<string>();
    variants.add(withoutPunctuation(lowered));
    variants.add(punctuationAsSpace(lowered));

    variants.forEach((variant) => {
      const tokens = variant.split(/\s/);
      for (let start = 0; start < tokens.length; ++start) {
        // Everything from this word to the end, single-space separated.
        const section = tokens.slice(start).join(" ");
        const longest = Math.min(NGRAM_MAX_LENGTH, section.length);
        for (let size = NGRAM_MIN_LENGTH; size <= longest; ++size) {
          // Strip a trailing separator so "ab " is stored as "ab".
          ngrams.add(section.slice(0, size).replace(/\W+$/, ""));
        }
      }
    });
  }

  return Array.from(ngrams);
};

/**
 * Normalises a raw search query into the form used for n-gram lookups.
 *
 * Punctuation is removed, whitespace runs are collapsed to a single space and
 * the result is trimmed, mirroring the normalisation makeNGrams applies to
 * indexed strings. A query that is exactly "CMJA" followed by six digits
 * (case-insensitive) is returned upper-cased and untruncated. Any other query
 * is lower-cased, truncated to NGRAM_MAX_LENGTH characters and stripped of
 * trailing non-word characters.
 *
 * @param input Raw query text.
 * @returns The normalised query string (possibly empty).
 */
export const searchPrepare = (input: string) => {
  const trimmed = input
    .replace(/[^\w\s]+/g, "")
    // Indexed n-grams always separate words with exactly one space.
    .replace(/\s+/g, " ")
    .trim();
  if (/^CMJA\d{6}$/i.test(trimmed)) {
    return trimmed.toUpperCase();
  }
  return (
    trimmed
      .toLowerCase()
      .slice(0, NGRAM_MAX_LENGTH)
      // Truncation may cut right after a word separator; makeNGrams strips
      // trailing non-word characters from its n-grams, so do the same here.
      .replace(/\W+$/, "")
  );
};
