import cluster from 'set-clustering';
import leven from 'leven';

// Articles that are dropped from labels before they are compared, so that
// e.g. "The Beatles" and "Beatles" normalize to the same string.
const STOP_WORDS = [
  'the',
  'an',
  'a',
];

/**
 * Find suggested merges based on fuzzy matching of labels.
 *
 * Each label is normalized with `cleanLabel`, the resulting pairs are
 * clustered with `set-clustering` using `similarity` as the comparison
 * function, and only the original objects are returned. Groups that contain a
 * single object are dropped because there is nothing to merge them with.
 *
 * @param {Array<[Object, string]>} data - Array of tuples: the object to group
 *   and the string to group it by.
 * @param {number} [threshold=0.76] - Value handed to the clustering's
 *   `similarGroups` call. Presumably the minimum similarity (0 to 1) two labels
 *   need in order to land in the same group; confirm against the
 *   `set-clustering` documentation before tuning it.
 * @returns {Array<Array<Object>>} Groups of the objects that were passed in,
 *   excluding groups with a count of 1.
 */
export default function findSuggestedMerges(data, threshold = 0.76) {
  // Compare normalized labels, but keep each original object alongside its
  // label so it can be recovered once the groups have been formed.
  const labelledItems = data.map(([item, string]) => [item, cleanLabel(string)]);

  return cluster(labelledItems, similarity)
    .similarGroups(threshold)
    .map(group => group.map(([item]) => item))
    .filter(group => group.length > 1);
}

/**
 * Normalize a label so that superficially different spellings compare equal.
 *
 * The label is lower-cased, stripped of punctuation/symbols, split into
 * whitespace-separated words, filtered of stop words (see `STOP_WORDS`), and
 * re-joined with single spaces.
 *
 * @param {string} label - Raw label text.
 * @returns {string} The normalized label; an empty string when nothing but
 *   stop words, punctuation, or whitespace was present.
 */
function cleanLabel(label) {
  return (
    label
      .toLowerCase()
      // Keep letters, combining marks, digits, underscore and whitespace from
      // any script. An ASCII-only `\w` would delete accented and non-Latin
      // letters ("Café" -> "caf"), leaving such labels partly or fully empty.
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '')
      // Split on any whitespace run: removed punctuation ("cat - dog") and
      // tabs/newlines must not create empty words or hide stop words.
      .split(/\s+/)
      // Leading/trailing whitespace yields empty tokens; drop them together
      // with the stop words.
      .filter(word => word !== '' && !STOP_WORDS.includes(word))
      .join(' ')
  );
}

/**
 * Similarity of two `[item, label]` tuples based on their labels only.
 *
 * Computed as `1 - levenshteinDistance / lengthOfLongerLabel`, so identical
 * non-empty labels score 1 and labels with nothing in common score 0.
 *
 * @param {[Object, string]} first - Tuple whose second element is the label.
 * @param {[Object, string]} second - Tuple whose second element is the label.
 * @returns {number} A score between 0 and 1; never NaN.
 */
function similarity([, x], [, y]) {
  const longest = Math.max(x.length, y.length);

  // Two empty labels would divide 0 by 0 and yield NaN. An empty label carries
  // no information that could justify suggesting a merge, so score the pair as
  // "not similar" instead of handing NaN to the clustering.
  if (longest === 0) {
    return 0;
  }

  return 1 - leven(x, y) / longest;
}
