import { distance as damerauLevenshteinDistance } from 'damerau-levenshtein-js';
import range from 'lodash/range';
import sum from 'lodash/sum';
import slugify from 'slugify';
import analytics from 'src/analytics';
import { stateCodeNameMap } from 'src/utils/states';

/**
 * Address shape that uses a snake_case `zip_code` field and may carry
 * optional `lat`/`long` numbers (presumably latitude/longitude).
 * NOTE(review): the name suggests this mirrors a Glide API payload — confirm
 * against the consumers of this type.
 */
export type GlideAddress = {
  street: string;
  unit?: string;
  city: string;
  state: string;
  zip_code: string;
  lat?: number | undefined;
  long?: number | undefined;
};
/**
 * Address shape with a camelCase `zipCode` field. This is the type accepted
 * by the formatting helpers and by `compareAddresses` in this module.
 */
export type Address = {
  street: string;
  unit?: string;
  city: string;
  state: string;
  zipCode: string;
};

// Renders an address as a single "<street> <city>, <state> <zipCode>" line,
// substituting a "NO ..." placeholder for every component that is falsy
const addressComponentsToString = ({ street, city, state, zipCode }: Address) => {
  const streetText = street || 'NO STREET';
  const cityText = city || 'NO CITY';
  const stateText = state || 'NO STATE';
  const zipCodeText = zipCode || 'NO ZIP CODE';
  return `${streetText} ${cityText}, ${stateText} ${zipCodeText}`;
};

/**
 * Parses a comma-separated `"<street>, <city>, <state> <zip>"` string into
 * its address components.
 *
 * Only the first three comma-separated components are used; any further
 * components are ignored. Within the third component only the first two
 * whitespace-separated words are used (state, then ZIP code).
 *
 * @param address - Free-form address string; may be omitted.
 * @returns The parsed components together with the original `address`
 *   string when at least three comma-separated components are present and
 *   the third one contains at least two words; otherwise an object whose
 *   components are all empty strings (and without the `address` key).
 */
export const parseStringAsAddress = (address?: string) => {
  const addressComponents = address ? address.split(',') : [];
  if (addressComponents.length > 2) {
    // Trim so the blank that usually follows each comma does not leak into
    // the returned values (e.g. " Austin")
    const street = addressComponents[0].trim();
    const city = addressComponents[1].trim();
    // Split on whitespace runs so repeated blanks between the state and the
    // ZIP code do not produce empty parts
    const stateZipParts = addressComponents[2].trim().split(/\s+/);
    if (stateZipParts.length > 1) {
      const [state, zipCode] = stateZipParts;
      return {
        address,
        street,
        unit: '',
        city,
        state,
        zip_code: zipCode,
      };
    }
  }
  return {
    street: '',
    unit: '',
    city: '',
    state: '',
    zip_code: '',
  };
};

/**
 * Builds the first display line of an address: the street followed by the
 * unit (when present), separated by a blank and trimmed.
 *
 * @param address - Address to format; may be omitted.
 * @returns The formatted line, or an empty string when no address is given.
 */
export const formatFirstLine = (address?: Address) => {
  if (address) {
    const { street } = address;
    const unit = address.unit === undefined ? '' : address.unit;
    const firstLine = `${street} ${unit}`;
    return firstLine.trim();
  }
  return '';
};

/**
 * Builds the second display line of an address in the form
 * `"<city>, <state> <zipCode>"`, trimmed.
 *
 * @param address - Address to format; may be omitted.
 * @returns The formatted line, or an empty string when no address is given.
 */
export const formatSecondLine = (address?: Address) => {
  if (address) {
    const secondLine = `${address.city}, ${address.state} ${address.zipCode}`;
    return secondLine.trim();
  }
  return '';
};

/**
 * Builds a one-line address made of up to three comma-separated segments:
 * `"<street> <unit>"`, `"<city>"` and `"<state> <zipCode>"`. Each segment is
 * trimmed and segments that end up empty are left out.
 *
 * @param address - Address to format; may be omitted.
 * @returns The formatted address, or an empty string when no address is
 *   given.
 */
export const formatFullAddress = (address?: Address) => {
  const segments: string[] = [];
  if (address) {
    const unit = address.unit === undefined ? '' : address.unit;
    const candidates = [
      `${address.street} ${unit}`,
      address.city,
      `${address.state} ${address.zipCode}`,
    ];
    for (const candidate of candidates) {
      const trimmed = candidate.trim();
      if (trimmed) {
        segments.push(trimmed);
      }
    }
  }
  return segments.join(', ');
};

// Basic string cleansing: collapses runs of blanks into a single blank and
// turns the result into a lowercase slug. A missing value is treated as ''
const stringCleansing = (str?: string) => {
  const singleSpaced = (str ? str : '').replace(/ +/g, ' ');
  return slugify(singleSpaced, { lower: true });
};

// Cleanses `string` with stringCleansing and then rewrites every whole-word
// occurrence of a term to the canonical form of its group, i.e. the first
// term of that group. Groups are applied in the given order, and terms
// within a group are applied sequentially in their listed order.
//
// Terms are matched literally: regular expression metacharacters inside a
// term are escaped, and the canonical term is inserted verbatim (no "$&" or
// "$1" style expansion).
const replacementsStringCleansing = (
  string: string,
  cleansingTerms: string[][]
) =>
  cleansingTerms.reduce((cleansedString, termsGroup) => {
    const canonicalTerm = termsGroup[0];
    return termsGroup.reduce((cs: string, term: string) => {
      // Rewriting the canonical term to itself changes nothing, and an empty
      // term would match at every word boundary
      if (!term || term === canonicalTerm) {
        return cs;
      }
      const escapedTerm = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      return cs.replace(
        new RegExp(`\\b${escapedTerm}\\b`, 'g'),
        // Function replacer so "$" in the canonical term is not interpreted
        () => canonicalTerm
      );
    }, cleansedString);
  }, stringCleansing(string));

// Replacement groups for state cleansing, one per entry of stateCodeNameMap:
// [lowercase state code, lowercase slug of the state name].
//
// Groups are ordered longest name first. Replacements are applied group by
// group on whole words, so if a shorter name that is a whole-word suffix of a
// longer one (e.g. "virginia" inside "west-virginia") were processed first,
// the longer name would be partially rewritten ("west-va") and never reach
// its own code.
const defaultStatesReplacements = stateCodeNameMap
  .map(([code, name]) => ({
    code: code.toLowerCase(),
    slug: slugify(name, {
      lower: true,
    }),
  }))
  .sort((a, b) => b.slug.length - a.slug.length)
  .map(({ code, slug }) => [code, slug]);

// Default cleansing of state names: cleanses the input and rewrites each term
// of a replacement group to the first term of that group. With the default
// groups a full state name ends up as its lowercase state code
const stateCleansing = (
  state: string,
  statesReplacements: string[][] = defaultStatesReplacements
) => {
  const cleansedState = replacementsStringCleansing(state, statesReplacements);
  return cleansedState;
};

// Splits a street into [street number, remaining street name].
// When the street has more than one word (words being separated by splChar)
// and its first word contains nothing but digits, that first word is taken as
// the street number and the rest of the words as the street name. Otherwise
// the number is null and the street is returned untouched.
const getStreetNumberAndCleanName = (
  street: string,
  splChar: string = '-'
): [string | null, string] => {
  const words = street.split(splChar);
  const firstWord = words[0];
  // Nothing left once the digits are stripped means the word is all digits
  const firstWordIsNumeric = firstWord.replace(/\d/g, '').length === 0;
  if (firstWordIsNumeric && words.length > 1) {
    const nameWords = words.slice(1);
    return [firstWord, nameWords.join(splChar)];
  }
  return [null, street];
};

// Street suffix replacement groups. Each group lists the spellings of one
// street suffix; the first entry of a group is the form every other entry of
// that group gets rewritten to.
// Source: https://pe.usps.com/text/pub28/28apc_002.htm
const defaultStreetCleansingTerms = [
  ['aly', 'allee', 'alley', 'ally'],
  ['anx', 'anex', 'annex', 'annx'],
  ['arc', 'arcade'],
  ['ave', 'av', 'aven', 'avenu', 'avenue', 'avn', 'avnue'],
  ['byu', 'bayoo', 'bayou'],
  ['bch', 'beach'],
  ['bnd', 'bend'],
  ['blf', 'bluf', 'bluff'],
  ['blfs', 'bluffs'],
  ['btm', 'bot', 'bottm', 'bottom'],
  ['blvd', 'boul', 'boulevard', 'boulv'],
  ['br', 'brnch', 'branch'],
  ['brg', 'brdge', 'bridge'],
  ['brk', 'brook'],
  ['brks', 'brooks'],
  ['bg', 'burg'],
  ['bgs', 'burgs'],
  ['byp', 'bypa', 'bypas', 'bypass', 'byps'],
  ['cp', 'camp', 'cmp'],
  ['cyn', 'canyn', 'canyon', 'cnyn'],
  ['cpe', 'cape'],
  ['cswy', 'causeway', 'causwa'],
  ['ctr', 'cen', 'cent', 'center', 'centr', 'centre', 'cnter', 'cntr'],
  ['ctrs', 'centers'],
  ['cir', 'circ', 'circl', 'circle', 'crcl', 'crcle'],
  ['cirs', 'circles'],
  ['clf', 'cliff'],
  ['clfs', 'cliffs'],
  ['clb', 'club'],
  ['cmn', 'common'],
  ['cmns', 'commons'],
  ['cor', 'corner'],
  ['cors', 'corners'],
  ['crse', 'course'],
  ['ct', 'court'],
  ['cts', 'courts'],
  ['cv', 'cove'],
  ['cvs', 'coves'],
  ['crk', 'creek'],
  ['cres', 'crescent', 'crsent', 'crsnt'],
  ['crst', 'crest'],
  ['xing', 'crossing', 'crssng'],
  ['xrd', 'crossroad'],
  ['xrds', 'crossroads'],
  ['curv', 'curve'],
  ['dl', 'dale'],
  ['dm', 'dam'],
  ['dv', 'div', 'divide', 'dvd'],
  ['dr', 'driv', 'drive', 'drv'],
  ['drs', 'drives'],
  ['est', 'estate'],
  ['ests', 'estates'],
  ['expy', 'exp', 'expr', 'express', 'expressway', 'expw'],
  ['ext', 'extension', 'extn', 'extnsn'],
  ['exts', 'extensions'],
  ['fall'],
  ['fls', 'falls'],
  ['fry', 'ferry', 'frry'],
  ['fld', 'field'],
  ['flds', 'fields'],
  ['flt', 'flat'],
  ['flts', 'flats'],
  ['frd', 'ford'],
  ['frds', 'fords'],
  ['frst', 'forest', 'forests'],
  ['frg', 'forg', 'forge'],
  ['frgs', 'forges'],
  ['frk', 'fork'],
  ['frks', 'forks'],
  ['ft', 'fort', 'frt'],
  ['fwy', 'freeway', 'freewy', 'frway', 'frwy'],
  ['gdn', 'garden', 'gardn', 'grden', 'grdn'],
  ['gdns', 'gardens', 'grdns'],
  ['gtwy', 'gateway', 'gatewy', 'gatway', 'gtway'],
  ['gln', 'glen'],
  ['glns', 'glens'],
  ['grn', 'green'],
  ['grns', 'greens'],
  ['grv', 'grov', 'grove'],
  ['grvs', 'groves'],
  ['hbr', 'harb', 'harbor', 'harbr', 'hrbor'],
  ['hbrs', 'harbors'],
  ['hvn', 'haven'],
  ['hts', 'ht', 'heights'],
  ['hwy', 'highway', 'highwy', 'hiway', 'hiwy', 'hway'],
  ['hl', 'hill'],
  ['hls', 'hills'],
  ['holw', 'hllw', 'hollow', 'hollows', 'holws'],
  ['inlt', 'inlet'],
  ['is', 'island', 'islnd'],
  ['iss', 'islands', 'islnds'],
  ['isle', 'isles'],
  ['jct', 'jction', 'jctn', 'junction', 'junctn', 'juncton'],
  ['jcts', 'jctns', 'junctions'],
  ['ky', 'key'],
  ['kys', 'keys'],
  ['knl', 'knol', 'knoll'],
  ['knls', 'knolls'],
  ['lk', 'lake'],
  ['lks', 'lakes'],
  ['land'],
  ['lndg', 'landing', 'lndng'],
  ['ln', 'lane'],
  ['lgt', 'light'],
  ['lgts', 'lights'],
  ['lf', 'loaf'],
  ['lck', 'lock'],
  ['lcks', 'locks'],
  ['ldg', 'ldge', 'lodg', 'lodge'],
  ['loop', 'loops'],
  ['mall'],
  ['mnr', 'manor'],
  ['mnrs', 'manors'],
  ['mdw', 'meadow'],
  ['mdws', 'mdw', 'meadows', 'medows'],
  ['mews'],
  ['ml', 'mill'],
  ['mls', 'mills'],
  ['msn', 'missn', 'mssn', 'mission'],
  ['mtwy', 'motorway'],
  ['mt', 'mnt', 'mount'],
  ['mtn', 'mntain', 'mntn', 'mountain', 'mountin', 'mtin'],
  ['mtns', 'mntns', 'mountains'],
  ['nck', 'neck'],
  ['orch', 'orchard', 'orchrd'],
  ['oval', 'ovl'],
  ['opas', 'overpass'],
  ['park', 'prk', 'parks'],
  ['pkwy', 'parkway', 'parkwy', 'pkway', 'pky', 'parkways', 'pkwys'],
  ['pass'],
  ['psge', 'passage'],
  ['path', 'paths'],
  ['pike', 'pikes'],
  ['pne', 'pine'],
  ['pnes', 'pines'],
  ['pl', 'place'],
  ['pln', 'plain'],
  ['plns', 'plains'],
  ['plz', 'plaza', 'plza'],
  ['pt', 'point'],
  ['pts', 'points'],
  ['prt', 'port'],
  ['prts', 'ports'],
  ['pr', 'prairie', 'prr'],
  ['radl', 'rad', 'radial', 'radiel'],
  ['ramp'],
  ['rnch', 'ranch', 'ranches', 'rnchs'],
  ['rpd', 'rapid'],
  ['rpds', 'rapids'],
  ['rst', 'rest'],
  ['rdg', 'rdge', 'ridge'],
  ['rdgs', 'ridges'],
  ['riv', 'river', 'rvr', 'rivr'],
  ['rd', 'road'],
  ['rds', 'roads'],
  ['rte', 'route'],
  ['row'],
  ['rue'],
  ['run'],
  ['shl', 'shoal'],
  ['shls', 'shoals'],
  ['shr', 'shoar', 'shore'],
  ['shrs', 'shoars', 'shores'],
  ['skwy', 'skyway'],
  ['spg', 'spng', 'spring', 'sprng'],
  ['spgs', 'spngs', 'springs', 'sprngs'],
  ['spur', 'spurs'],
  ['sq', 'sqr', 'sqre', 'squ', 'square'],
  ['sqs', 'sqrs', 'squares'],
  ['sta', 'station', 'statn', 'stn'],
  ['stra', 'strav', 'straven', 'stravenue', 'stravn', 'strvn', 'strvnue'],
  ['strm', 'stream', 'streme'],
  ['st', 'street', 'strt', 'str'],
  ['sts', 'streets'],
  ['smt', 'sumit', 'sumitt', 'summit'],
  ['ter', 'terr', 'terrace'],
  ['trwy', 'throughway'],
  ['trce', 'trace', 'traces'],
  ['trak', 'track', 'tracks', 'trk', 'trks'],
  ['trfy', 'trafficway'],
  ['trl', 'trail', 'trails', 'trls'],
  ['trlr', 'trailer', 'trlrs'],
  ['tunl', 'tunel', 'tunls', 'tunnel', 'tunnels', 'tunnl'],
  ['tpke', 'trnpk', 'turnpike', 'turnpk'],
  ['upas', 'underpass'],
  ['un', 'union'],
  ['uns', 'unions'],
  ['vly', 'valley', 'vally', 'vlly'],
  ['vlys', 'valleys'],
  ['via', 'vdct', 'viadct', 'viaduct'],
  ['vw', 'view'],
  ['vws', 'views'],
  ['vlg', 'vill', 'villag', 'village', 'villg', 'villiage'],
  ['vlgs', 'villages'],
  ['vl', 'ville'],
  ['vis', 'vist', 'vista', 'vst', 'vsta'],
  ['walk', 'walks'],
  ['wall'],
  ['way', 'wy'],
  ['ways'],
  ['wl', 'well'],
  ['wls', 'wells'],
];

// Default cleansing of street names: cleanses the street and rewrites the
// spelling variants of common street words (street, avenue, lane, boulevard,
// ...) to the first term of their replacement group
const streetCleansing = (
  street: string,
  cleansingTerms: string[][] = defaultStreetCleansingTerms
) => {
  const cleansedStreet = replacementsStringCleansing(street, cleansingTerms);
  return cleansedStreet;
};

// Splits both terms into words (keeping their original order) and compares
// the words position by position with damerauLevenshteinDistance; a word that
// has no counterpart in the other term is compared against ''.
// The result is the weighted average of the per-position distances, where
// first words weigh more than last words in order to favour the street name
// comparison over the unit# comparison.
const damerauLevenshteinDistanceWithFirstWordsWeighting = (
  term1: string,
  term2: string,
  splChr: string = '-'
) => {
  const term1Words = term1.split(splChr);
  const term2Words = term2.split(splChr);

  const wordsCount = Math.max(term1Words.length, term2Words.length);

  // range(wordsCount, 0) counts down: wordsCount, wordsCount - 1, ..., 1.
  // The weight at index i is therefore the weight of the i-th word.
  const wordWeights = range(wordsCount, 0);

  const weightedDistance = wordWeights.reduce(
    (totalDistance, wordWeight, wordIdx) =>
      totalDistance +
      wordWeight *
        damerauLevenshteinDistance(
          term1Words[wordIdx] || '',
          term2Words[wordIdx] || ''
        ),
    0.0
  );

  return weightedDistance / sum(wordWeights);
};

// Default function to normalize output values of damerauLevenshteinDistance:
// divides the distance by the mean length of the two compared terms.
// The output can be higher than 1 when the distance is greater than that
// mean length. Two empty terms at distance 0 normalize to 0 (instead of the
// NaN that 0 / 0 would produce).
const defaultNormalizationFunction = (
  value: number,
  term1: string,
  term2: string
) => {
  const meanLength = (term1.length + term2.length) / 2.0;
  if (meanLength === 0 && value === 0) {
    return 0;
  }
  return value / meanLength;
};

// Computes damerauLevenshteinDistance for the two terms and passes the raw
// distance, together with both terms, through the provided
// normalizationFunction
const normalizedDamerauLevenshteinDistance = (
  term1: string,
  term2: string,
  normalizationFunction = defaultNormalizationFunction
) => {
  const rawDistance = damerauLevenshteinDistance(term1, term2);
  return normalizationFunction(rawDistance, term1, term2);
};

// Computes the custom damerauLevenshteinDistanceWithFirstWordsWeighting for
// the two terms and passes the weighted distance, together with both terms,
// through the provided normalizationFunction
const normalizedDamerauLevenshteinDistanceWithFirstWordsWeighting = (
  term1: string,
  term2: string,
  normalizationFunction = defaultNormalizationFunction
) => {
  const weightedDistance = damerauLevenshteinDistanceWithFirstWordsWeighting(
    term1,
    term2
  );
  return normalizationFunction(weightedDistance, term1, term2);
};

/**
 * Decides whether two addresses match, tolerating formatting differences and
 * small typos.
 *
 * - ZIP codes and states are compared for equality after cleansing, and only
 *   when both addresses provide them.
 * - Street numbers (a leading all-digits word of the street) are compared
 *   for equality, and only when both streets have one.
 * - Cities and street names are compared by normalized Damerau-Levenshtein
 *   distance and mismatch when the distance exceeds the matching threshold.
 *   When either side is missing the distance is taken as 0.
 * - `unit` is not compared.
 *
 * @param address1 - First address.
 * @param address2 - Second address.
 * @param options - Optional settings; the whole object may be omitted.
 *   `citiesMatchDistanceThreshold` and `streetsMatchDistanceThreshold` are
 *   the highest normalized distances still considered a match (0.25 each by
 *   default); `streetCleansingTerms` are the replacement groups used to
 *   canonicalize street names; `normalizationFunction` normalizes the raw
 *   distances; `logUsage` (off by default) sends an `address_comparison`
 *   analytics event with both addresses and every intermediate result.
 * @returns `true` when no component mismatches.
 */
export const compareAddresses = (
  address1: Address,
  address2: Address,
  {
    citiesMatchDistanceThreshold = 0.25,
    streetsMatchDistanceThreshold = 0.25,
    streetCleansingTerms = defaultStreetCleansingTerms,
    normalizationFunction = defaultNormalizationFunction,
    logUsage = false,
  } = {}
): boolean => {
  const { street: street1, city: city1, state: state1, zipCode: zipCode1 } =
    address1;
  const { street: street2, city: city2, state: state2, zipCode: zipCode2 } =
    address2;

  // ZIP Codes comparison
  const zipCodesMismatch = !!(
    zipCode1 &&
    zipCode2 &&
    stringCleansing(zipCode1) !== stringCleansing(zipCode2)
  );

  // States comparison
  const statesMismatch = !!(
    state1 &&
    state2 &&
    stateCleansing(state1) !== stateCleansing(state2)
  );

  // Cities comparison
  const citiesDistance =
    city1 && city2
      ? normalizedDamerauLevenshteinDistance(
          stringCleansing(city1),
          stringCleansing(city2),
          normalizationFunction
        )
      : 0.0;
  const citiesMismatch = citiesMatchDistanceThreshold < citiesDistance;

  // Streets comparison: the street number is split off first so that it is
  // compared exactly, while the rest of the street is compared fuzzily
  const [streetNumber1, cleanStreet1] = getStreetNumberAndCleanName(
    stringCleansing(street1)
  );
  const [streetNumber2, cleanStreet2] = getStreetNumberAndCleanName(
    stringCleansing(street2)
  );

  const streetNumberMismatch = !!(
    streetNumber1 &&
    streetNumber2 &&
    streetNumber1 !== streetNumber2
  );

  const streetsDistance =
    street1 && street2
      ? normalizedDamerauLevenshteinDistanceWithFirstWordsWeighting(
          streetCleansing(cleanStreet1, streetCleansingTerms),
          streetCleansing(cleanStreet2, streetCleansingTerms),
          normalizationFunction
        )
      : 0;
  const streetsMismatch = streetsMatchDistanceThreshold < streetsDistance;

  // Final match value
  const match =
    !zipCodesMismatch &&
    !statesMismatch &&
    !citiesMismatch &&
    !streetNumberMismatch &&
    !streetsMismatch;

  if (logUsage) {
    analytics().track('address_comparison', {
      address1: addressComponentsToString(address1),
      address2: addressComponentsToString(address2),
      zipCodesMismatch,
      statesMismatch,
      citiesDistance,
      citiesMismatch,
      citiesMatchDistanceThreshold,
      streetNumberMismatch,
      streetsDistance,
      streetsMismatch,
      streetsMatchDistanceThreshold,
      match,
    });
  }

  return match;
};
