import { max } from "lodash";
import { isSubset } from "../util/set";
import locales from "./date_magic.locales";
import { DateSpec } from "../interfaces";
import { DateTime } from "luxon";

// Minimum number of values required before a token format is inferred at all.
const MIN_VALUE_COUNT_THRESHOLD = 5;
// Minimum fraction of the values that must share the most common token format.
const MIN_FORMAT_THRESHOLD = 0.5;
// Two-digit years at or below this value are read as 20xx; larger ones as 19xx.
const TWO_DIGIT_CUTOFF_YEAR = 60;
// Maximum number of leading rows examined by estimateDateFormats.
const NUM_ROWS_FOR_ESTIMATION = 250;

// Marker symbols for the three kinds of token produced by tokenize.
export const alpha = Symbol("alpha");
export const number = Symbol("number");
export const separator = Symbol("separator");

type TAlpha = typeof alpha;
type TNumber = typeof number;
type TSeparator = typeof separator;

type TokenType = TAlpha | TNumber | TSeparator;

// A run of consecutive characters that all have the same character type.
interface Token {
  type: TokenType;
  value: string;
}

// The shape of a tokenized value: alpha and number tokens are represented by
// their marker symbol, separator tokens by their literal text.
export type TokenFormat = (TAlpha | TNumber | string)[];

// An inferred token format paired with the DateSpec that locates its date parts.
export type DateParseResult = { tokenFormat: TokenFormat; dateSpec: DateSpec };

// Matches any Unicode letter.
const alphaRegex = /\p{L}/u;
// Matches a single ASCII digit.
const numberRegex = /[0-9]/;

// Lower-cased month-name lists for every known locale. Each locale contributes
// two lists, in order: its full month names, then its short month names.
const monthLists: string[][] = Object.values(locales).flatMap((locale) => [
  locale.months.map((name) => name.toLowerCase()),
  locale.shortMonths.map((name) => name.toLowerCase()),
]);

// TODO: special case for the . character.
// This should be optional in certain cases, like month abbreviations in some formats
// where you would have Apr. May Jun. (no period for May)

// Classifies a single character: letters are alpha, ASCII digits are number,
// and everything else is a separator.
function charType(char: string): TokenType {
  if (alphaRegex.test(char)) return alpha;
  return numberRegex.test(char) ? number : separator;
}

// Splits a string into tokens, where each token is a maximal run of consecutive
// characters of the same type (alpha, number or separator). Returns an empty
// array for the empty string.
//
// The string is walked by code point, and the first token is seeded from the
// first code point as well. Seeding from `string[0]` (a UTF-16 code unit) would
// classify a lone surrogate as a separator and emit a spurious empty separator
// token in front of any value that starts with an astral-plane letter.
export function tokenize(string: string): Token[] {
  const tokens: Token[] = [];
  let current: Token | null = null;

  for (const char of string) {
    const type = charType(char);

    if (current !== null && current.type === type) {
      // same kind of character: extend the current run
      current.value += char;
      continue;
    }

    // the character type changed (or this is the first character):
    // close the previous run, if any, and start a new one
    if (current !== null) tokens.push(current);
    current = { type, value: char };
  }

  if (current !== null) tokens.push(current);
  return tokens;
}

// Reduces a token vector to its format: separator tokens keep their literal
// text, while alpha and number tokens are replaced by their type symbol.
function tokensToFormat(tokens: Token[]): TokenFormat {
  const format: TokenFormat = [];
  for (const token of tokens) {
    format.push(token.type === separator ? token.value : token.type);
  }
  return format;
}

// Builds a string key for a token format by concatenating separator text and
// the descriptions of the type symbols, so formats can be compared by value.
function tokenFormatKey(format: TokenFormat): string {
  let key = "";
  for (const part of format) {
    // a symbol without a description contributes nothing to the key
    key += typeof part === "string" ? part : part.description ?? "";
  }
  return key;
}

// Shortcut: the format key of a token vector.
const tokensToKey = (tokens: Token[]) => {
  const format = tokensToFormat(tokens);
  return tokenFormatKey(format);
};

// Counts how many token vectors share each distinct token format. Formats are
// grouped by their string key. The returned map is ordered by first appearance
// of each format and is keyed by the most recently seen format array of each group.
function getTokenFormatCounts(tokenVecs: Token[][]): Map<TokenFormat, number> {
  const groups = new Map<string, { format: TokenFormat; count: number }>();

  for (const tVec of tokenVecs) {
    const format = tokensToFormat(tVec);
    const key = tokenFormatKey(format);
    const group = groups.get(key);

    if (group === undefined) {
      groups.set(key, { format, count: 1 });
    } else {
      group.format = format;
      group.count += 1;
    }
  }

  const formatCounts = new Map<TokenFormat, number>();
  for (const { format, count } of groups.values()) {
    formatCounts.set(format, count);
  }

  return formatCounts;
}

// Returns the most common token format among the given token vectors, or null
// when there are fewer than MIN_VALUE_COUNT_THRESHOLD vectors or when the most
// common format covers less than MIN_FORMAT_THRESHOLD of them.
// If several formats are tied for most common, the last one encountered wins.
export function getTokenFormat(tokenVecs: Token[][]): TokenFormat | null {
  const total = tokenVecs.length;
  if (total < MIN_VALUE_COUNT_THRESHOLD) return null;

  const entries = [...getTokenFormatCounts(tokenVecs)];
  const topCount = max(entries.map(([, count]) => count))!;

  if (topCount / total < MIN_FORMAT_THRESHOLD) return null;

  // search from the end so that ties resolve to the last format
  const winner = [...entries].reverse().find(([, count]) => count === topCount);
  return winner ? winner[0] : null;
}

// Tallies how many entries of a token format are numbers, alphas and
// separators. Any entry that is not the number or alpha symbol counts as a separator.
function getTokenCounts(tokenFormat: TokenFormat): Map<TokenType, number> {
  const counts = new Map<TokenType, number>();

  for (const part of tokenFormat) {
    const kind: TokenType = part === number || part === alpha ? part : separator;
    counts.set(kind, (counts.get(kind) ?? 0) + 1);
  }

  return counts;
}

// For now, we only consider formats that have exactly 3 numbers, or exactly
// 2 numbers and at least 1 alpha
// e.g. 1/19/88 or 19-Jan-88
function isValidTokenTypes(counts: Map<TokenType, number>): boolean {
  const numberCount = counts.get(number);
  if (numberCount === 3) return true;
  if (numberCount !== 2) return false;

  return (counts.get(alpha) ?? 0) >= 1;
}

// Keeps only the token vectors whose format key equals that of tokenFormat.
function filterTokenVecsByFormat(
  tokenVecs: Token[][],
  tokenFormat: TokenFormat
): Token[][] {
  const wantedKey = tokenFormatKey(tokenFormat);
  const matching: Token[][] = [];

  for (const tVec of tokenVecs) {
    if (tokenFormatKey(tokensToFormat(tVec)) === wantedKey) {
      matching.push(tVec);
    }
  }

  return matching;
}

// Returns a Map with one entry per number position in the token format. Each
// value is a [min, max] tuple over the integers parsed at that position across
// all token vectors. The map is empty when no token vectors are given.
// Every token vector is indexed at the format's number positions, so callers
// are expected to pass vectors that match tokenFormat.
export function getNumberRanges(
  tokenVecs: Token[][],
  tokenFormat: TokenFormat
): Map<number, [min: number, max: number]> {
  const positions: number[] = [];
  tokenFormat.forEach((part, position) => {
    if (part === number) positions.push(position);
  });

  const ranges = new Map<number, [min: number, max: number]>();

  for (const tVec of tokenVecs) {
    for (const position of positions) {
      const value = parseInt(tVec[position].value);
      const seen = ranges.get(position);

      // the first value at a position is both its minimum and maximum
      const low = seen === undefined ? value : Math.min(seen[0], value);
      const high = seen === undefined ? value : Math.max(seen[1], value);
      ranges.set(position, [low, high]);
    }
  }

  return ranges;
}

// For each alpha position in the token format, collects the set of distinct
// lower-cased values found at that position across all token vectors.
// Positions with more than 12 distinct values are left out of the result.
export function getMonthCandidateSets(
  tokenVecs: Token[][],
  tokenFormat: TokenFormat
): Map<number, Set<string>> {
  const candidates = new Map<number, Set<string>>();

  tokenFormat.forEach((part, position) => {
    if (part !== alpha) return;

    const distinct = new Set<string>();
    for (const tVec of tokenVecs) {
      distinct.add(tVec[position].value.toLowerCase());
      // more than 12 distinct values: give up on this position
      if (distinct.size > 12) return;
    }

    candidates.set(position, distinct);
  });

  return candidates;
}

// True when every value in the [min, max] range is a possible day of the month (1-31).
function isValidDayRange(range: [number, number]): boolean {
  const [lowest, highest] = range;
  const startsAtOrAfterFirst = lowest >= 1;
  const endsAtOrBeforeLast = highest <= 31;
  return startsAtOrAfterFirst && endsAtOrBeforeLast;
}

// True when every value in the [min, max] range is a possible month number (1-12).
function isValidMonthRange(range: [number, number]): boolean {
  const [lowest, highest] = range;
  const startsAtOrAfterJanuary = lowest >= 1;
  const endsAtOrBeforeDecember = highest <= 12;
  return startsAtOrAfterJanuary && endsAtOrBeforeDecember;
}

// True when the [min, max] range is consistent with either two-digit years
// (max below 100) or four-digit years (min at least 1000, max below 10000).
function isValidYearRange(range: [number, number]): boolean {
  const [lowest, highest] = range;

  if (highest < 100) return true;
  return lowest >= 1000 && highest < 10000;
}

// Infers the dominant token format of the given token vectors and, when that
// format has a supported mix of token types, a DateSpec locating day, month
// and year within it. Returns null if any step fails.
function getFormatAndSpecs(tokenVecs: Token[][]): DateParseResult | null {
  const tokenFormat = getTokenFormat(tokenVecs);
  if (!tokenFormat) return null;

  const tokenCounts = getTokenCounts(tokenFormat);
  if (!isValidTokenTypes(tokenCounts)) return null;

  // only the values that follow the dominant format feed the range analysis
  const matchingVecs = filterTokenVecsByFormat(tokenVecs, tokenFormat);
  const numberRanges = getNumberRanges(matchingVecs, tokenFormat);

  const dateSpec: DateSpec | null =
    tokenCounts.get(number) === 3
      ? get3NumberSpec(numberRanges)
      : get2NumberSpec(
          numberRanges,
          getMonthCandidateSets(matchingVecs, tokenFormat)
        );

  return dateSpec ? { tokenFormat, dateSpec } : null;
}

// Tokenizes the given values and tries to infer a shared date format from them.
// Returns null when no format could be inferred.
export function tryDateParse(values: string[]): DateParseResult | null {
  const tokenVecs: Token[][] = [];
  for (const value of values) {
    tokenVecs.push(tokenize(value));
  }
  return getFormatAndSpecs(tokenVecs);
}

// Parses a single value using a previously inferred format. Returns null when
// the value's token format differs from the inferred one, or when its parts do
// not form a valid date.
export function getDate(
  value: string,
  { tokenFormat, dateSpec }: DateParseResult
): DateTime | null {
  const tokenVec = tokenize(value);
  const matchesFormat = tokensToKey(tokenVec) === tokenFormatKey(tokenFormat);

  return matchesFormat ? tokenVecToDateTime(tokenVec, dateSpec) : null;
}

// Infers a date format from the given values and converts each value to an ISO
// date string. Returns null when no format can be inferred. Otherwise returns
// one entry per input value, which is null when that value does not follow the
// inferred format or does not form a valid date.
export function getDates(values: string[]): (string | null)[] | null {
  const tokenVecs = values.map((value) => tokenize(value));
  const parseResult = getFormatAndSpecs(tokenVecs);
  if (!parseResult) return null;

  const expectedKey = tokenFormatKey(parseResult.tokenFormat);
  const isoDates: (string | null)[] = [];

  for (const tVec of tokenVecs) {
    const dt =
      tokensToKey(tVec) === expectedKey
        ? tokenVecToDateTime(tVec, parseResult.dateSpec)
        : null;
    isoDates.push(dt === null ? null : dt.toISODate());
  }

  return isoDates;
}

// Builds a DateSpec for an all-numeric format (three number positions) from the
// [min, max] range observed at each position. Returns null when month, day and
// year cannot be told apart unambiguously.
function get3NumberSpec(
  numberRanges: Map<number, [number, number]>
): DateSpec | null {
  // positions whose observed range passes the given validity check
  const positionsWhere = (
    isValidRange: (range: [number, number]) => boolean
  ): number[] =>
    [...numberRanges]
      .filter(([, range]) => isValidRange(range))
      .map(([index]) => index);

  const validDays = positionsWhere(isValidDayRange);
  const validMonths = positionsWhere(isValidMonthRange);
  const validYears = positionsWhere(isValidYearRange);

  // check if we can disambiguate month and day: exactly one position may
  // look like a month
  if (validMonths.length !== 1) return null;
  const monthIndex = validMonths[0];

  // and then see if we can disambiguate day and year.
  // A valid month is always a valid day, so we need exactly two valid day
  // positions: the month and the actual day. With only one, that one is the
  // month and there is no day position at all.
  if (validDays.length !== 2) return null;

  // The day index is whichever of the two valid ones is not the month index
  const dayIndex = validDays.find((index) => index !== monthIndex);
  if (dayIndex === undefined) return null;

  // The year index is whatever's left (we know there are exactly 3 numbers)
  const yearIndex = [...numberRanges.keys()].find(
    (index) => index !== monthIndex && index !== dayIndex
  );
  if (yearIndex === undefined) return null;

  // and make sure the remaining position really holds a valid year. Checking
  // only that *some* position is a valid year is not enough: the month
  // position always passes that check because its maximum is below 100.
  if (!validYears.includes(yearIndex)) return null;

  const yearRange = numberRanges.get(yearIndex);
  if (yearRange === undefined) return null;
  const yearFormat = yearRange[1] < 100 ? "two-digit" : "four-digit";

  return {
    day: { index: dayIndex },
    month: { index: monthIndex, format: "number" },
    year: { index: yearIndex, format: yearFormat },
  };
}

// Builds a DateSpec for a format with two number positions and a textual month.
// numberRanges holds the [min, max] range observed at each number position;
// monthSets holds the distinct lower-cased values seen at each candidate alpha
// position. Returns null when the day, year or month position cannot be determined.
function get2NumberSpec(
  numberRanges: Map<number, [number, number]>,
  monthSets: Map<number, Set<string>>
): DateSpec | null {
  if (monthSets.size === 0) return null;

  const validDays = [...numberRanges]
    .filter(([, range]) => isValidDayRange(range))
    .map(([index]) => index);

  const validYears = [...numberRanges]
    .filter(([, range]) => isValidYearRange(range))
    .map(([index]) => index);

  // exactly one position may look like a day, otherwise day and year are ambiguous
  if (validDays.length !== 1) return null;
  const dayIndex = validDays[0];

  // The year is the valid-year position that is not the day. If the day
  // position is the only one with a valid year range, there is no usable year
  // position; bail out instead of indexing past the end of validYears.
  const yearIndex = validYears.find((index) => index !== dayIndex);
  if (yearIndex === undefined) return null;

  const yearRange = numberRanges.get(yearIndex);
  if (yearRange === undefined) return null;

  // The month is the first alpha position whose values are all contained in
  // one of the known month-name lists.
  let month: { index: number; names: string[] } | null = null;
  for (const [index, valueSet] of monthSets) {
    const names = monthLists.find((monthList) =>
      isSubset(valueSet, new Set(monthList))
    );

    if (names) {
      month = { index, names };
      break;
    }
  }

  if (month === null) return null;

  const yearFormat = yearRange[1] < 100 ? "two-digit" : "four-digit";

  return {
    day: { index: dayIndex },
    month: { index: month.index, format: month.names },
    year: { index: yearIndex, format: yearFormat },
  };
}

// Converts a token vector into a DateTime by reading day, month and year from
// the positions given in dateSpec. Two-digit years at or below
// TWO_DIGIT_CUTOFF_YEAR become 20xx, larger ones 19xx. Textual months are
// looked up (lower-cased) in the spec's month list. Returns null when the
// month name is unknown or the resulting date is invalid.
function tokenVecToDateTime(
  tokenVec: Token[],
  dateSpec: DateSpec
): DateTime | null {
  const numberAt = (index: number) => parseInt(tokenVec[index].value);

  const day = numberAt(dateSpec.day.index);

  const rawYear = numberAt(dateSpec.year.index);
  let year = rawYear;
  if (dateSpec.year.format !== "four-digit") {
    const century = rawYear <= TWO_DIGIT_CUTOFF_YEAR ? 2000 : 1900;
    year = century + rawYear;
  }

  const monthText = tokenVec[dateSpec.month.index].value;
  const monthFormat = dateSpec.month.format;
  let month: number;
  if (monthFormat === "number") {
    month = parseInt(monthText);
  } else {
    // month lists are ordered January first, so position + 1 is the month number
    const position = monthFormat.indexOf(monthText.toLowerCase());
    if (position === -1) return null;
    month = position + 1;
  }

  const dateTime = DateTime.fromObject({ year, month, day });
  return dateTime.isValid ? dateTime : null;
}

// This function is used at the time that we parse the file, to see if we might
// have a valid date format for a column. We don't know yet how the column will
// be mapped, so we don't even know if it's a date field at this point. We do
// this now so we can determine whether to show the date formatting suggestion later.
//
// Only the first NUM_ROWS_FOR_ESTIMATION rows are examined, and empty or
// non-string cells are ignored. Returns a map from column index to the inferred
// DateSpec, with entries only for columns where a date format was found.
export function estimateDateFormats(
  data: string[][],
  skipLast: boolean
): Map<number, DateSpec> {
  const tVecsByCol = new Map<number, Token[][]>();

  for (const fullRow of data.slice(0, NUM_ROWS_FOR_ESTIMATION)) {
    // NOTE(review): this drops the last TWO entries of each row even though the
    // flag is named skipLast - confirm that two trailing columns is intended.
    const row = skipLast ? fullRow.slice(0, -2) : fullRow;

    for (const [colIdx, val] of row.entries()) {
      if (typeof val !== "string" || val === "") continue;

      const existing = tVecsByCol.get(colIdx);
      if (existing) {
        existing.push(tokenize(val));
      } else {
        tVecsByCol.set(colIdx, [tokenize(val)]);
      }
    }
  }

  const dateFormats = new Map<number, DateSpec>();
  tVecsByCol.forEach((tVecs, colIndex) => {
    const parseResult = getFormatAndSpecs(tVecs);
    if (parseResult) dateFormats.set(colIndex, parseResult.dateSpec);
  });

  return dateFormats;
}
