import punctuation from './punctuation.json';

/**
 * Parsed form of a location key of the shape `<source>|<target>`, where each
 * side is a `+`-separated list of numbers (see `Tokenization.parseTheLocationKey`).
 */
export interface LocationData {
  /** Numbers parsed from the part of the key before the `|`. */
  sourceIndex: number[];
  /** Numbers parsed from the part of the key after the `|`. */
  targetIndex: number[];
}

/**
 * Splits text into word and punctuation tokens, driven by the keys of the
 * imported punctuation table.
 *
 * Punctuation keys longer than one UTF-16 code unit ("long" punctuation) are
 * isolated first; the remaining single-unit ("short") punctuation is then
 * split off each whitespace-delimited element, with special handling for
 * numbers with thousands separators, apostrophes and chapter:verse codes.
 */
export default class Tokenization {
  /** Punctuation keys with `length > 1`, sorted longest first. */
  longPunctuation: string[] = [];

  /** Punctuation keys with `length <= 1`. */
  shortPunctuation: string[] = [];

  /** Escaped character-class body matching any short punctuation. */
  matchTerm = '';

  constructor() {
    this.computePunctuationTerms();
  }

  /**
   * Escapes text for literal use inside a regular expression (both inside and
   * outside a character class).
   *
   * Only non-alphanumeric characters are escaped: prefixing a letter or digit
   * with a backslash would turn it into a class or control escape such as
   * `\d` or `\t` instead of a literal.
   */
  private static escapeForRegExp(text: string): string {
    return text.replace(/[^A-Za-z0-9]/g, '\\$&');
  }

  /** Surrounds every match of `pattern` in `input` with spaces and trims the result. */
  private static padMatches(input: string, pattern: string): string {
    return input.replace(new RegExp(pattern, 'gm'), ' $& ').trim();
  }

  /**
   * Parses one side of a location key (a `+`-separated list of indices).
   *
   * @throws Error with `errorMessage` when a part is empty, not an integer,
   *   or negative.
   */
  private static parseIndexList(list: string, errorMessage: string): number[] {
    return list.split('+').map((element: string): number => {
      const value = Number(element);
      // Number('') is 0 and Number('abc') is NaN, so both need explicit checks.
      if (element.trim() === '' || !Number.isInteger(value) || value < 0) {
        throw new Error(errorMessage);
      }
      return value;
    });
  }

  /** Partitions the punctuation keys into long/short lists and derives `matchTerm`. */
  private computePunctuationTerms(): void {
    Object.keys(punctuation).forEach((element: string) => {
      if (element.length > 1) {
        this.longPunctuation.push(element);
      } else {
        this.shortPunctuation.push(element);
      }
    });

    this.matchTerm = Tokenization.escapeForRegExp(this.shortPunctuation.join(''));
    // Longest first, so that a longer entry wins over a shorter one it contains.
    this.longPunctuation.sort((l: string, r: string): number => r.length - l.length);
  }

  /** Character class (with brackets) for all short punctuation except `excluded`. */
  private shortPunctuationClassWithout(excluded: string): string {
    const kept = this.shortPunctuation.filter((mark: string) => mark !== excluded);
    return `[${Tokenization.escapeForRegExp(kept.join(''))}]`;
  }

  /**
   * Surrounds every long punctuation occurrence with spaces.
   *
   * All entries are matched in a single pass using one longest-first
   * alternation; replacing them one entry at a time would let a shorter entry
   * re-split a longer one that was already isolated (e.g. `..` inside `...`).
   */
  private handleLongPunctuation(input: string): string {
    if (this.longPunctuation.length === 0) {
      // An empty alternation would match at every position.
      return input.trim();
    }

    const alternation = this.longPunctuation
      .map((mark: string) => Tokenization.escapeForRegExp(mark))
      .join('|');
    const reg = new RegExp(alternation, 'g');
    // A replacer function keeps `$` sequences in the match from being
    // interpreted as replacement patterns.
    return input.replace(reg, (match: string) => ` ${match} `).trim();
  }

  /** Surrounds every short punctuation character with spaces. */
  private handleShortPunctuation(input: string): string {
    return Tokenization.padMatches(input, `[${this.matchTerm}]`);
  }

  /**
   * Reports whether `element` contains a number with comma group separators
   * such as `1,234` or `1,234,567.89`.
   */
  protected isNumberWithSeparator(element: string): boolean {
    return /(\d+,\d{3},\d{3}(\.\d+)*)|(\d+,\d{3})/.test(element);
  }

  /**
   * Splits off short punctuation but leaves commas in place, except for a
   * comma at the very end of the element.
   */
  private handleSegmentIsNumberWithSeparator(input: string): string {
    return Tokenization.padMatches(input, `${this.shortPunctuationClassWithout(',')}|(,$)`);
  }

  /** Reports whether `element` contains `’` between word characters (e.g. `don’t`). */
  protected hasApostrophe(element: string): boolean {
    return /\w+’\w{1,2}/.test(element);
  }

  /** Splits off short punctuation but keeps `’` attached. */
  private handleSegmentWithApostrophe(input: string): string {
    return Tokenization.padMatches(input, this.shortPunctuationClassWithout('’'));
  }

  /** Reports whether `element` contains a `digits:digits` code (e.g. `3:16`). */
  protected isChapterVerseCode(element: string): boolean {
    return /\d+:\d+/.test(element);
  }

  /** Splits off short punctuation but keeps `:` attached. */
  private handleChapterVerseCode(input: string): string {
    return Tokenization.padMatches(input, this.shortPunctuationClassWithout(':'));
  }

  /** Applies the first matching special-case handler, else the generic short-punctuation split. */
  private padElement(element: string): string {
    if (this.isNumberWithSeparator(element)) {
      return this.handleSegmentIsNumberWithSeparator(element);
    }
    if (this.hasApostrophe(element)) {
      return this.handleSegmentWithApostrophe(element);
    }
    if (this.isChapterVerseCode(element)) {
      return this.handleChapterVerseCode(element);
    }
    return this.handleShortPunctuation(element);
  }

  /**
   * Tokenizes `input` into words and punctuation marks.
   *
   * @param input Text to tokenize.
   * @returns Tokens in input order; empty or whitespace-only input yields `[]`.
   */
  public tokenizer(input: string): string[] {
    const segments: string[] = [];
    const regexSplit = /\s+/;

    this.handleLongPunctuation(input)
      .split(regexSplit)
      .forEach((element: string): void => {
        if (element === '') {
          // Splitting an empty string yields one empty element; it is not a token.
          return;
        }
        // Own-property check: `in` would also accept inherited names such as
        // `constructor` or `__proto__`.
        if (Object.prototype.hasOwnProperty.call(punctuation, element)) {
          segments.push(element);
          return;
        }
        this.padElement(element)
          .split(regexSplit)
          .forEach((item: string): void => {
            if (item !== '') {
              segments.push(item);
            }
          });
      });

    return segments;
  }

  /** Tokenizes `input` and joins the tokens with single spaces. */
  public tokenize2string(input: string): string {
    return this.tokenizer(input).join(' ');
  }

  /**
   * Parses a location key of the form `<source>|<target>`, where each side is
   * a `+`-separated list of non-negative integers (e.g. `0+1|2`).
   *
   * @param input Location key to parse.
   * @returns The parsed source and target index lists.
   * @throws Error when the key does not have exactly two non-empty sides, or
   *   when any index is empty, not an integer, or negative.
   */
  public parseTheLocationKey(input: string): LocationData {
    const result = input.split('|');

    if (result.length !== 2 || result[0] === '' || result[1] === '') {
      throw new Error('Invalid location key.');
    }

    const sourceIndex = Tokenization.parseIndexList(
      result[0],
      'Invalid source index in the location key.',
    );
    const targetIndex = Tokenization.parseIndexList(
      result[1],
      'Invalid target index in the location key.',
    );

    return { sourceIndex, targetIndex };
  }
}
