/**
 * Stateless string helpers for cleaning HTML fragments, detecting repeated
 * words and extracting words/values from markup.
 *
 * All helpers work on plain strings with regular expressions; no DOM is used.
 */
export class TextHelper {
  /** Opening tag wrapped around every highlighted duplicate word. */
  private static readonly HIGHLIGHT_OPEN = '<span style="color:red;text-transform:capitalize">';

  /** Closing tag matching {@link TextHelper.HIGHLIGHT_OPEN}. */
  private static readonly HIGHLIGHT_CLOSE = '</span>';

  /** Escapes every regex metacharacter in `value` so it can be embedded literally in a `RegExp` source. */
  private static escapeRegExp = (value: string): string => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  /**
   * Trims the text, then deletes every `&nbsp;` entity and every empty
   * `<p></p>` pair.
   *
   * NOTE(review): despite the name, runs of ordinary spaces are not collapsed,
   * and because trimming happens first, whitespace that becomes leading or
   * trailing after the removals is kept.
   *
   * @param text HTML fragment to clean.
   * @returns The cleaned fragment.
   */
  static removeDoubleSpaces = (text: string): string => {
    const trimmed = text.trim();
    const withoutNbsp = trimmed.replace(/&nbsp;/g, '');
    return withoutNbsp.replace(/<p><\/p>/g, '');
  };

  /**
   * Normalizes an HTML fragment:
   *  1. removes zero-width characters (U+200B..U+200D and U+FEFF),
   *  2. removes runs of blank paragraphs (containing only whitespace or
   *     `&nbsp;`) at the very start and at the very end of the text,
   *  3. removes whitespace that directly precedes `. , ; : ! ?`.
   *
   * @param text HTML fragment to normalize.
   * @returns The normalized fragment.
   */
  static applyFormatting = (text: string): string => {
    const withoutZeroWidth = text.replace(/[\u200B-\u200D\uFEFF]/g, '');

    // First alternative: blank paragraphs anchored at the start.
    // Second alternative: blank paragraphs anchored at the end.
    const withoutEdgeParagraphs = withoutZeroWidth.replace(
      /^((<p[^>]*>(&nbsp;|\s)*<\/p>(\s*))+)|((\s*)((<p[^>]*>(&nbsp;|\s)*<\/p>)(\s*))+)$/g,
      ''
    );

    return withoutEdgeParagraphs.replace(/\s+(?=[.,;:!?])/g, '');
  };

  /**
   * Finds words that occur more than once (case-insensitively) in an HTML
   * fragment and wraps each whole-word occurrence in a red `<span>`.
   *
   * Words are obtained by replacing tags, `&nbsp;` and a fixed set of
   * punctuation characters with spaces and splitting on whitespace. For each
   * repeated word the reported (and inserted) spelling is the casing of its
   * last occurrence.
   *
   * Highlighting is applied in a single pass and only to text outside of
   * `<...>` tags, so tag names, attributes and the inserted `<span>` markup
   * are never rewritten. Matching uses `\b` word boundaries, so a duplicate
   * that starts or ends with a non-word character (e.g. `+`) is reported but
   * is only highlighted where it directly touches a word character.
   *
   * @param text HTML fragment to analyse.
   * @param selectableIteratorList Optional allow-list (compared
   *   case-insensitively); when provided, only duplicates contained in it are
   *   reported. An empty list therefore reports nothing.
   * @returns The duplicate words and the fragment with duplicates highlighted.
   */
  static findDuplicateWords = (
    text: string,
    selectableIteratorList?: string[]
  ): { duplicateWords: string[]; highlightedContent: string } => {
    const plainText = text.replace(/<[^>]+>/g, ' ');
    // Replace HTML entities and special characters with spaces
    const cleanedText = plainText.replace(/&nbsp;|[.,\/#!$%\^&\*;:{}=\-_`~()]/g, ' ');
    // Collapse whitespace so a single-space split yields the words
    const words = cleanedText.replace(/\s+/g, ' ').trim().split(' ');

    // Prototype-less dictionary: words such as "constructor" or "toString"
    // must not resolve to members inherited from Object.prototype.
    const frequencies: Record<string, string[]> = Object.create(null);
    words.forEach(word => {
      const key = word.toLowerCase();
      const seen = frequencies[key];
      if (seen) {
        seen.push(word);
      } else {
        frequencies[key] = [word];
      }
    });

    const allowedKeys = selectableIteratorList
      ? selectableIteratorList.map(item => item.toLowerCase())
      : undefined;

    const duplicateWords = Object.keys(frequencies)
      .filter(key => frequencies[key].length > 1 && (!allowedKeys || allowedKeys.includes(key)))
      .map(key => frequencies[key][frequencies[key].length - 1]);

    if (duplicateWords.length === 0) {
      // An empty alternation would match at every word boundary.
      return { duplicateWords, highlightedContent: text };
    }

    // Longest first, so a word wins over a shorter duplicate that is its prefix.
    const candidates = duplicateWords.slice().sort((a, b) => b.length - a.length);
    // One capture group per candidate tells the replacer which word matched.
    const alternatives = candidates.map(word => `(${TextHelper.escapeRegExp(word)})`).join('|');
    const duplicatePattern = new RegExp(`\\b(?:${alternatives})\\b`, 'gi');

    // Splitting on a capturing pattern keeps the tags: even indexes are text,
    // odd indexes are markup that must stay untouched.
    const highlightedContent = text
      .split(/(<[^>]+>)/)
      .map((segment, index) => {
        if (index % 2 === 1) {
          return segment;
        }
        return segment.replace(duplicatePattern, (_match: string, ...captures: unknown[]) => {
          const hit = captures.slice(0, candidates.length).findIndex(capture => capture !== undefined);
          return `${TextHelper.HIGHLIGHT_OPEN}${candidates[hit]}${TextHelper.HIGHLIGHT_CLOSE}`;
        });
      })
      .join('');

    return { duplicateWords, highlightedContent };
  };

  /**
   * Removes red highlight spans, keeping only their inner content.
   *
   * Matches any `<span>` whose `style` attribute starts with `color: red;`
   * (with or without the space, optionally followed by further declarations),
   * which covers both the legacy `<span style="color: red;">` form and the
   * markup produced by {@link TextHelper.findDuplicateWords}.
   *
   * @param text HTML fragment possibly containing highlight spans.
   * @returns The fragment with those spans unwrapped.
   */
  static findAndReplaceRedWords = (text: string): string => {
    const redWordRegex = /<span style="color:\s*red;[^"]*">(.*?)<\/span>/g;
    return text.replace(redWordRegex, '$1');
  };

  /**
   * Collects the `initial-value` attribute of every `<label ...>` tag that
   * has a non-empty one.
   *
   * @param text HTML fragment to scan.
   * @returns The attribute values in document order, or `[]` if none match.
   */
  static extractSelectableWords = (text: string): string[] => {
    const labelMatches = text.match(/<label[^>]+initial-value="([^"]+)"/g);
    if (!labelMatches) {
      return [];
    }

    // A global match() drops capture groups, so re-run the attribute pattern
    // on each matched tag prefix to pull out the value.
    return labelMatches.map(labelMatch => {
      const valueMatch = /initial-value="([^"]+)"/.exec(labelMatch);
      return valueMatch ? valueMatch[1] : '';
    });
  };

  /**
   * Extracts the words of an HTML fragment: tags are deleted, `&nbsp;`
   * (and the literal text `nbspnbsp`) become spaces, every character that is
   * neither `\w` nor whitespace is dropped, and the rest is split on
   * whitespace.
   *
   * NOTE(review): tags are removed without inserting a separator, so text of
   * adjacent elements (`<p>a</p><p>b</p>`) is joined into one word, and `\w`
   * is ASCII-only, so accented letters are stripped.
   *
   * @param htmlText HTML fragment to tokenize.
   * @returns The non-empty words in order of appearance.
   */
  static extractWordsFromHTML = (htmlText: string): string[] => {
    const withoutTags = htmlText.replace(/<[^>]*>/g, '');
    const withoutNbsp = withoutTags.replace(/&nbsp;|nbspnbsp/gi, ' ');

    // Remove any remaining special characters and split into words
    return withoutNbsp
      .replace(/[^\w\s]/gi, '')
      .split(/\s+/)
      .filter(word => word.trim() !== '');
  };
}
