import { useMemo } from 'react';
import { stripTags, truncateText } from 'utils';

/**
 * Extracts the first sentence from a given text.
 *
 * A sentence ends at the first `.`, `!`, or `?` that is followed (optionally after closing
 * quotes or brackets) by whitespace or the end of the text. Punctuation inside a token,
 * such as the dots in `2.0` or `example.com`, therefore does not end the sentence.
 *
 * @param {string} text - The input text from which to extract the first sentence.
 * @returns {string} The trimmed first sentence, or the entire trimmed text if no
 *   sentence-ending punctuation is found.
 */
const getFirstSentence = (text: string): string => {
  // Anchored at the start so the result is always a prefix of the (trimmed) input;
  // the lookahead keeps the terminator from matching in the middle of a token.
  const sentenceMatch = text.match(
    /^[\s\S]*?[.!?]["')\]\u2019\u201D]*(?=\s|$)/,
  );
  return (sentenceMatch ? sentenceMatch[0] : text).trim();
};

/**
 * Truncates a string to a specified maximum length without breaking words and appends '...' if truncated.
 *
 * `maxLength` limits the text kept from the input; the `'...'` suffix is added on top of
 * it, so a truncated result can be up to `maxLength + 3` characters long.
 *
 * @param {string} text - The text to truncate.
 * @param {number} maxLength - The maximum number of characters of `text` to keep.
 * @returns {string} `text` unchanged when it already fits, otherwise the shortened text
 *   with '...' appended.
 */
const truncateToMaxLength = (text: string, maxLength: number): string => {
  if (text.length <= maxLength) return text;

  let head = text.slice(0, maxLength);

  // When the character right after the cut is whitespace, the cut already sits on a
  // word boundary and no word needs to be dropped.
  if (!/\s/.test(text.charAt(maxLength))) {
    const lastSpace = head.lastIndexOf(' ');
    if (lastSpace > 0) {
      // Back up to the previous word boundary so the last word is not split.
      head = head.slice(0, lastSpace);
    } else if (/[\uD800-\uDBFF]$/.test(head)) {
      // No boundary to fall back to: at least avoid leaving half of a surrogate pair.
      head = head.slice(0, -1);
    }
  }

  // Avoid dangling whitespace in front of the ellipsis.
  return `${head.trimEnd()}...`;
};

/**
 * Derives a title from a paragraph's text.
 *
 * The title is the paragraph's first sentence (per `getFirstSentence`), shortened with
 * `truncateToMaxLength` when it is longer than 100 characters.
 *
 * @param {string} para - The paragraph text from which to extract the title.
 * @returns {string} The first sentence, truncated if it exceeds 100 characters.
 */
const extractTitleFromParagraph = (para: string): string =>
  // truncateToMaxLength returns its input untouched when it already fits,
  // so no separate length check is needed here.
  truncateToMaxLength(getFirstSentence(para), 100);

/** Collapses every run of whitespace to a single space and trims both ends. */
const collapseWhitespace = (value: string): string =>
  value.replace(/\s+/g, ' ').trim();

/**
 * Processes a document's text to extract a title and paragraph for display on a card.
 *
 * The processing follows these steps:
 * 1. Looks for the first complete `<h1>`-`<h6>` element within the first 600 characters
 *    and uses its tag-stripped text as the title (heading titles are not truncated).
 * 2. If no non-empty heading title is found, takes the first `<p>` element anywhere in
 *    the text and derives the title from it: its first sentence, truncated with an
 *    ellipsis when longer than 100 characters.
 * 3. Builds the paragraph from the whole tag-stripped text with whitespace collapsed.
 * 4. Removes the title (without a trailing ellipsis) from the start of the paragraph,
 *    regardless of whether it came from a heading or a paragraph.
 * 5. Passes the paragraph through `truncateText` with a limit of 1500.
 *
 * The result is memoized on `text`.
 *
 * @param {string | undefined} text - The input HTML text to process.
 * @returns {{
 *   processed: boolean;
 *   title?: string | null;
 *   paragraph?: string;
 *   hasContent?: boolean;
 * }} `{ processed: false }` when `text` is empty or undefined; otherwise the title
 *   (`null` when none could be derived), the paragraph, and a flag indicating whether
 *   either of them is non-blank.
 */
const useDocumentDescription = (
  text?: string,
): {
  processed: boolean;
  title?: string | null;
  paragraph?: string;
  hasContent?: boolean;
} =>
  useMemo(() => {
    if (!text) {
      return {
        processed: false,
      };
    }

    let title: string | null = null;

    // `[\s\S]` instead of `.` so a heading whose content spans several lines still matches.
    const headingMatch = text
      .slice(0, 600)
      .match(/<(h[1-6])[^>]*>([\s\S]*?)<\/\1>/i);
    if (headingMatch) {
      // Normalise whitespace the same way as the paragraph below, otherwise the
      // prefix removal further down cannot find the title. Blank titles become null.
      title = collapseWhitespace(stripTags(headingMatch[0])) || null;
    }

    if (!title) {
      // `(?:\s[^>]*)?` restricts the match to real <p> tags; a bare `<p[^>]*>` would
      // also accept <pre>, <picture>, <param>, ...
      const paragraphMatch = text.match(/<p(?:\s[^>]*)?>([\s\S]*?)<\/p>/i);
      if (paragraphMatch) {
        const firstParagraph = collapseWhitespace(
          stripTags(paragraphMatch[0]),
        );
        title = extractTitleFromParagraph(firstParagraph) || null;
      }
    }

    let paragraph = collapseWhitespace(stripTags(text));

    if (title) {
      // Drop the ellipsis a truncated title may carry, then escape regex
      // metacharacters so the title is matched literally.
      const titleEscaped = title
        .replace(/\.\.\.$/, '')
        .replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const titleRegex = new RegExp(`^${titleEscaped}\\s*`, 'i');
      paragraph = paragraph.replace(titleRegex, '').trim();
    }

    paragraph = truncateText(paragraph, 1500);

    return {
      processed: true,
      title,
      paragraph,
      hasContent: !!title?.trim() || !!paragraph.trim(),
    };
  }, [text]);

export default useDocumentDescription;
