<?php
/**
 * @package   ats
 * @copyright Copyright (c)2025 Nicholas K. Dionysopoulos / Akeeba Ltd
 * @license   GNU General Public License version 3, or later
 */

namespace Akeeba\Component\ATS\Administrator\Helper;

defined('_JEXEC') || die;

/**
 * Utility class to handle email quote detection and removal.
 *
 * @since     5.4.1
 */
final class EmailQuoteRemover
{
	/**
	 * Maximum number of trailing lines to check for a signature in plain text messages.
	 *
	 * The main body text may contain something that looks like a signature separator (e.g. a row of dashes used as a
	 * divider). To avoid removing too much, only the last MAX_SIGNATURE_LINES lines of the message are inspected.
	 *
	 * @since  5.4.1
	 */
	private const MAX_SIGNATURE_LINES = 10;

	/**
	 * Common patterns used by mail clients to quote the original message in HTML email replies.
	 *
	 * These are complete regular expressions. Note that some of them (the Outlook markers) have no capture group,
	 * so code consuming the matches must only rely on the full match (index 0).
	 *
	 * @since  5.4.1
	 */
	private const HTML_QUOTE_PATTERNS = [
		// Generic HTML quote patterns
		'/<blockquote[^>]*>(.*?)<\/blockquote>/is',
		// GMail quote marker
		'/<div class="gmail_quote"[^>]*>(.*?)<\/div>/is',
		// Apple Mail quote style
		'/<div[^>]*type="cite"[^>]*>(.*?)<\/div>/is',
		// Generic quote class
		'/<div[^>]*class="[^"]*quote[^"]*"[^>]*>(.*?)<\/div>/is',
		// Outlook quote marker
		'/<hr[^>]*id="stopSpelling"/i',
		// Outlook divider
		'/<div[^>]*style="border:none;border-top:solid #[A-F0-9]{6} 1.0pt;padding:3.0pt 0cm 0cm 0cm"/i',
		// Common Outlook/Exchange quote separator
		'/<div[^>]*style="border-top:solid #[A-F0-9]{6} 1.0pt;[^"]*">/i',
	];

	/**
	 * Common patterns used by mail clients to delimit the original email text in HTML email replies.
	 *
	 * These are regular expression fragments WITHOUT delimiters or modifiers. They are embedded into a larger
	 * expression which uses "/" as its delimiter, therefore any literal forward slash must be escaped.
	 *
	 * @since  5.4.1
	 */
	private const HTML_REPLY_DELIMITERS = [
		// English
		'On .* wrote:',
		'On .* wrote at .*:',
		'On .* at .*, .* wrote:',
		'From:.*Sent:.*To:.*Subject:',
		'-{3,}Original Message-{3,}',
		'-{3,}Forwarded Message-{3,}',
		'In reply to:.*',
		'.* <.*> wrote:',
		'Quoting .* <.*>:',
		'Begin forwarded message:',
		'Reply to:.*',
		'-+\s*Reply above this line\s*-+',

		// German
		'Am .* schrieb .*:',
		'Am .* um .* schrieb .*:',
		'Am .* hat .* geschrieben:',
		'Von:.*Gesendet:.*An:.*Betreff:',
		'-{3,}Ursprüngliche Nachricht-{3,}',
		'-{3,}Weitergeleitete Nachricht-{3,}',
		'Als Antwort auf:.*',
		'Zitat von .* <.*>:',
		'Beginn der weitergeleiteten Nachricht:',

		// French
		'Le .* a écrit :',
		'Le .* à .*, .* a écrit :',
		'Le .* à .* .* a écrit :',
		'De :.*Envoyé :.*À :.*Objet :',
		'-{3,}Message d\'origine-{3,}',
		'-{3,}Message transféré-{3,}',
		'En réponse à :.*',
		'Citation de .* <.*>:',
		'Début du message transféré :',

		// Spanish
		'El .* escribió:',
		'El .* a las .* escribió:',
		'El día .* a la\(s\) .*, .* escribió:',
		'De:.*Enviado:.*Para:.*Asunto:',
		'-{3,}Mensaje original-{3,}',
		'-{3,}Mensaje reenviado-{3,}',
		'En respuesta a:.*',
		'Citando a .* <.*>:',
		'Inicio del mensaje reenviado:',

		// Portuguese
		'Em .* escreveu:',
		'Em .* às .*, .* escreveu:',
		'De:.*Enviado:.*Para:.*Assunto:',
		'-{3,}Mensagem original-{3,}',
		'-{3,}Mensagem encaminhada-{3,}',
		'Em resposta a:.*',
		'Citando .* <.*>:',
		'Início da mensagem encaminhada:',

		// Greek
		'Στις .* ο\/η .* έγραψε:',
		'Στις .* ο\/η .* έγραψε στο .*:',
		'Από:.*Αποστολή:.*Προς:.*Θέμα:',
		'-{3,}Αρχικό μήνυμα-{3,}',
		'-{3,}Προωθημένο μήνυμα-{3,}',
		'Σε απάντηση:.*',
		'Παραθέτοντας .* <.*>:',
		'Αρχή προωθημένου μηνύματος:',

		// Dutch
		'Op .* schreef .*:',
		'Op .* om .* schreef .*:',
		'Van:.*Verzonden:.*Aan:.*Onderwerp:',
		'-{3,}Oorspronkelijk bericht-{3,}',
		'-{3,}Doorgestuurd bericht-{3,}',
		'Als antwoord op:.*',
		'Citaat van .* <.*>:',
		'Begin doorgestuurd bericht:',
	];

	/**
	 * Common patterns that indicate email signatures in HTML email.
	 *
	 * @since  5.4.1
	 */
	private const HTML_SIGNATURE_PATTERNS = [
		'/<div[^>]*class="[^"]*signature[^"]*"[^>]*>.*?<\/div>/is',
		'/<div[^>]*class="[^"]*moz-signature[^"]*"[^>]*>.*?<\/div>/is',
		'/<div[^>]*style="[^"]*border-top[^"]*"[^>]*>.*?<\/div>/is',
	];

	/**
	 * Common patterns that indicate email signatures in plain text email.
	 *
	 * Each pattern is tested against a single, already trimmed line.
	 *
	 * @since  5.4.1
	 */
	private const PLAIN_TEXT_SIGNATURE_PATTERNS = [
		// Common signature delimiters
		'/^--\s*$/m',
		'/^_{2,}$/m',
		'/^-{2,}$/m',
		'/^={2,}$/m',
		'/^[_\-=]{2,}$/m',
		// Common signature content (I couldn't find references for this in other languages).
		'/^Sent from my (?:iPhone|iPad|Android|Samsung|mobile)$/m',
	];

	/**
	 * Remove quoted content from the HTML email body
	 *
	 * Two strategies are tried, in order:
	 * 1. A known quote container / marker (see HTML_QUOTE_PATTERNS) which appears exactly once, after some content.
	 * 2. A reply header (see HTML_REPLY_DELIMITERS) wrapped in an HTML element, appearing after some content and
	 *    followed by at most one quote marker.
	 *
	 * In both cases everything from the detected position onwards is discarded. If neither strategy applies the
	 * body is returned unchanged.
	 *
	 * @param   string  $htmlBody  The HTML email body
	 *
	 * @return  string  The HTML body with quotes removed
	 * @since   5.4.1
	 */
	public static function removeHtmlQuotes(string $htmlBody): string
	{
		// First, try to detect and remove common HTML quote patterns from the bottom of the message.
		foreach (self::HTML_QUOTE_PATTERNS as $pattern)
		{
			$allMatches = [];
			$matchCount = preg_match_all($pattern, $htmlBody, $allMatches, PREG_OFFSET_CAPTURE);

			/**
			 * Skip the pattern on no match, on a PCRE error (false), or when there are two or more matches.
			 *
			 * The idea is that if there are two or more quotes this is very likely an inline reply.
			 */
			if ($matchCount !== 1)
			{
				continue;
			}

			// Byte offset of the one and only match.
			$pos = $allMatches[0][0][1];

			// If the quote appears after some content, it's likely at the end
			if ($pos > 0)
			{
				return trim(substr($htmlBody, 0, $pos));
			}
		}

		// If no standard patterns found, try to detect by common reply delimiters embedded in HTML
		foreach (self::HTML_REPLY_DELIMITERS as $header)
		{
			$matches = [];

			if (!preg_match('/<[^>]*>(' . $header . ')<\/[^>]*>/is', $htmlBody, $matches, PREG_OFFSET_CAPTURE))
			{
				continue;
			}

			$headerPos = $matches[0][1];

			// A header at the very start of the body would leave no content behind; try the next delimiter.
			if ($headerPos <= 0)
			{
				continue;
			}

			// Only truncate if there's exactly none or one quote pattern after the header
			if (self::countHtmlQuoteMarkers(substr($htmlBody, $headerPos)) <= 1)
			{
				return trim(substr($htmlBody, 0, $headerPos));
			}
		}

		return $htmlBody;
	}

	/**
	 * Remove signatures from HTML email body
	 *
	 * Every match of every pattern in HTML_SIGNATURE_PATTERNS is removed.
	 *
	 * @param   string  $htmlBody  The HTML email body
	 *
	 * @return  string  The HTML body with signatures removed
	 * @since   5.4.1
	 */
	public static function removeHtmlSignatures(string $htmlBody): string
	{
		// Look for common HTML signature containers
		foreach (self::HTML_SIGNATURE_PATTERNS as $pattern)
		{
			$cleaned = preg_replace($pattern, '', $htmlBody);

			// preg_replace() returns null on a PCRE error (e.g. backtrack limit). Keep the last good body then.
			if ($cleaned !== null)
			{
				$htmlBody = $cleaned;
			}
		}

		return trim($htmlBody);
	}

	/**
	 * Remove signatures from plain text email body
	 *
	 * Only the last MAX_SIGNATURE_LINES lines are inspected. The first of those lines which looks like the start of
	 * a signature, and everything after it, is discarded. Lines before that window are never treated as a signature.
	 *
	 * @param   string  $plainBody  The plain text email body
	 *
	 * @return  string  The plain text body with signatures removed
	 * @since   5.4.1
	 */
	public static function removePlainTextSignatures(string $plainBody): string
	{
		$lines = explode("\n", $plainBody);

		// Make sure we only check up to self::MAX_SIGNATURE_LINES lines for a signature.
		$startAtLine = max(0, count($lines) - self::MAX_SIGNATURE_LINES);

		foreach ($lines as $index => $line)
		{
			// Lines before the inspection window always belong to the message body.
			if ($index < $startAtLine)
			{
				continue;
			}

			// The line is trimmed so that trailing whitespace and CR characters do not prevent a match.
			if (self::isPlainTextSignatureStart(trim($line)))
			{
				// Everything from the signature marker onwards is dropped.
				return trim(implode("\n", array_slice($lines, 0, $index)));
			}
		}

		return trim($plainBody);
	}

	/**
	 * Count the quote markers found in an HTML fragment, across all known quote patterns.
	 *
	 * Counting stops as soon as more than one marker has been found, since callers only need to tell apart
	 * "none or one" from "more than one".
	 *
	 * @param   string  $html  The HTML fragment to inspect
	 *
	 * @return  int  The number of quote markers found (may stop counting once above 1)
	 */
	private static function countHtmlQuoteMarkers(string $html): int
	{
		$total = 0;

		foreach (self::HTML_QUOTE_PATTERNS as $pattern)
		{
			// Use the returned match count: not every pattern has a capture group. False (PCRE error) counts as 0.
			$total += (int) preg_match_all($pattern, $html);

			if ($total > 1)
			{
				break;
			}
		}

		return $total;
	}

	/**
	 * Does this (already trimmed) line look like the start of a plain text signature?
	 *
	 * @param   string  $trimmedLine  A single line of the message, with surrounding whitespace removed
	 *
	 * @return  bool  True if the line matches any of the plain text signature patterns
	 */
	private static function isPlainTextSignatureStart(string $trimmedLine): bool
	{
		foreach (self::PLAIN_TEXT_SIGNATURE_PATTERNS as $pattern)
		{
			if (preg_match($pattern, $trimmedLine))
			{
				return true;
			}
		}

		return false;
	}
}