<?php

namespace App\Services;

use Illuminate\Support\Facades\Storage;
use PhpOffice\PhpWord\IOFactory as WordIOFactory;
use PhpOffice\PhpWord\PhpWord;
use Smalot\PdfParser\Parser as PdfParser;

class AssessorToolParserService
{
    /**
     * Parse an assessor tool document and extract its structure.
     *
     * The parser is chosen from the MIME type first and the file extension
     * second; both checks are case-insensitive so "Tool.PDF" or "Tool.DOCX"
     * uploaded with a generic MIME type is still recognised.
     *
     * @param string $filePath Path to the uploaded file
     * @param string $mimeType MIME type of the file
     * @return array Structured data with keys 'header_data', 'sections' and 'total_marks'
     * @throws \Exception When the file type is unsupported or parsing fails
     */
    public function parseDocument($filePath, $mimeType)
    {
        $mime = strtolower((string) $mimeType);
        $extension = strtolower(pathinfo((string) $filePath, PATHINFO_EXTENSION));

        // 'word' also covers 'application/msword' and the OOXML wordprocessingml type.
        if (str_contains($mime, 'word') || $extension === 'docx') {
            return $this->parseWordDocument($filePath);
        }

        if (str_contains($mime, 'pdf') || $extension === 'pdf') {
            return $this->parsePdfDocument($filePath);
        }

        throw new \Exception('Unsupported file type. Please upload a Word (.docx) or PDF file.');
    }

    /**
     * Parse a Word document.
     *
     * Walks every top-level element of every section: text elements are only
     * inspected for section headers ("TASK n: PRACTICAL|ORAL"), tables are
     * read row by row and each row with a description and positive marks
     * becomes one item.
     *
     * @param string $filePath Absolute path, or path relative to the public disk
     * @return array Structured data with keys 'header_data', 'sections' and 'total_marks'
     * @throws \Exception When PhpWord is missing, the file is not found or loading fails
     */
    protected function parseWordDocument($filePath)
    {
        // Check if PhpWord is available
        if (!class_exists('\PhpOffice\PhpWord\IOFactory')) {
            throw new \Exception('PhpWord library is not installed. Please use manual entry or install: composer require phpoffice/phpword');
        }

        try {
            $phpWord = WordIOFactory::load($this->resolveFilePath($filePath));
            $headerData = $this->extractHeaderData($phpWord);

            $items = [];
            $currentSection = null;
            $rowOrder = 0;

            foreach ($phpWord->getSections() as $section) {
                foreach ($section->getElements() as $element) {
                    if (!method_exists($element, 'getRows')) {
                        // Not a table: the only thing of interest is a section header.
                        $text = trim($this->elementText($element));
                        if ($this->isSectionHeader($text)) {
                            $currentSection = $this->cleanSectionName($text);
                        }
                        continue;
                    }

                    // This is a table. Each QUESTION row is a single item
                    // description, even when the visible text in Word wraps.
                    foreach ($element->getRows() as $row) {
                        $cells = $row->getCells();
                        $cellCount = count($cells);

                        if ($cellCount < 2) {
                            continue;
                        }

                        // Typical assessor tool layout:
                        // [0] = serial number, [1] = description, [2] = marks, [3] = comments
                        // With only two columns, treat them as [description, marks].
                        [$descriptionIndex, $marksIndex] = $cellCount >= 3 ? [1, 2] : [0, 1];

                        $itemText = $this->getCellText($cells[$descriptionIndex] ?? null);

                        // A table row may itself be a section header (e.g. "TASK 1: ...").
                        if ($this->isSectionHeader($itemText)) {
                            $currentSection = $this->cleanSectionName($itemText);
                            continue;
                        }

                        // Skip header rows (column titles)
                        if ($this->isHeaderRow($itemText)) {
                            continue;
                        }

                        $itemDescription = $this->cleanItemDescription($itemText);
                        $marksAvailable = $this->extractMarks($cells[$marksIndex] ?? null);

                        // Only create an item when we have some text and positive marks.
                        if ($itemDescription !== '' && $marksAvailable > 0) {
                            $rowOrder++;
                            $items[] = $this->buildItem($currentSection, $itemDescription, $marksAvailable, $rowOrder);
                        }
                    }
                }
            }

            return [
                'header_data' => $headerData,
                'sections' => $items,
                'total_marks' => array_sum(array_column($items, 'marks_available')),
            ];
        } catch (\Exception $e) {
            // Keep the original exception chained so the real stack trace is not lost.
            throw new \Exception('Failed to parse Word document: ' . $e->getMessage(), 0, $e);
        }
    }

    /**
     * Parse a PDF document.
     *
     * Extracts the plain text of the PDF and rebuilds items from its lines.
     *
     * @param string $filePath Absolute path, or path relative to the public disk
     * @return array Structured data with keys 'header_data' (always empty), 'sections' and 'total_marks'
     * @throws \Exception When the PDF parser is missing, the file is not found or parsing fails
     */
    protected function parsePdfDocument($filePath)
    {
        // Check if PDF parser is available
        if (!class_exists('\Smalot\PdfParser\Parser')) {
            throw new \Exception('PDF Parser library is not installed. Please use manual entry or install: composer require smalot/pdfparser');
        }

        try {
            $parser = new PdfParser();
            $pdf = $parser->parseFile($this->resolveFilePath($filePath));
            $items = $this->extractItemsFromPdfText((string) $pdf->getText());

            return [
                'header_data' => [],
                'sections' => $items,
                'total_marks' => array_sum(array_column($items, 'marks_available')),
            ];
        } catch (\Exception $e) {
            // Keep the original exception chained so the real stack trace is not lost.
            throw new \Exception('Failed to parse PDF document: ' . $e->getMessage(), 0, $e);
        }
    }

    /**
     * Resolve an uploaded file path to an existing path on disk.
     *
     * Tries, in order: the path as given, the path relative to the "public"
     * storage disk, and the path relative to storage/app/public.
     *
     * @param string $filePath Absolute or public-disk-relative path
     * @return string Path of the first candidate that exists
     * @throws \Exception When none of the candidates exists
     */
    private function resolveFilePath($filePath): string
    {
        if (file_exists($filePath)) {
            // Already a usable path
            return $filePath;
        }

        $diskPath = Storage::disk('public')->path($filePath);
        if (file_exists($diskPath)) {
            return $diskPath;
        }

        $storagePath = storage_path('app/public/' . ltrim($filePath, '/'));
        if (file_exists($storagePath)) {
            return $storagePath;
        }

        throw new \Exception("File not found at: {$filePath}. Tried: " . $diskPath . " and " . $storagePath);
    }

    /**
     * Rebuild assessment items from the plain text of a PDF.
     *
     * Lines without a recognisable marks value are buffered as wrapped
     * description text; the first following line that carries marks closes
     * the item. Section headers and column-title rows discard the buffer.
     *
     * @param string $text Raw text extracted from the PDF
     * @return array List of item arrays as produced by buildItem()
     */
    private function extractItemsFromPdfText(string $text): array
    {
        $items = [];
        $currentSection = null;
        $rowOrder = 0;
        $buffer = []; // holds wrapped description lines until we see marks

        foreach (explode("\n", $text) as $rawLine) {
            $line = trim(preg_replace('/\s+/', ' ', $rawLine));

            if ($line === '') {
                continue;
            }

            // Section headers (e.g. "TASK 1: PRACTICAL") reset the buffer
            if ($this->isSectionHeader($line)) {
                $currentSection = $this->cleanSectionName($line);
                $buffer = [];
                continue;
            }

            // Skip obvious table header rows
            if ($this->isHeaderRow($line)) {
                $buffer = [];
                continue;
            }

            $parsed = $this->splitMarksFromLine($line);

            if ($parsed === null) {
                // No marks on this line - treat it as a continuation of the
                // current item description and keep buffering.
                $buffer[] = $line;
                continue;
            }

            [$marksAvailable, $lineText] = $parsed;

            // Description is everything buffered plus this line without its marks token.
            $itemDescription = $this->cleanItemDescription(trim(implode(' ', $buffer) . ' ' . $lineText));

            // Very short fragments (<= 5 bytes) are treated as noise, not items.
            if ($marksAvailable > 0 && strlen($itemDescription) > 5) {
                $rowOrder++;
                $items[] = $this->buildItem($currentSection, $itemDescription, $marksAvailable, $rowOrder);
            }

            // Reset buffer ready for the next item
            $buffer = [];
        }

        return $items;
    }

    /**
     * Find the marks value on a PDF text line and separate it from the text.
     *
     * Preference order:
     *  1. a standalone number at the end of the line (ignoring parenthetical
     *     notes), optionally followed by "mark"/"marks" - the marks column;
     *  2. a number directly followed by "mark"/"marks" anywhere on the line,
     *     including inside a note such as "(Award 2 marks or zero)".
     * Numbers elsewhere (serial numbers, quantities inside the sentence) are
     * not treated as marks.
     *
     * @param string $line Whitespace-normalised, non-empty line
     * @return array|null [float marks, string remaining text], or null when the line carries no marks
     */
    private function splitMarksFromLine(string $line): ?array
    {
        $bare = $this->cleanItemDescription($line);

        // The number must start the line or follow whitespace so "A4" or "1.2.3" do not count.
        if (preg_match('/^(?:(.*)\s)?(\d+(?:\.\d+)?)(?:\s*marks?)?$/i', $bare, $matches)) {
            return [(float) $matches[2], trim($matches[1])];
        }

        if (preg_match('/(\d+(?:\.\d+)?)\s*marks?\b/i', $line, $matches)) {
            return [(float) $matches[1], $bare];
        }

        return null;
    }

    /**
     * Build one item entry for the 'sections' list.
     *
     * @param string|null $section Current section name, or null when none has been seen
     * @param string $description Cleaned item description
     * @param float $marks Marks available for the item
     * @param int $order 1-based position of the item in the document
     * @return array Item with section, row_identifier, description, marks, order and total flags
     */
    private function buildItem($section, string $description, float $marks, int $order): array
    {
        // Identifier is "<section slug>_item_<n>" or "item_<n>" outside any section.
        $rowIdentifier = $section
            ? strtolower(str_replace([' ', ':'], ['_', ''], $section)) . '_item_' . $order
            : 'item_' . $order;

        return [
            'section' => $section,
            'row_identifier' => $rowIdentifier,
            'item_description' => $description,
            'marks_available' => $marks,
            'display_order' => $order,
            'is_subtotal' => false,
            'is_total' => false,
        ];
    }

    /**
     * Extract header data from document.
     *
     * Placeholder: no extraction is implemented yet, so every field is
     * returned empty regardless of the document content.
     * TODO: match patterns such as "Candidate's Name & Registration Code"
     * once the real document layout is known.
     *
     * @param mixed $phpWord Loaded PhpWord document (currently unused)
     * @return array Header fields, all empty strings
     */
    protected function extractHeaderData($phpWord)
    {
        return [
            'candidate_name' => '',
            'candidate_registration' => '',
            'assessor_name' => '',
            'assessor_registration' => '',
            'venue' => '',
            'date' => '',
        ];
    }

    /**
     * Get the full text of a Word table cell as a single line.
     *
     * Top-level cell elements (paragraphs) are joined with a space; the
     * fragments inside one paragraph are concatenated as-is, and all runs of
     * whitespace are collapsed.
     *
     * @param mixed $cell Table cell exposing getElements(), or null
     * @return string Normalised cell text, '' for a missing cell
     */
    protected function getCellText($cell)
    {
        if (!$cell) {
            return '';
        }

        $pieces = [];

        foreach ($cell->getElements() as $element) {
            $pieces[] = $this->elementText($element);
        }

        return trim(preg_replace('/\s+/', ' ', implode(' ', $pieces)));
    }

    /**
     * Recursively collect the text of a document element.
     *
     * Container elements (anything exposing getElements(), e.g. a text run)
     * have their children concatenated WITHOUT a separator: Word splits one
     * word or number into several runs whenever formatting changes, and the
     * runs already carry their own spaces. Elements without text (breaks,
     * images, ...) contribute a single space.
     *
     * @param mixed $element Document element
     * @return string Raw (not whitespace-normalised) text
     */
    private function elementText($element): string
    {
        if (!is_object($element)) {
            return '';
        }

        if (method_exists($element, 'getElements')) {
            $text = '';
            foreach ($element->getElements() as $child) {
                $text .= $this->elementText($child);
            }

            return $text;
        }

        if (method_exists($element, 'getText')) {
            $value = $element->getText();

            // Some elements (believed to include PhpWord titles) may return a
            // text-run object rather than a string - TODO confirm against PhpWord.
            return is_object($value) ? $this->elementText($value) : (string) $value;
        }

        return ' ';
    }

    /**
     * Extract marks from a table cell.
     *
     * @param mixed $cell Table cell, or null
     * @return float|int First number found in the cell text, 0 when there is none
     */
    protected function extractMarks($cell)
    {
        if (!$cell) {
            return 0;
        }

        $text = $this->getCellText($cell);

        // Look for the first (optionally decimal) number in the text
        if (preg_match('/(\d+(?:\.\d+)?)/', $text, $matches)) {
            return (float) $matches[1];
        }

        return 0;
    }

    /**
     * Check if text is a section header such as "TASK 1: PRACTICAL".
     *
     * @param string $text Candidate text
     * @return bool True when the text starts with "TASK <n>: PRACTICAL|ORAL" (case-insensitive)
     */
    protected function isSectionHeader($text)
    {
        return preg_match('/^TASK\s+\d+\s*:\s*(PRACTICAL|ORAL)\b/i', trim((string) $text)) === 1;
    }

    /**
     * Check if text is a header row (table column titles).
     *
     * @param string $text Candidate text
     * @return bool True when the text contains one of the known column titles
     */
    protected function isHeaderRow($text)
    {
        $headerKeywords = ['items to be evaluated', 'marks available', 'marks obtained', 'comments'];
        $textLower = strtolower((string) $text);

        foreach ($headerKeywords as $keyword) {
            if (str_contains($textLower, $keyword)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Clean section name.
     *
     * @param string $text Raw section header text
     * @return string Header text without surrounding whitespace
     */
    protected function cleanSectionName($text)
    {
        return trim($text);
    }

    /**
     * Clean item description.
     *
     * @param string $text Raw item text
     * @return string Text without parenthetical notes and with whitespace collapsed
     */
    protected function cleanItemDescription($text)
    {
        // Remove parenthetical instructions like "(Award 2 mark or zero)"
        $text = preg_replace('/\s*\([^)]*\)\s*/', ' ', $text);
        // Remove extra whitespace
        $text = preg_replace('/\s+/', ' ', $text);

        return trim($text);
    }
}

