import os
import sys
import json
import uuid
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
import fitz  # PyMuPDF
from PIL import Image
import io
import tempfile
import shutil
from datetime import datetime
import traceback
import re
import mysql.connector
from mysql.connector import Error
import time

app = Flask(__name__)
# Enable cross-origin requests for every route; no origin restrictions are configured here.
CORS(app)

# Setting MAX_CONTENT_LENGTH to None turns off Flask's request-body size cap,
# so uploads of any size are accepted.
app.config['MAX_CONTENT_LENGTH'] = None  # No limit

class DatabaseManager:
    """Hold the MySQL connection settings and open connections on demand."""

    def __init__(self):
        # Keyword arguments passed verbatim to mysql.connector.connect().
        # NOTE(review): credentials are hard-coded (root, empty password);
        # consider loading them from the environment before deploying.
        self.config = {
            'host': 'localhost',
            'user': 'root',
            'password': '',
            'database': 'portfolio_management'
        }

    def get_connection(self):
        """Open a new MySQL connection.

        Returns:
            A new connection object, or None when connecting fails (the
            error is printed rather than raised).
        """
        try:
            return mysql.connector.connect(**self.config)
        except Error as e:
            print(f"Database connection error: {e}")
            return None

    def save_to_database(self, table, data):
        """Insert one row into ``table``.

        Route handlers in this file call this method on a DatabaseManager
        instance, so it is provided here with the same contract as
        PDFProcessor.save_to_database.

        Args:
            table: Table name. Interpolated into the SQL text, so it must
                come from trusted code, never from request data.
            data: Mapping of column name -> value. Column names are
                interpolated; values are bound as query parameters.

        Returns:
            The cursor's ``lastrowid`` on success, or False on failure.
            Compare with ``is False``: ``lastrowid`` may itself be falsy for
            tables without an auto-increment key.
        """
        conn = self.get_connection()
        if not conn:
            return False

        cursor = None
        try:
            cursor = conn.cursor()
            columns = ', '.join(data.keys())
            placeholders = ', '.join(['%s'] * len(data))
            sql = f"INSERT INTO {table} ({columns}) VALUES ({placeholders})"
            cursor.execute(sql, list(data.values()))
            conn.commit()
            return cursor.lastrowid
        except Error as e:
            print(f"Database error: {e}")
            return False
        finally:
            # cursor stays None if conn.cursor() itself failed.
            if cursor is not None:
                cursor.close()
            conn.close()

class PDFProcessor:
    def __init__(self, max_workers=10):
        """Set up the worker pool, the database handle and static settings.

        Args:
            max_workers: Number of threads in the pool used to process PDFs.
        """
        # Concurrency and persistence collaborators.
        self.max_workers = max_workers
        self.executor = ThreadPoolExecutor(max_workers=self.max_workers)
        self.db = DatabaseManager()

        # Static settings.
        self.temp_base = "temp_pdf_processing"
        self.supported_formats = ['.pdf']
        
    def create_project_structure(self, project_id, project_name):
        """Ensure a project's folder layout exists on disk and return its paths.

        Folders live under ``projects/<project_id>``. ``project_name`` is
        accepted but not used when building the paths.

        Returns:
            dict with keys 'project_folder', 'pdfs_folder', 'images_folder'
            and 'additional_folder', each mapped to a relative path.
        """
        root = os.path.join("projects", project_id)
        layout = {'project_folder': root}
        for key, subdir in (
            ('pdfs_folder', "pdfs"),
            ('images_folder', "images"),
            ('additional_folder', "additional"),
        ):
            layout[key] = os.path.join(root, subdir)

        # exist_ok makes repeated calls for the same project harmless.
        for path in layout.values():
            os.makedirs(path, exist_ok=True)

        return layout
    
    def save_to_database(self, table, data, update=False):
        """Insert a row into ``table``, or update an existing row by ``id``.

        Args:
            table: Table name. Interpolated into the SQL text, so it must
                come from trusted code, never from request data.
            data: Mapping of column name -> value. Column names are
                interpolated; values are bound as query parameters.
            update: When True, run ``UPDATE table SET ... WHERE id = %s``
                using ``data['id']`` as the key and every other entry as a
                column to set. extract_content calls this method with
                ``update=True``, so the keyword must be accepted.

        Returns:
            Insert: the cursor's ``lastrowid`` on success. Compare failures
            with ``is False``, because ``lastrowid`` may itself be falsy for
            tables without an auto-increment key.
            Update: True on success.
            Either mode: False on failure (the error is printed).
        """
        conn = self.db.get_connection()
        if not conn:
            return False

        cursor = None
        try:
            if update:
                changes = {column: value for column, value in data.items() if column != 'id'}
                if 'id' not in data or not changes:
                    print(f"Database error: update of {table} requires 'id' plus at least one other column")
                    return False
                assignments = ', '.join(f"{column} = %s" for column in changes)
                sql = f"UPDATE {table} SET {assignments} WHERE id = %s"
                params = [*changes.values(), data['id']]
            else:
                columns = ', '.join(data.keys())
                placeholders = ', '.join(['%s'] * len(data))
                sql = f"INSERT INTO {table} ({columns}) VALUES ({placeholders})"
                params = list(data.values())

            cursor = conn.cursor()
            cursor.execute(sql, params)
            conn.commit()
            return True if update else cursor.lastrowid
        except Error as e:
            print(f"Database error: {e}")
            return False
        finally:
            # cursor stays None if we bailed out before (or while) creating it.
            if cursor is not None:
                cursor.close()
            conn.close()
    
    def update_processing_status(self, project_id, status):
        """Set the ``status`` column of one row in ``projects``.

        Args:
            project_id: Value matched against ``projects.id``.
            status: New status value to store.

        Returns:
            True when the UPDATE was executed and committed (even if no row
            matched), False when connecting or executing failed.
        """
        conn = self.db.get_connection()
        if not conn:
            return False

        cursor = None
        try:
            cursor = conn.cursor()
            cursor.execute(
                "UPDATE projects SET status = %s WHERE id = %s",
                (status, project_id),
            )
            conn.commit()
            return True
        except Error as e:
            print(f"Database error: {e}")
            return False
        finally:
            # cursor stays None if conn.cursor() itself failed; the connection
            # must still be closed in that case.
            if cursor is not None:
                cursor.close()
            conn.close()
    
    def extract_model_info(self, text):
        """Heuristically pull model details out of free text with regexes.

        For each field the candidate patterns are tried in order and the
        first capture of the first matching pattern wins.

        Args:
            text: Text to scan. ``None`` is treated as an empty string.

        Returns:
            dict with string values ('' when nothing was found) under
            'model_name', 'model_height', 'model_weight', 'model_waist',
            'instagram_handle' (prefixed with '@') and 'contact_info' (first
            email, otherwise first phone number), plus 'specialties', a
            JSON-encoded list of the title-cased keywords found in the text.
        """
        text = text or ''
        text_lower = text.lower()

        def first_match(patterns, haystack, flags=re.IGNORECASE):
            """Return the first capture of the first pattern that matches, else None."""
            for pattern in patterns:
                found = re.findall(pattern, haystack, flags)
                if found:
                    return found[0]
            return None

        model_info = {
            'model_name': '',
            'model_height': '',
            'model_weight': '',
            'model_waist': '',
            'instagram_handle': '',
            'contact_info': '',
            'specialties': []
        }

        # Name is matched against the original-case text, line by line.
        name = first_match(
            [
                r'^(?:model|name)[:\s]*([A-Za-z\s]{3,50})$',
                r'([A-Z][a-z]+ [A-Z][a-z]+)',
                r'name[:\s]*([^\n,]{3,50})',
            ],
            text,
            re.IGNORECASE | re.MULTILINE,
        )
        if name is not None:
            model_info['model_name'] = name.strip()

        # Measurements are matched against the lower-cased text.
        measurement_patterns = {
            'model_height': [
                r'height[:\s]*(\d+\'?\s*\d*\"?\s*(?:cm|inches)?)',
                r'(\d+\s*cm\s*\/\s*\d+\'?\s*\d*\")',
                r'(\d+\'?\s*\d*\s*(?:cm|inches|"))',
            ],
            'model_weight': [
                r'weight[:\s]*(\d+\s*(?:kg|lbs|pounds))',
                r'(\d+\s*(?:kg|lbs))\s*weight',
            ],
            'model_waist': [
                r'waist[:\s]*(\d+\s*(?:cm|inches))',
                r'(\d+\s*(?:cm))\s*waist',
            ],
        }
        for field, patterns in measurement_patterns.items():
            value = first_match(patterns, text_lower)
            if value is not None:
                model_info[field] = value.strip()

        # The bare "@handle" pattern must not start inside an email address,
        # otherwise "jane@example.com" is reported as the handle "@example.com".
        handle = first_match(
            [
                r'instagram[:\s]*@?([a-zA-Z0-9._]{1,30})',
                r'(?<![a-z0-9._%+-])@([a-zA-Z0-9._]{1,30})',
                r'insta[:\s]*@?([a-zA-Z0-9._]{1,30})',
            ],
            text_lower,
        )
        if handle is not None:
            model_info['instagram_handle'] = f"@{handle}"

        # Contact information: prefer an email address, fall back to a phone.
        # The TLD class is [A-Za-z]; a '|' inside a character class is a literal.
        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
        # The optional country prefix is a NON-capturing group so that
        # re.findall returns the whole number rather than just the prefix.
        phone_pattern = r'(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'

        emails = re.findall(email_pattern, text)
        if emails:
            model_info['contact_info'] = emails[0]
        else:
            phones = re.findall(phone_pattern, text)
            if phones:
                model_info['contact_info'] = phones[0]

        # Specialties: plain substring search, reported in keyword order.
        specialties_keywords = [
            'runway', 'fashion', 'commercial', 'editorial', 'print', 'catalog',
            'lingerie', 'swimwear', 'beauty', 'hair', 'makeup', 'fitness',
            'glamour', 'portrait', 'acting', 'dancing', 'singing', 'model'
        ]
        found_specialties = [
            keyword.title() for keyword in specialties_keywords if keyword in text_lower
        ]
        # Stored as a JSON string because the value is written to a DB column.
        model_info['specialties'] = json.dumps(found_specialties)

        return model_info
    
    def extract_content(self, pdf_path, project_folders, file_index=0, total_files=1, project_id=None):
        """Extract text, images, and model information from one PDF.

        Steps: copy the PDF into the project's pdfs folder, record it in
        ``project_pdfs``, collect the text of every page, convert every
        embedded image to WebP in the images folder (recording each in
        ``extracted_images``), run extract_model_info over the combined text,
        store the outcome in ``extracted_data`` and finally mark the PDF row
        as 'completed' (or 'failed' if anything raised).

        Args:
            pdf_path: Path of the PDF file to read.
            project_folders: Mapping providing at least 'pdfs_folder' and
                'images_folder' (see create_project_structure).
            file_index: Zero-based position of this file in its batch; used
                for progress output and echoed in the result.
            total_files: Size of the batch; used the same way.
            project_id: Project identifier stored with every database row.

        Returns:
            dict with keys 'text_content' (one entry per page), 'images',
            'metadata', 'model_info', 'file_name', 'file_index' and
            'total_files'.

        Raises:
            Exception: Any failure is re-raised as a plain Exception whose
                message starts with "PDF processing error for <file name>";
                the original exception is chained as ``__cause__``.
        """
        pdf_document = None
        pdf_db_id = None
        try:
            file_name = os.path.basename(pdf_path)
            print(f"🔄 Processing PDF {file_index + 1}/{total_files}: {file_name}")

            pdf_document = fitz.open(pdf_path)
            page_count = len(pdf_document)
            file_size = os.path.getsize(pdf_path)
            # NOTE(review): PyMuPDF documents metadata as possibly None for
            # documents that still need a password; fall back to an empty dict.
            metadata = pdf_document.metadata or {}

            extraction_result = {
                'text_content': [],
                'images': [],
                'metadata': {
                    'title': metadata.get('title', ''),
                    'author': metadata.get('author', ''),
                    'subject': metadata.get('subject', ''),
                    'pages': page_count,
                    'creation_date': str(metadata.get('creationDate', '')),
                    'file_size': file_size,
                    'file_name': file_name
                },
                'model_info': {},
                'file_name': file_name,
                'file_index': file_index,
                'total_files': total_files
            }

            # Keep a uniquely named copy of the PDF inside the project.
            pdf_filename = f"{uuid.uuid4().hex}_{file_name}"
            pdf_save_path = os.path.join(project_folders['pdfs_folder'], pdf_filename)
            shutil.copy2(pdf_path, pdf_save_path)

            # save_to_database returns False when the insert failed; image and
            # data rows are still written best-effort in that case.
            pdf_db_id = self.save_to_database('project_pdfs', {
                'project_id': project_id,
                'original_name': file_name,
                'file_path': pdf_save_path,
                'file_size': file_size,
                'page_count': page_count,
                'processing_status': 'processing'
            })

            page_texts = []
            for page_index in range(page_count):
                page = pdf_document[page_index]
                page_number = page_index + 1

                text = page.get_text()
                stripped = text.strip()
                extraction_result['text_content'].append({
                    'page': page_number,
                    'text': stripped,
                    'has_text': bool(stripped)
                })
                page_texts.append(text)

                extraction_result['images'].extend(
                    self._save_page_images(
                        pdf_document, page, page_number,
                        project_folders['images_folder'], pdf_db_id, project_id,
                    )
                )

            # Every page's text followed by a newline, joined once at the end.
            all_text = ''.join(f"{page_text}\n" for page_text in page_texts)

            model_info = self.extract_model_info(all_text)
            extraction_result['model_info'] = model_info

            self.save_to_database('extracted_data', {
                'pdf_id': pdf_db_id,
                'project_id': project_id,
                'model_name': model_info['model_name'],
                'model_height': model_info['model_height'],
                'model_weight': model_info['model_weight'],
                'model_waist': model_info['model_waist'],
                'instagram_handle': model_info['instagram_handle'],
                'contact_info': model_info['contact_info'],
                'specialties': model_info['specialties'],
                'extracted_text': all_text
            })

            # Nothing to update when the project_pdfs insert itself failed.
            if pdf_db_id:
                self._set_pdf_status(pdf_db_id, 'completed')

            print(f"✅ Completed PDF {file_index + 1}/{total_files}")
            return extraction_result

        except Exception as e:
            # Do not leave the row stuck in 'processing' after a failure.
            if pdf_db_id:
                self._set_pdf_status(pdf_db_id, 'failed')
            error_msg = f"PDF processing error for {os.path.basename(pdf_path)}: {str(e)}"
            print(error_msg)
            raise Exception(error_msg) from e
        finally:
            if pdf_document:
                pdf_document.close()

    def _save_page_images(self, pdf_document, page, page_number, images_folder, pdf_db_id, project_id):
        """Convert one page's embedded images to WebP; return their descriptors.

        A failure on a single image is printed and skipped so that the
        remaining images of the page are still processed.
        """
        saved_images = []
        for img_index, img in enumerate(page.get_images(full=True)):
            try:
                # The first element of each get_images() entry is the image xref.
                base_image = pdf_document.extract_image(img[0])

                image_filename = f"{uuid.uuid4().hex}.webp"
                image_save_path = os.path.join(images_folder, image_filename)

                # The context manager releases the decoder's resources.
                with Image.open(io.BytesIO(base_image["image"])) as pil_image:
                    pil_image.save(image_save_path, 'WEBP', quality=85, optimize=True, method=6)
                    width, height = pil_image.size

                size_bytes = os.path.getsize(image_save_path)

                self.save_to_database('extracted_images', {
                    'pdf_id': pdf_db_id,
                    'project_id': project_id,
                    'image_name': image_filename,
                    'image_path': image_save_path,
                    'image_size': size_bytes,
                    'width': width,
                    'height': height,
                    'page_number': page_number
                })

                saved_images.append({
                    'page': page_number,
                    'image_index': img_index + 1,
                    'filename': image_filename,
                    'filepath': image_save_path,
                    'original_format': base_image["ext"],
                    'width': width,
                    'height': height,
                    'size_bytes': size_bytes,
                    'size_kb': round(size_bytes / 1024, 2)
                })
            except Exception as img_error:
                print(f"Error processing image on page {page_number}: {img_error}")
                continue
        return saved_images

    def _set_pdf_status(self, pdf_db_id, status):
        """Set ``processing_status`` on one ``project_pdfs`` row; True on success."""
        conn = self.db.get_connection()
        if not conn:
            return False

        cursor = None
        try:
            cursor = conn.cursor()
            cursor.execute(
                "UPDATE project_pdfs SET processing_status = %s WHERE id = %s",
                (status, pdf_db_id),
            )
            conn.commit()
            return True
        except Error as e:
            print(f"Database error: {e}")
            return False
        finally:
            if cursor is not None:
                cursor.close()
            conn.close()
    
    def process_single_pdf(self, file_data, project_folders, file_index, total_files, project_id):
        """Save one uploaded PDF to a scratch location and run extract_content on it.

        The upload is written into a private scratch directory inside the
        project's pdfs folder, so two uploads with the same file name cannot
        overwrite each other while they are processed in parallel. The
        scratch directory is always removed afterwards; extract_content keeps
        its own copy of the PDF.

        Args:
            file_data: Uploaded file object exposing ``filename`` and
                ``save(path)``.
            project_folders: Mapping providing at least 'pdfs_folder'.
            file_index: Zero-based position of the file in its batch.
            total_files: Size of the batch.
            project_id: Project identifier forwarded to extract_content.

        Returns:
            The extract_content result, or ``{'error': <message>,
            'file_name': <uploaded name>}`` when anything failed. This
            method does not raise.
        """
        scratch_dir = None
        try:
            # The client controls the file name: keep only its last path
            # component so it cannot point outside the scratch directory.
            raw_name = file_data.filename or ''
            safe_name = os.path.basename(raw_name.replace('\\', '/')) or 'upload.pdf'

            scratch_dir = tempfile.mkdtemp(prefix='upload_', dir=project_folders['pdfs_folder'])
            # The "temp_" prefix is kept so the names that extract_content
            # derives from this path stay what they were before.
            temp_pdf_path = os.path.join(scratch_dir, f"temp_{safe_name}")
            file_data.save(temp_pdf_path)

            return self.extract_content(temp_pdf_path, project_folders, file_index, total_files, project_id)
        except Exception as e:
            return {'error': str(e), 'file_name': file_data.filename}
        finally:
            if scratch_dir is not None:
                shutil.rmtree(scratch_dir, ignore_errors=True)
    
    def process_project_pdfs(self, files, project_id, project_name):
        """Process a batch of uploaded PDFs for one project in parallel.

        Args:
            files: Sequence of uploaded file objects (each exposes
                ``filename`` and ``save``).
            project_id: Identifier of the project the files belong to.
            project_name: Project name, stored when the project row has to
                be created and echoed in the result.

        Returns:
            dict with the counts 'success', 'failed' and 'total', the lists
            'results' and 'errors' (both ordered by the files' position in
            ``files``), 'project_id', 'project_name', 'project_folders' and
            'processing_time' (ISO timestamp taken when the batch finished).
        """
        total_files = len(files)
        results = []
        errors = []

        project_folders = self.create_project_structure(project_id, project_name)

        # Create the project row; when that fails (for example because the
        # row already exists) fall back to flagging the existing row, so the
        # project is reported as 'processing' while the batch runs.
        inserted = self.save_to_database('projects', {
            'id': project_id,
            'name': project_name,
            'status': 'processing'
        })
        if inserted is False:
            self.update_processing_status(project_id, 'processing')

        print(f"🚀 Starting parallel processing of {total_files} PDFs for project {project_name}")

        # Fan the files out to the thread pool, remembering each one's index.
        future_to_index = {
            self.executor.submit(
                self.process_single_pdf,
                file_data,
                project_folders,
                i,
                total_files,
                project_id
            ): i
            for i, file_data in enumerate(files)
        }

        # Collect outcomes as they finish.
        for completed, future in enumerate(as_completed(future_to_index), start=1):
            i = future_to_index[future]
            try:
                result = future.result()
                failure = result['error'] if 'error' in result else None
            except Exception as e:
                result = None
                failure = str(e)

            if failure is None:
                results.append(result)
            else:
                errors.append({
                    'file_index': i,
                    'file_name': files[i].filename,
                    'error': failure
                })
            print(f"📊 Progress: {completed}/{total_files} PDFs processed")

        # Completion order varies from run to run; report in upload order.
        results.sort(key=lambda item: item.get('file_index', 0))
        errors.sort(key=lambda item: item['file_index'])

        # The batch counts as failed only when nothing succeeded and at
        # least one file errored.
        status = 'failed' if errors and not results else 'completed'
        self.update_processing_status(project_id, status)

        return {
            'success': len(results),
            'failed': len(errors),
            'total': total_files,
            'results': results,
            'errors': errors,
            'project_id': project_id,
            'project_name': project_name,
            'project_folders': project_folders,
            'processing_time': datetime.now().isoformat()
        }

# Size of the thread pool used for PDF processing; also reported by /health.
MAX_WORKERS = 10
# Module-level processor instance shared by the route handlers below.
processor = PDFProcessor(max_workers=MAX_WORKERS)

@app.route('/health', methods=['GET'])
def health_check():
    """Report that the service is up, together with its static settings."""
    payload = {
        'status': 'healthy',
        'service': 'PDF Processor',
        'max_workers': MAX_WORKERS,
        'upload_limit': 'NO LIMIT',
    }
    return jsonify(payload)

@app.route('/create-project', methods=['POST'])
def create_project():
    """Create a project row and its folder structure.

    Expects a JSON object with 'project_name' (required) and
    'project_description' (optional).

    Responses:
        200 with 'project_id' and 'project_name' on success.
        400 when the body is not a JSON object or the name is missing.
        500 when the project could not be stored or anything else failed.
    """
    try:
        # silent=True yields None instead of raising on a missing or invalid
        # JSON body, so the client gets a 400 rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        project_name = data.get('project_name')
        project_description = data.get('project_description', '')

        if not project_name:
            return jsonify({'error': 'Project name is required'}), 400

        project_id = str(uuid.uuid4())

        # DatabaseManager defines no save_to_database; the generic insert
        # helper lives on the shared processor.
        saved = processor.save_to_database('projects', {
            'id': project_id,
            'name': project_name,
            'description': project_description,
            'status': 'created'
        })
        # Compare with False explicitly: a successful insert returns
        # cursor.lastrowid, which can be falsy for a non-auto-increment key.
        if saved is False:
            return jsonify({'error': 'Failed to save project'}), 500

        processor.create_project_structure(project_id, project_name)

        return jsonify({
            'success': True,
            'project_id': project_id,
            'project_name': project_name,
            'message': 'Project created successfully'
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/process-project-pdfs/<project_id>', methods=['POST'])
def process_project_pdfs(project_id):
    """Accept uploaded PDFs for an existing project and process them.

    Expects multipart form data with one or more files under 'pdf_files'.
    Files whose names do not end in '.pdf' are ignored.

    Responses:
        200 with 'batch_result' once all files have been processed.
        400 when no usable PDF was uploaded.
        404 when the project does not exist.
        500 when the database is unreachable or processing raised.
    """
    try:
        print(f"📨 Received PDF upload request for project {project_id}")

        # Look the project up, releasing the cursor and connection even if
        # the query raises; they are not needed during the long-running work.
        conn = DatabaseManager().get_connection()
        if conn is None:
            return jsonify({'error': 'Database connection failed'}), 500
        try:
            cursor = conn.cursor(dictionary=True)
            try:
                cursor.execute("SELECT * FROM projects WHERE id = %s", (project_id,))
                project = cursor.fetchone()
            finally:
                cursor.close()
        finally:
            conn.close()

        if not project:
            return jsonify({'error': 'Project not found'}), 404

        if 'pdf_files' not in request.files:
            return jsonify({'error': 'No files provided'}), 400

        files = request.files.getlist('pdf_files')
        if not files or all(file.filename == '' for file in files):
            return jsonify({'error': 'No files selected'}), 400

        # Only files with a .pdf extension are processed.
        pdf_files = [f for f in files if f.filename.lower().endswith('.pdf')]
        if not pdf_files:
            return jsonify({'error': 'No PDF files found'}), 400

        print(f"🚀 Processing {len(pdf_files)} PDFs for project {project['name']}")

        batch_result = processor.process_project_pdfs(
            pdf_files,
            project_id,
            project['name']
        )

        return jsonify({
            'success': True,
            'batch_result': batch_result
        })

    except Exception as e:
        print(f"💥 Project PDF processing error: {str(e)}")
        return jsonify({'error': str(e)}), 500

@app.route('/project-status/<project_id>', methods=['GET'])
def get_project_status(project_id):
    """Return a project's row together with PDF and image counters.

    Responses:
        200 with 'project' and 'stats' ('pdfs' holds total/completed/
        processing/failed counts, 'images' holds the image total).
        404 when the project does not exist.
        500 when the database is unreachable or a query failed.
    """
    conn = None
    cursor = None
    try:
        conn = DatabaseManager().get_connection()
        if conn is None:
            return jsonify({'error': 'Database connection failed'}), 500
        cursor = conn.cursor(dictionary=True)

        cursor.execute("SELECT * FROM projects WHERE id = %s", (project_id,))
        project = cursor.fetchone()

        if not project:
            return jsonify({'error': 'Project not found'}), 404

        # Per-status PDF counts for the project.
        cursor.execute("""
            SELECT 
                COUNT(*) as total_pdfs,
                SUM(CASE WHEN processing_status = 'completed' THEN 1 ELSE 0 END) as completed_pdfs,
                SUM(CASE WHEN processing_status = 'processing' THEN 1 ELSE 0 END) as processing_pdfs,
                SUM(CASE WHEN processing_status = 'failed' THEN 1 ELSE 0 END) as failed_pdfs
            FROM project_pdfs 
            WHERE project_id = %s
        """, (project_id,))
        pdf_stats = cursor.fetchone()

        cursor.execute("SELECT COUNT(*) as total_images FROM extracted_images WHERE project_id = %s", (project_id,))
        image_stats = cursor.fetchone()

        return jsonify({
            'success': True,
            'project': project,
            'stats': {
                'pdfs': pdf_stats,
                'images': image_stats
            }
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
    finally:
        # Runs on every exit path, including the early 404 return.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()

@app.route('/project-data/<project_id>', methods=['GET'])
def get_project_data(project_id):
    """Return a project's PDFs (with extracted data and images) and extra photos.

    Responses:
        200 with 'pdfs' (each entry carries the project_pdfs columns, the
        extracted model fields, 'image_count' and an 'images' list) and
        'additional_photos'.
        500 when the database is unreachable or a query failed.
    """
    conn = None
    cursor = None
    try:
        conn = DatabaseManager().get_connection()
        if conn is None:
            return jsonify({'error': 'Database connection failed'}), 500
        cursor = conn.cursor(dictionary=True)

        # PDFs joined with their extracted model data.
        cursor.execute("""
            SELECT 
                pp.*,
                ed.model_name,
                ed.model_height,
                ed.model_weight,
                ed.model_waist,
                ed.instagram_handle,
                ed.contact_info,
                ed.specialties,
                ed.extracted_text,
                (SELECT COUNT(*) FROM extracted_images ei WHERE ei.pdf_id = pp.id) as image_count
            FROM project_pdfs pp
            LEFT JOIN extracted_data ed ON pp.id = ed.pdf_id
            WHERE pp.project_id = %s
        """, (project_id,))
        pdfs = cursor.fetchall()

        # Fetch the images of all PDFs with one query instead of one per PDF.
        images_by_pdf = {}
        if pdfs:
            placeholders = ', '.join(['%s'] * len(pdfs))
            cursor.execute(
                f"SELECT * FROM extracted_images WHERE pdf_id IN ({placeholders})",
                [pdf['id'] for pdf in pdfs],
            )
            for image in cursor.fetchall():
                images_by_pdf.setdefault(image['pdf_id'], []).append(image)
        for pdf in pdfs:
            pdf['images'] = images_by_pdf.get(pdf['id'], [])

        cursor.execute("SELECT * FROM additional_photos WHERE project_id = %s", (project_id,))
        additional_photos = cursor.fetchall()

        return jsonify({
            'success': True,
            'pdfs': pdfs,
            'additional_photos': additional_photos
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()

@app.route('/update-model-info/<pdf_id>', methods=['PUT'])
def update_model_info(pdf_id):
    """Overwrite the extracted model fields stored for one PDF.

    Expects a JSON object. Every model field is written: keys missing from
    the body are stored as NULL, and a missing 'specialties' as an empty
    JSON list.

    Responses:
        200 on success.
        400 when the body is not a JSON object.
        500 when the database is unreachable or the update failed.
    """
    conn = None
    cursor = None
    try:
        # silent=True yields None instead of raising on a missing or invalid
        # JSON body, so the client gets a 400 rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        conn = DatabaseManager().get_connection()
        if conn is None:
            return jsonify({'error': 'Database connection failed'}), 500
        cursor = conn.cursor()

        cursor.execute("""
            UPDATE extracted_data 
            SET model_name = %s, model_height = %s, model_weight = %s, 
                model_waist = %s, instagram_handle = %s, contact_info = %s,
                specialties = %s
            WHERE pdf_id = %s
        """, (
            data.get('model_name'),
            data.get('model_height'),
            data.get('model_weight'),
            data.get('model_waist'),
            data.get('instagram_handle'),
            data.get('contact_info'),
            json.dumps(data.get('specialties', [])),
            pdf_id
        ))
        conn.commit()

        return jsonify({'success': True, 'message': 'Model information updated successfully'})

    except Exception as e:
        return jsonify({'error': str(e)}), 500
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()

@app.route('/upload-additional-photo/<project_id>', methods=['POST'])
def upload_additional_photo(project_id):
    """Store an extra photo for a project as WebP and record it in the database.

    Expects multipart form data with the image under 'photo'.

    Responses:
        200 with 'photo_path' on success.
        400 when no photo was provided or selected.
        500 when the image could not be converted or recorded.
    """
    try:
        if 'photo' not in request.files:
            return jsonify({'error': 'No photo provided'}), 400

        photo = request.files['photo']
        if photo.filename == '':
            return jsonify({'error': 'No photo selected'}), 400

        # NOTE(review): the project id is not checked against the projects
        # table here, so folders are created for any id that is passed in.
        project_folders = processor.create_project_structure(project_id, "")

        photo_filename = f"additional_{uuid.uuid4().hex}.webp"
        photo_path = os.path.join(project_folders['additional_folder'], photo_filename)

        # Convert and save as WebP; the context manager releases the decoder.
        with Image.open(photo) as pil_image:
            pil_image.save(photo_path, 'WEBP', quality=85, optimize=True)

        # DatabaseManager defines no save_to_database; the generic insert
        # helper lives on the shared processor.
        saved = processor.save_to_database('additional_photos', {
            'project_id': project_id,
            'image_name': photo_filename,
            'image_path': photo_path,
            'image_size': os.path.getsize(photo_path)
        })
        if saved is False:
            # Do not keep a file that no database row refers to.
            os.remove(photo_path)
            return jsonify({'error': 'Failed to save photo record'}), 500

        return jsonify({
            'success': True,
            'message': 'Additional photo uploaded successfully',
            'photo_path': photo_path
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    # Make sure the working directories exist before serving requests.
    os.makedirs("projects", exist_ok=True)
    os.makedirs(processor.temp_base, exist_ok=True)

    host = '127.0.0.1'
    port = 5000

    # Startup banner; the worker count and URL are taken from the actual
    # settings so the banner cannot drift from the configuration.
    print("=== PROJECT-BASED PDF PROCESSOR ===")
    print("📁 Upload Limit: NO LIMIT")
    print(f"👥 Max Workers: {MAX_WORKERS}")
    print("📄 Max Files: UNLIMITED")
    print(f"🌐 Service URL: http://{host}:{port}")
    print("🔧 Ready to process PDFs...")

    # Run the service
    app.run(host=host, port=port, debug=False, threaded=True)