import os
import logging
import datetime
import mimetypes
import hashlib
import psycopg2
import psycopg2.extras
from typing import List, Dict, Tuple

# Set up root-logger output for this module: INFO and above, with each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

class DocumentScanner:
    """
    Scans a directory and catalogs documents, saving the metadata to PostgreSQL.

    For every file under ``root_dir`` whose name ends with one of
    ``EXTENSIONS`` the scanner records the path, size, modification time,
    guessed MIME type and SHA-256 digest, then upserts those rows into the
    ``documents`` table keyed by ``file_path``.
    """

    # File-name suffixes that are cataloged. Matching is case-insensitive, so
    # "REPORT.PDF" is picked up as well as "report.pdf". Override on a subclass
    # or instance to scan a different set of file types.
    EXTENSIONS = ('.md', '.txt', '.json', '.py', '.docx', '.pdf')

    # Number of bytes read per iteration while hashing a file.
    HASH_CHUNK_SIZE = 64 * 1024

    def __init__(self, root_dir: str, db_host: str, db_port: int, db_name: str, db_user: str, db_pass: str):
        """
        Initializes the DocumentScanner.

        No connection is opened here; call connect_to_db() (or run_scan()).

        Args:
            root_dir (str): The root directory to scan.
            db_host (str): The PostgreSQL host.
            db_port (int): The PostgreSQL port.
            db_name (str): The PostgreSQL database name.
            db_user (str): The PostgreSQL user.
            db_pass (str): The PostgreSQL password.
        """
        self.root_dir = root_dir
        self.db_host = db_host
        self.db_port = db_port
        self.db_name = db_name
        self.db_user = db_user
        self.db_pass = db_pass
        self.conn = None  # Set by connect_to_db(), cleared by disconnect_from_db()

    def _require_connection(self):
        """
        Returns the open connection or fails with a clear error.

        Raises:
            RuntimeError: If connect_to_db() has not been called successfully.
        """
        if self.conn is None:
            raise RuntimeError("Not connected to PostgreSQL; call connect_to_db() first.")
        return self.conn

    # The return annotation is a string so that it is not evaluated when the
    # class body is executed.
    def connect_to_db(self) -> "psycopg2.extensions.connection":
        """
        Connects to the PostgreSQL database and stores the connection on self.conn.

        Returns:
            psycopg2.extensions.connection: The database connection object.

        Raises:
            psycopg2.Error: If the connection attempt fails (logged, then re-raised).
        """
        try:
            self.conn = psycopg2.connect(
                host=self.db_host,
                port=self.db_port,
                database=self.db_name,
                user=self.db_user,
                password=self.db_pass
            )
            return self.conn
        except psycopg2.Error as e:
            logging.error("Error connecting to PostgreSQL: %s", e)
            raise

    def disconnect_from_db(self) -> None:
        """
        Disconnects from the PostgreSQL database.

        Does nothing when there is no connection. Errors raised while closing
        are logged and not propagated. self.conn is always reset to None so a
        closed handle is never reused.
        """
        if self.conn is None:
            return
        try:
            self.conn.close()
            logging.info("Disconnected from PostgreSQL.")
        except psycopg2.Error as e:
            logging.error("Error disconnecting from PostgreSQL: %s", e)
        finally:
            self.conn = None

    def create_table(self) -> None:
        """
        Creates the 'documents' table in the PostgreSQL database if it doesn't exist.

        Raises:
            RuntimeError: If called before connect_to_db().
            psycopg2.Error: If the statement fails (the transaction is rolled
                back, the error is logged, then re-raised).
        """
        conn = self._require_connection()
        try:
            with conn.cursor() as cursor:
                # file_hash is deliberately NOT unique: distinct paths often
                # hold identical content (copies, empty files). A unique hash
                # would make the batch upsert in save_to_db() fail, because its
                # ON CONFLICT clause only arbitrates on file_path.
                # NOTE: CREATE TABLE IF NOT EXISTS leaves an existing table
                # untouched, so a table created with a UNIQUE file_hash keeps
                # that constraint until it is dropped manually.
                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS documents (
                        file_path VARCHAR(2048) PRIMARY KEY,
                        size BIGINT,
                        modified_date TIMESTAMP,
                        content_type VARCHAR(255),
                        file_hash VARCHAR(64)
                    )
                """)
            conn.commit()
            logging.info("Table 'documents' created (if it didn't exist).")
        except psycopg2.Error as e:
            logging.error("Error creating table: %s", e)
            conn.rollback()
            raise

    def scan_directory(self) -> List[Dict]:
        """
        Scans the root directory tree for documents with the configured extensions.

        Files that cannot be read are logged and skipped; they do not abort
        the scan.

        Returns:
            List[Dict]: A list of dictionaries, each containing metadata about a
            document: 'file_path', 'size', 'modified_date', 'content_type'
            (None when the type cannot be guessed) and 'file_hash'.
        """
        documents = []
        # str.endswith accepts a tuple, so one call checks every suffix.
        suffixes = tuple(ext.lower() for ext in self.EXTENSIONS)

        for root, _dirs, files in os.walk(self.root_dir):
            for file_name in files:
                if not file_name.lower().endswith(suffixes):
                    continue

                file_path = os.path.join(root, file_name)
                try:
                    # One stat call yields both size and mtime.
                    stat_result = os.stat(file_path)
                    content_type, _encoding = mimetypes.guess_type(file_path)
                    file_hash = self.calculate_hash(file_path)

                    documents.append({
                        'file_path': file_path,
                        'size': stat_result.st_size,
                        'modified_date': datetime.datetime.fromtimestamp(stat_result.st_mtime),
                        'content_type': content_type,
                        'file_hash': file_hash
                    })
                    logging.info("Scanned: %s", file_path)

                except OSError as e:
                    logging.error("Error processing %s: %s", file_path, e)
                except Exception as e:
                    # Best-effort scan: one bad file must not stop the rest.
                    logging.error("Unexpected error processing %s: %s", file_path, e)

        return documents

    def calculate_hash(self, file_path: str) -> str:
        """
        Calculates the SHA-256 hash of a file.

        The file is read in fixed-size chunks so large files are never loaded
        into memory in one piece.

        Args:
            file_path (str): The path to the file.

        Returns:
            str: The SHA-256 hash of the file as a hexadecimal string.

        Raises:
            OSError: If the file cannot be opened or read (logged, then re-raised).
        """
        hasher = hashlib.sha256()
        try:
            with open(file_path, 'rb') as file:
                # iter() with a sentinel stops at the empty read that marks EOF.
                for chunk in iter(lambda: file.read(self.HASH_CHUNK_SIZE), b''):
                    hasher.update(chunk)
        except OSError as e:
            logging.error("Error reading file %s for hashing: %s", file_path, e)
            raise
        return hasher.hexdigest()

    def save_to_db(self, documents: List[Dict]) -> None:
        """
        Saves the document metadata to the PostgreSQL database.

        Rows are upserted: a row whose file_path already exists has its other
        columns overwritten with the new values.

        Args:
            documents (List[Dict]): A list of dictionaries, each containing metadata
                about a document, with the keys produced by scan_directory().

        Raises:
            RuntimeError: If called before connect_to_db().
            psycopg2.Error: If the batch fails (the transaction is rolled back,
                the error is logged, then re-raised).
        """
        conn = self._require_connection()
        try:
            with conn.cursor() as cursor:
                psycopg2.extras.execute_batch(cursor, """
                    INSERT INTO documents (file_path, size, modified_date, content_type, file_hash)
                    VALUES (%(file_path)s, %(size)s, %(modified_date)s, %(content_type)s, %(file_hash)s)
                    ON CONFLICT (file_path) DO UPDATE SET
                        size = EXCLUDED.size,
                        modified_date = EXCLUDED.modified_date,
                        content_type = EXCLUDED.content_type,
                        file_hash = EXCLUDED.file_hash;
                """, documents)
            conn.commit()
            logging.info("Saved %d documents to the database.", len(documents))
        except psycopg2.Error as e:
            logging.error("Error saving documents to the database: %s", e)
            conn.rollback()
            raise

    def run_scan(self) -> None:
        """
        Runs the document scanning and cataloging process.

        Connects, ensures the table exists, scans the directory and saves the
        results. Any error is logged with its traceback and not propagated;
        the connection is closed in every case.
        """
        try:
            self.connect_to_db()
            self.create_table()
            documents = self.scan_directory()
            self.save_to_db(documents)
        except Exception as e:
            # Top-level boundary: keep the traceback so failures can be diagnosed.
            logging.exception("An error occurred during the scan: %s", e)
        finally:
            self.disconnect_from_db()


if __name__ == '__main__':
    # Example usage. Settings are read from the environment so credentials do
    # not have to be committed to source control. The database settings use
    # the standard libpq variable names; the literals are fallbacks that
    # reproduce the previous hard-coded configuration when a variable is unset.
    root_directory = os.environ.get('DOCSCAN_ROOT_DIR', '/mnt/e/genesis-system')
    db_host = os.environ.get('PGHOST', 'postgresql-genesis-u50607.vm.elestio.app')
    db_port = int(os.environ.get('PGPORT', '25432'))
    db_name = os.environ.get('PGDATABASE', 'your_db_name')  # Replace with your actual database name
    db_user = os.environ.get('PGUSER', 'your_db_user')  # Replace with your actual database user
    db_pass = os.environ.get('PGPASSWORD', 'your_db_password')  # Replace with your actual database password

    scanner = DocumentScanner(root_directory, db_host, db_port, db_name, db_user, db_pass)
    scanner.run_scan()
