import psycopg2
import os
import sys
import re
import json
import uuid
# Absolute path of the directory one level above this script's own directory.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
# NOTE(review): these prints run unconditionally at import time; the second one
# shows sys.path *before* project_root is inserted on the next statement.
print(f"DEBUG: project_root = {project_root}")
print(f"DEBUG: sys.path = {sys.path}")
# Put the project root first on the module search path.
sys.path.insert(0, project_root)
import structlog
# Module-level structlog logger used by every function in this file.
logger = structlog.get_logger(__name__)
from dotenv import load_dotenv
# Called at import time, before any function below reads os.getenv();
# presumably loads a local .env file into the environment.
load_dotenv()
from passlib.context import CryptContext

# Shared passlib context (bcrypt scheme) used by get_password_hash().
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")

def get_password_hash(password: str) -> str:
    """Hash *password* with the module-level ``pwd_context`` and return it as ``str``."""
    hashed = pwd_context.hash(password)
    return str(hashed)

# Tables the integrity check expects to find after the schema file was applied.
_REQUIRED_TABLES = (
    'data_sources', 'table_schemas', 'column_schemas', 'metric_definitions',
    'users', 'roles', 'user_roles', 'role_permissions', 'row_level_security_policies',
    'role_rls_policies', 'conversation_history', 'message_feedback', 'audit_log',
    'knowledge_source_directories', 'system_settings', 'analysis_reports',
)

# Default PII regexes stored as JSON under the 'PII_PATTERNS' system setting.
_DEFAULT_PII_PATTERNS = {
    "EMAIL": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
    # Fixed: the third separator class was missing its opening '[' ("\s\-]?"),
    # which made the pattern require a literal "<space>-" sequence.
    "PHONE_NUMBER": r"\+?\d{1,4}[\s\-]?\(?\d{1,}\)?[\s\-]?\d{1,}[\s\-]?\d{1,}",
    "ID_CARD": r"\d{15}(\d{2}[0-9X])?",
    "BANK_CARD": r"\b\d{13,19}\b",
    "IP_ADDRESS": r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b",
    "CHINESE_NAME": r"[\u4e00-\u9fa5]{2,4}",
    "PASSPORT_NUMBER": r"[A-Z]{1}[0-9]{7}|[A-Z]{2}[0-9]{7}",
    "CREDIT_CARD": r"(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|6(?:011|5[0-9]{2})[0-9]{12})",
    "ADDRESS": r"[\u4e00-\u9fa5a-zA-Z0-9]{5,}?(省|市|区|县|镇|乡|村|路|街|巷|号|大厦|公寓|楼|单元|室)",
    "USCC": r"[0-9A-Z]{18}",
    "DRIVING_LICENSE": r"\d{18}",
    "LICENSE_PLATE": r"[京津沪渝冀豫辽吉黑苏浙皖闽鄂湘粤桂琼川贵云藏陕甘青宁蒙新][A-HJ-NP-Z][0-9A-Z]{5}",
    "BANK_ACCOUNT": r"\b\d{16,19}\b",
    "POSTAL_CODE": r"[0-9]{6}",
    "DATE_OF_BIRTH": r"\d{4}[-/]\d{2}[-/]\d{2}",
}


def _parse_pg_url(url):
    """Turn a ``postgresql://`` URL into keyword arguments for ``psycopg2.connect``.

    The URL is split the same way as before (first ``/`` ends the network
    location, first ``@`` ends the credentials), but user, password and
    database name are now percent-decoded, a trailing ``?query`` is no longer
    treated as part of the database name, and bracketed IPv6 hosts are
    accepted. Query-string options are ignored.

    Args:
        url: Database URL with scheme ``postgresql`` or ``postgresql+asyncpg``.

    Returns:
        dict with any of the keys ``host``, ``port``, ``dbname``, ``user`` and
        ``password``; components that are empty in the URL are omitted.

    Raises:
        ValueError: If the URL has no ``://`` or an unsupported scheme.
    """
    from urllib.parse import unquote

    scheme, separator, remainder = url.partition("://")
    if not separator:
        raise ValueError("Invalid database URL format")
    if scheme not in ("postgresql", "postgresql+asyncpg"):
        raise ValueError(f"Unsupported scheme: {scheme}")

    netloc, _, path = remainder.partition("/")
    credentials, at_sign, host_port = netloc.partition("@")
    if not at_sign:
        # No "@": the whole network location is host[:port].
        credentials, host_port = "", netloc
    user, _, password = credentials.partition(":")

    if host_port.startswith("["):
        # Bracketed IPv6 literal, e.g. "[::1]:5432".
        host, _, tail = host_port[1:].partition("]")
        port = tail.lstrip(":")
    else:
        host, _, port = host_port.partition(":")

    database = path.lstrip("/").partition("?")[0]

    candidates = {
        "host": host,
        "port": port,
        "dbname": unquote(database),
        "user": unquote(user),
        "password": unquote(password),
    }
    return {key: value for key, value in candidates.items() if value}


def _connect_or_create_database(conn_params):
    """Connect to the target database, creating it first if it does not exist.

    Args:
        conn_params: Keyword arguments for ``psycopg2.connect``.

    Returns:
        An open psycopg2 connection to the target database.

    Raises:
        psycopg2.OperationalError: If the connection fails for any reason other
            than a missing database, or if creating the database fails.
    """
    from psycopg2 import sql

    try:
        return psycopg2.connect(**conn_params)
    except psycopg2.OperationalError as exc:
        db_name = conn_params.get("dbname")
        if not db_name or "does not exist" not in str(exc):
            raise

    logger.info(f"Database {db_name} does not exist. Creating it...")
    # Connect to the "postgres" maintenance database to issue CREATE DATABASE.
    maintenance_conn = psycopg2.connect(**{**conn_params, "dbname": "postgres"})
    try:
        # CREATE DATABASE cannot run inside a transaction block.
        maintenance_conn.autocommit = True
        maintenance_cursor = maintenance_conn.cursor()
        try:
            maintenance_cursor.execute(
                sql.SQL("CREATE DATABASE {}").format(sql.Identifier(db_name))
            )
        finally:
            maintenance_cursor.close()
    finally:
        maintenance_conn.close()

    return psycopg2.connect(**conn_params)


def _execute_isolated(cursor, statement, params=None):
    """Execute one statement inside a savepoint so a failure stays contained.

    PostgreSQL aborts the whole transaction after any error; every later
    statement then fails until a rollback. Rolling back to a savepoint keeps
    the surrounding transaction usable, which is what the "log the error and
    continue" loops in this module rely on.

    Raises:
        psycopg2.Error: Re-raised after rolling back to the savepoint.
    """
    cursor.execute("SAVEPOINT schema_init_step")
    try:
        cursor.execute(statement, params)
    except psycopg2.Error:
        cursor.execute("ROLLBACK TO SAVEPOINT schema_init_step")
        raise
    cursor.execute("RELEASE SAVEPOINT schema_init_step")


def _drop_public_tables(conn, cursor):
    """Drop every base table in the ``public`` schema and commit."""
    from psycopg2 import sql

    logger.warning("Dropping all existing tables as requested by drop_all=True")
    # Postpone deferrable constraint checks until commit.
    cursor.execute("SET CONSTRAINTS ALL DEFERRED;")

    # Only base tables: DROP TABLE would fail on views listed in the same catalog.
    cursor.execute("""
        SELECT table_name FROM information_schema.tables
        WHERE table_schema = 'public' AND table_type = 'BASE TABLE'
    """)
    for (table_name,) in cursor.fetchall():
        drop_statement = sql.SQL("DROP TABLE IF EXISTS {} CASCADE").format(
            sql.Identifier(table_name)
        )
        try:
            _execute_isolated(cursor, drop_statement)
            logger.info(f"Dropped table: {table_name}")
        except psycopg2.Error as e:
            logger.warning(f"Could not drop table {table_name}: {e}")

    conn.commit()
    logger.info("All existing tables dropped successfully")


def _load_schema_commands(schema_file):
    """Read the schema file and return its statements in execution order.

    Comments are stripped and the script is split on ``;``. This is a plain
    textual split: a semicolon or comment marker inside a string literal or a
    function body is not recognised as such.

    Returns:
        list of statements ordered as: DROP TABLE, CREATE TABLE, everything
        else (e.g. INSERT), CREATE INDEX. DROP TABLE statements found in the
        file are executed regardless of ``drop_all``.

    Raises:
        FileNotFoundError: If ``schema_file`` does not exist.
    """
    with open(schema_file, "r", encoding="utf-8") as f:
        sql_script = f.read()

    # Remove block comments /* ... */ and line comments -- ... (the latter up
    # to, but not including, the newline so a comment on the last line of the
    # file is removed as well).
    sql_script = re.sub(r'/\*.*?\*/', '', sql_script, flags=re.DOTALL)
    sql_script = re.sub(r'--[^\n]*', '', sql_script)

    drop_commands = []
    create_table_commands = []
    other_commands = []
    create_index_commands = []
    for raw_command in sql_script.split(';'):
        command = raw_command.strip()
        if not command:
            continue
        keyword_view = command.upper()
        if keyword_view.startswith("CREATE TABLE"):
            create_table_commands.append(command)
        elif keyword_view.startswith("CREATE INDEX"):
            create_index_commands.append(command)
        elif keyword_view.startswith("DROP TABLE"):
            drop_commands.append(command)
        else:
            other_commands.append(command)

    return drop_commands + create_table_commands + other_commands + create_index_commands


def _ensure_required_tables(conn, cursor):
    """Check that all required tables exist; create ``analysis_reports`` if missing.

    Other missing tables are only reported.
    """
    logger.info("Performing database integrity check...")
    missing_tables = []
    for table in _REQUIRED_TABLES:
        try:
            # Table names come from the constant above, so the f-string is safe.
            _execute_isolated(cursor, f"SELECT 1 FROM {table} LIMIT 1")
            logger.debug(f"Table '{table}' exists")
        except psycopg2.Error:
            missing_tables.append(table)
            logger.warning(f"Table '{table}' is missing")

    if not missing_tables:
        logger.info("All required tables are present in the database")
        return

    logger.warning(f"Found {len(missing_tables)} missing tables: {missing_tables}")
    if 'analysis_reports' not in missing_tables:
        return

    table = 'analysis_reports'
    try:
        cursor.execute("""
            CREATE TABLE analysis_reports (
                id VARCHAR(255) PRIMARY KEY,
                user_id VARCHAR(255) NOT NULL,
                user_question TEXT NOT NULL,
                summary TEXT NOT NULL,
                json_file_path TEXT NOT NULL,
                txt_file_path TEXT NOT NULL,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (user_id) REFERENCES users(id)
            )
        """)
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_analysis_reports_user_id ON analysis_reports(user_id)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_analysis_reports_timestamp ON analysis_reports(timestamp)")
        conn.commit()
        logger.info(f"Successfully created missing table '{table}'")
    except psycopg2.Error as e:
        # Clear the aborted transaction so the seeding steps can still run.
        conn.rollback()
        logger.error(f"Failed to create table '{table}': {e}")


def _ensure_default_admin(conn, cursor):
    """Upsert the default admin user, the default roles and the admin role link.

    Credentials come from ``DEFAULT_ADMIN_USERNAME`` / ``DEFAULT_ADMIN_PASSWORD``.
    If either is unset the user (and its role assignment) is skipped; the roles
    are still ensured.
    """
    logger.info("Initializing default admin user and roles...")

    admin_username = os.getenv("DEFAULT_ADMIN_USERNAME")
    admin_password = os.getenv("DEFAULT_ADMIN_PASSWORD")
    actual_admin_user_id = None

    if admin_username and admin_password:
        # Values are passed as parameters so quotes in the username or hash
        # cannot break (or inject into) the statement.
        cursor.execute(
            """
            INSERT INTO users (id, username, hashed_password, email, name, age, failed_login_attempts, is_locked)
            VALUES (%s, %s, %s, 'admin@example.com', 'Admin User', 30, 0, FALSE)
            ON CONFLICT(username) DO UPDATE SET
                hashed_password = EXCLUDED.hashed_password,
                email = EXCLUDED.email,
                name = EXCLUDED.name,
                age = EXCLUDED.age,
                failed_login_attempts = EXCLUDED.failed_login_attempts,
                is_locked = EXCLUDED.is_locked
            RETURNING id;
            """,
            (str(uuid.uuid4()), admin_username, get_password_hash(admin_password)),
        )
        # On conflict the existing row's id is returned, not the fresh UUID.
        actual_admin_user_id = cursor.fetchone()[0]
        logger.info(f"Default admin user '{admin_username}' initialized/updated with ID: {actual_admin_user_id}")
    else:
        logger.error(
            "DEFAULT_ADMIN_USERNAME and DEFAULT_ADMIN_PASSWORD must both be set; "
            "skipping default admin user creation."
        )

    cursor.execute("INSERT INTO roles (role_name) VALUES ('admin') ON CONFLICT(role_name) DO NOTHING;")
    cursor.execute("INSERT INTO roles (role_name) VALUES ('analyst') ON CONFLICT(role_name) DO NOTHING;")
    conn.commit()
    logger.info("Default roles 'admin' and 'analyst' ensured.")

    if actual_admin_user_id is None:
        return

    cursor.execute(
        """
        INSERT INTO user_roles (user_id, role_id)
        VALUES (%s, (SELECT id FROM roles WHERE role_name = 'admin'))
        ON CONFLICT(user_id, role_id) DO NOTHING;
        """,
        (str(actual_admin_user_id),),
    )
    conn.commit()
    logger.info(f"Admin role assigned to default admin user '{admin_username}'.")


def _ensure_default_pii_patterns(conn, cursor):
    """Upsert the default PII regexes as JSON into ``system_settings``."""
    cursor.execute(
        """INSERT INTO system_settings (setting_key, setting_value, setting_type, description) VALUES ('PII_PATTERNS', %s, 'json', 'Default PII patterns for sanitization') ON CONFLICT(setting_key) DO UPDATE SET setting_value = EXCLUDED.setting_value, updated_at = CURRENT_TIMESTAMP;""",
        (json.dumps(_DEFAULT_PII_PATTERNS),)
    )
    conn.commit()
    logger.info("Default PII patterns ensured in system_settings.")


def initialize_db_schema(drop_all=False):
    """Apply the SQL schema file to PostgreSQL and seed default data.

    Steps: connect (creating the database if needed), optionally drop all
    tables, execute the statements from ``wiki/06_Database_Schema_Design.sql``
    (each failure is logged and skipped), verify the required tables, then
    upsert the default admin user, the default roles and the default PII
    patterns.

    Reads ``DATABASE_URL``, ``DB_SERVER_TYPE``, ``DEFAULT_ADMIN_USERNAME`` and
    ``DEFAULT_ADMIN_PASSWORD`` from the environment. Nothing is done unless
    ``DB_SERVER_TYPE`` is ``postgresql`` (case-insensitive).

    Database, file and other runtime errors are logged and swallowed; the
    function returns ``None`` either way.

    Args:
        drop_all: When true, drop every base table in the ``public`` schema
            before applying the schema file.

    Raises:
        ValueError: If ``DATABASE_URL`` is unset, malformed, or uses a scheme
            other than ``postgresql`` / ``postgresql+asyncpg``.
    """
    db_url = os.getenv("DATABASE_URL")
    if not db_url:
        raise ValueError("DATABASE_URL environment variable is not set")
    conn_params = _parse_pg_url(db_url)

    # Never write the password to the log.
    loggable_target = " ".join(
        f"{key}={value}" for key, value in conn_params.items() if key != "password"
    )
    logger.info(f"Initializing schema for database: {loggable_target}")
    schema_file = "wiki/06_Database_Schema_Design.sql"

    conn = None
    cursor = None
    try:
        server_type = os.getenv("DB_SERVER_TYPE")
        if (server_type or "").lower() != "postgresql":
            logger.warning(f"This script is intended for PostgreSQL. Current DB_SERVER_TYPE is {server_type}. Skipping schema initialization.")
            return

        conn = _connect_or_create_database(conn_params)
        cursor = conn.cursor()

        if drop_all:
            _drop_public_tables(conn, cursor)

        all_commands = _load_schema_commands(schema_file)
        logger.info(f"Found {len(all_commands)} SQL commands to execute.")

        # Best effort: a failing statement is logged and skipped; the savepoint
        # keeps the statements that did succeed committable.
        for command in all_commands:
            try:
                _execute_isolated(cursor, command)
                logger.debug(f"Executed SQL command: {command[:100].strip()}...")
            except psycopg2.Error as cmd_e:
                logger.error(f"Error executing command: {command[:100].strip()}... Error: {cmd_e}")

        conn.commit()
        logger.info(
            f"Database schema from {schema_file} initialized successfully in PostgreSQL."
        )

        _ensure_required_tables(conn, cursor)
        _ensure_default_admin(conn, cursor)
        _ensure_default_pii_patterns(conn, cursor)

    except psycopg2.Error as e:
        logger.error(f"PostgreSQL error during schema initialization: {e}")
        if conn:
            conn.rollback()
    except FileNotFoundError:
        logger.error(f"Schema file not found: {schema_file}")
    except Exception as e:
        logger.error(f"An unexpected error occurred during schema initialization: {e}")
        if conn:
            conn.rollback()
    finally:
        # The cursor may never have been created if connecting failed.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    import logging

    # structlog is configured below to emit through the standard library
    # (LoggerFactory). Without a configured stdlib handler and level, every
    # info/debug event is dropped by logging's default WARNING threshold.
    logging.basicConfig(format="%(message)s", level=logging.INFO)

    # Render events as JSON lines with logger name, level and ISO timestamp.
    structlog.configure(
        processors=[
            structlog.stdlib.add_logger_name,
            structlog.stdlib.add_log_level,
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.JSONRenderer()
        ],
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )

    # Usage: pass "drop_all=true" (case-insensitive) as the first argument to
    # drop all existing tables before the schema is applied.
    drop_all = len(sys.argv) > 1 and sys.argv[1].lower() == "drop_all=true"

    initialize_db_schema(drop_all=drop_all)