import sys
import os

# Resolve the parent of this file's directory and put it first on sys.path
# (presumably so project-local packages import when this file is run directly).
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, project_root)

import psycopg2
 
import random
from datetime import datetime, timedelta

import structlog
# Module-level structlog logger, created with this module's name.
logger = structlog.get_logger(__name__)
from dotenv import load_dotenv
# Invoke python-dotenv's loader at import time; initialize_sample_data() reads
# DATABASE_URL from the environment afterwards (presumably supplied via .env).
load_dotenv()


def initialize_sample_data():
    db_url = os.getenv("DATABASE_URL")

    # Parse the DATABASE_URL for psycopg2
    def parse_pg_url(url):
        """Convert a PostgreSQL connection URL into psycopg2 keyword arguments.

        Accepts ``postgresql://`` and ``postgresql+asyncpg://`` URLs shaped like
        ``scheme://[user[:password]@][host][:port][/dbname][?param=value&...]``.

        Args:
            url: The connection URL string (here, the DATABASE_URL value).

        Returns:
            A dict holding only the components present in the URL, under the
            keys ``host``, ``port`` (kept as a string), ``dbname``, ``user``
            and ``password``. User, password, host and dbname are
            percent-decoded. Query-string parameters are added as extra keys
            unless they would overwrite one of those components.

        Raises:
            ValueError: If ``url`` is missing or empty, has no ``://``
                separator, uses an unsupported scheme, or is rejected by
                ``urllib.parse.urlsplit``.
        """
        # Imported locally so the helper does not depend on file-level imports.
        from urllib.parse import parse_qsl, unquote, urlsplit

        if not url:
            # os.getenv() yields None when the variable is unset; report that
            # clearly instead of failing with AttributeError on None.
            raise ValueError(
                "Invalid database URL format: value is missing or empty"
            )

        scheme, separator, _ = url.partition("://")
        if not separator:
            raise ValueError("Invalid database URL format")
        if scheme not in ("postgresql", "postgresql+asyncpg"):
            raise ValueError(f"Unsupported scheme: {scheme}")

        # urlsplit separates netloc, path and query, so a query string such as
        # "?sslmode=require" no longer ends up inside the database name.
        parsed = urlsplit(url)

        # Split on the LAST "@": an unescaped "@" inside the password must not
        # cut the credentials short.
        credentials, _, host_port = parsed.netloc.rpartition("@")
        user, _, password = credentials.partition(":")

        if host_port.startswith("["):
            # Bracketed IPv6 literal, e.g. "[::1]:5432".
            host, _, after_bracket = host_port[1:].partition("]")
            port = after_bracket.partition(":")[2]
        else:
            host, _, port = host_port.partition(":")

        components = (
            ("host", unquote(host)),
            ("port", port),
            ("dbname", unquote(parsed.path.lstrip("/"))),
            ("user", unquote(user)),
            ("password", unquote(password)),
        )
        # Only non-empty components are passed on to psycopg2.
        conn_params = {key: value for key, value in components if value}

        # Forward query parameters as additional connection keywords; explicit
        # URL components take precedence over same-named query parameters.
        for key, value in parse_qsl(parsed.query):
            conn_params.setdefault(key, value)

        return conn_params

    db_params = parse_pg_url(db_url)

    conn = None
    try:
        conn = psycopg2.connect(**db_params)
        cursor = conn.cursor()

        # Clear existing data from relevant tables
        logger.info("Clearing existing sample data...")
        truncate_sqls = [
            "TRUNCATE TABLE returns CASCADE;",
            "TRUNCATE TABLE shipments CASCADE;",
            "TRUNCATE TABLE orders CASCADE;",
            "TRUNCATE TABLE employees CASCADE;",
            "TRUNCATE TABLE products CASCADE;",
            "TRUNCATE TABLE customers CASCADE;",
            "TRUNCATE TABLE stores CASCADE;",
        ]
        for sql_statement in truncate_sqls:
            cursor.execute(sql_statement)
        conn.commit()
        logger.info("Existing sample data cleared successfully.")
        conn = psycopg2.connect(**db_params)
        cursor = conn.cursor()
        cursor.execute("SET search_path TO public;")

        # Sample Data SQL Statements (individual statements for psycopg2)
        # Fixed subqueries to be more specific by including data_source condition
        sample_data_sqls = [
            """INSERT INTO data_sources (name, type, connection_config) VALUES ('sales_data_warehouse', 'POSTGRESQL', '{"host": "127.0.0.1", "port": 5432, "user": "postgres", "password": "ZYCBI2025", "database": "chatbi_forge_dev", "description": "Sample sales data warehouse for BI analysis.", "active": true}') ON CONFLICT(name) DO NOTHING;""",
            """INSERT INTO table_schemas (data_source_id, table_name, description, selection_logic) VALUES ((SELECT id FROM data_sources WHERE name = 'sales_data_warehouse'), 'orders', 'Stores customer order information, including sales amount and order date.', 'Use for sales, revenue, and order-related queries.') ON CONFLICT(data_source_id, table_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'orders' AND ds.name = 'sales_data_warehouse'), 'order_id', 'INTEGER', 'DIMENSION', 'Unique identifier for each order.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'orders' AND ds.name = 'sales_data_warehouse'), 'customer_id', 'INTEGER', 'DIMENSION', 'Identifier for the customer who placed the order.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'orders' AND ds.name = 'sales_data_warehouse'), 'order_date', 'DATE', 'DIMENSION', 'The date when the order was placed.', 'sale_date, transaction_date') ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'orders' AND ds.name = 'sales_data_warehouse'), 'product_id', 'INTEGER', 'DIMENSION', 'Identifier for the product purchased.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'orders' AND ds.name = 'sales_data_warehouse'), 'quantity', 'INTEGER', 'METRIC', 'Number of units of the product sold in the order.', 'units_sold') ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'orders' AND ds.name = 'sales_data_warehouse'), 'price', 'REAL', 'METRIC', 'Unit price of the product at the time of order.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'orders' AND ds.name = 'sales_data_warehouse'), 'sales_amount', 'REAL', 'METRIC', 'Total sales amount for the order item (quantity * price).', 'revenue, total_sales') ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO metric_definitions (metric_name, metric_alias, definition_formula, monitoring_query, dependencies, business_meaning) VALUES ('Total Sales', 'TotalRevenue', 'SUM(sales_amount)', 'SELECT SUM(sales_amount) AS value, order_date AS time FROM orders GROUP BY order_date', 'sales_amount', 'The sum of all sales amounts across all orders.') ON CONFLICT(metric_name) DO UPDATE SET metric_alias=EXCLUDED.metric_alias, definition_formula=EXCLUDED.definition_formula, monitoring_query=EXCLUDED.monitoring_query, dependencies=EXCLUDED.dependencies, business_meaning=EXCLUDED.business_meaning;""",
            """INSERT INTO metric_definitions (metric_name, metric_alias, definition_formula, monitoring_query, dependencies, business_meaning) VALUES ('Average Order Value', 'AOV', 'AVG(sales_amount)', 'SELECT AVG(sales_amount) AS value, order_date AS time FROM orders GROUP BY order_date', 'sales_amount', 'The average sales amount per order.') ON CONFLICT(metric_name) DO UPDATE SET metric_alias=EXCLUDED.metric_alias, definition_formula=EXCLUDED.definition_formula, monitoring_query=EXCLUDED.monitoring_query, dependencies=EXCLUDED.dependencies, business_meaning=EXCLUDED.business_meaning;""",
            """INSERT INTO users (id, username, hashed_password, email) VALUES ('user123', 'Alice', '$2b$12$examplehashedpasswordforAlice.example', 'alice@example.com') ON CONFLICT(id) DO NOTHING;""",
            """INSERT INTO roles (role_name) VALUES ('analyst') ON CONFLICT(role_name) DO NOTHING;""",
            """INSERT INTO roles (role_name) VALUES ('admin') ON CONFLICT(role_name) DO NOTHING;""",
            """INSERT INTO user_roles (user_id, role_id) VALUES ('user123', (SELECT id FROM roles WHERE role_name = 'analyst')) ON CONFLICT(user_id, role_id) DO NOTHING;""",
            """INSERT INTO orders (order_id, customer_id, order_date, product_id, quantity, price, sales_amount) VALUES (1, 101, '2025-07-20', 1, 2, 10.00, 20.00) ON CONFLICT(order_id) DO NOTHING;""",
            """INSERT INTO orders (order_id, customer_id, order_date, product_id, quantity, price, sales_amount) VALUES (2, 102, '2025-07-21', 2, 1, 25.00, 25.00) ON CONFLICT(order_id) DO NOTHING;""",
            """INSERT INTO orders (order_id, customer_id, order_date, product_id, quantity, price, sales_amount) VALUES (3, 101, '2025-07-22', 3, 3, 5.00, 15.00) ON CONFLICT(order_id) DO NOTHING;""",
            """INSERT INTO orders (order_id, customer_id, order_date, product_id, quantity, price, sales_amount) VALUES (4, 103, '2025-07-23', 1, 1, 10.00, 10.00) ON CONFLICT(order_id) DO NOTHING;""",
            """INSERT INTO orders (order_id, customer_id, order_date, product_id, quantity, price, sales_amount) VALUES (5, 102, '2025-07-24', 2, 2, 25.00, 50.00) ON CONFLICT(order_id) DO NOTHING;""",
            """INSERT INTO orders (order_id, customer_id, order_date, product_id, quantity, price, sales_amount) VALUES (6, 104, '2025-07-25', 4, 1, 100.00, 100.00) ON CONFLICT(order_id) DO NOTHING;""",
            """INSERT INTO orders (order_id, customer_id, order_date, product_id, quantity, price, sales_amount) VALUES (7, 101, '2025-07-26', 3, 2, 5.00, 10.00) ON CONFLICT(order_id) DO NOTHING;""",
            """INSERT INTO table_schemas (data_source_id, table_name, description, selection_logic) VALUES ((SELECT id FROM data_sources WHERE name = 'sales_data_warehouse'), 'products', 'Stores information about products, including their categories.', 'Use for product analysis and categorization.') ON CONFLICT(data_source_id, table_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'products' AND ds.name = 'sales_data_warehouse'), 'product_id', 'INTEGER', 'DIMENSION', 'Unique identifier for each product.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'products' AND ds.name = 'sales_data_warehouse'), 'product_name', 'TEXT', 'DIMENSION', 'Name of the product.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'products' AND ds.name = 'sales_data_warehouse'), 'category', 'TEXT', 'DIMENSION', 'Category of the product (e.g., Electronics, Apparel).', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO table_schemas (data_source_id, table_name, description, selection_logic) VALUES ((SELECT id FROM data_sources WHERE name = 'sales_data_warehouse'), 'customers', 'Stores customer demographic information.', 'Use for customer analysis and segmentation.') ON CONFLICT(data_source_id, table_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'customers' AND ds.name = 'sales_data_warehouse'), 'customer_id', 'INTEGER', 'DIMENSION', 'Unique identifier for each customer.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'customers' AND ds.name = 'sales_data_warehouse'), 'customer_name', 'TEXT', 'DIMENSION', 'Full name of the customer.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'customers' AND ds.name = 'sales_data_warehouse'), 'email', 'TEXT', 'DIMENSION', 'Email address of the customer.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'customers' AND ds.name = 'sales_data_warehouse'), 'city', 'TEXT', 'DIMENSION', 'City where the customer resides.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'customers' AND ds.name = 'sales_data_warehouse'), 'country', 'TEXT', 'DIMENSION', 'Country where the customer resides.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO table_schemas (data_source_id, table_name, description, selection_logic) VALUES ((SELECT id FROM data_sources WHERE name = 'sales_data_warehouse'), 'stores', 'Information about retail stores.', 'Use for store-level performance analysis.') ON CONFLICT(data_source_id, table_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'stores' AND ds.name = 'sales_data_warehouse'), 'store_id', 'INTEGER', 'DIMENSION', 'Unique identifier for each store.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'stores' AND ds.name = 'sales_data_warehouse'), 'store_name', 'TEXT', 'DIMENSION', 'Name of the store.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'stores' AND ds.name = 'sales_data_warehouse'), 'city', 'TEXT', 'DIMENSION', 'City where the store is located.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'stores' AND ds.name = 'sales_data_warehouse'), 'region', 'TEXT', 'DIMENSION', 'Geographical region of the store.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO table_schemas (data_source_id, table_name, description, selection_logic) VALUES ((SELECT id FROM data_sources WHERE name = 'sales_data_warehouse'), 'employees', 'Information about employees, linked to stores.', 'Use for employee performance or store staffing analysis.') ON CONFLICT(data_source_id, table_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'employees' AND ds.name = 'sales_data_warehouse'), 'employee_id', 'INTEGER', 'DIMENSION', 'Unique identifier for each employee.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'employees' AND ds.name = 'sales_data_warehouse'), 'employee_name', 'TEXT', 'DIMENSION', 'Full name of the employee.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'employees' AND ds.name = 'sales_data_warehouse'), 'hire_date', 'DATE', 'DIMENSION', 'Date when the employee was hired.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'employees' AND ds.name = 'sales_data_warehouse'), 'store_id', 'INTEGER', 'DIMENSION', 'Identifier of the store where the employee works.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO table_schemas (data_source_id, table_name, description, selection_logic) VALUES ((SELECT id FROM data_sources WHERE name = 'sales_data_warehouse'), 'shipments', 'Details about product shipments for orders.', 'Use for logistics and delivery tracking analysis.') ON CONFLICT(data_source_id, table_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'shipments' AND ds.name = 'sales_data_warehouse'), 'shipment_id', 'INTEGER', 'DIMENSION', 'Unique identifier for each shipment.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'shipments' AND ds.name = 'sales_data_warehouse'), 'order_id', 'INTEGER', 'DIMENSION', 'Identifier of the order associated with the shipment.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'shipments' AND ds.name = 'sales_data_warehouse'), 'shipment_date', 'DATE', 'DIMENSION', 'Date when the shipment was made.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'shipments' AND ds.name = 'sales_data_warehouse'), 'delivery_status', 'TEXT', 'DIMENSION', 'Current delivery status (e.g., Shipped, Delivered, In Transit).', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO table_schemas (data_source_id, table_name, description, selection_logic) VALUES ((SELECT id FROM data_sources WHERE name = 'sales_data_warehouse'), 'returns', 'Records of returned products.', 'Use for analyzing product returns and customer satisfaction.') ON CONFLICT(data_source_id, table_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'returns' AND ds.name = 'sales_data_warehouse'), 'return_id', 'INTEGER', 'DIMENSION', 'Unique identifier for each return.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'returns' AND ds.name = 'sales_data_warehouse'), 'order_id', 'INTEGER', 'DIMENSION', 'Identifier of the original order for the returned product.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'returns' AND ds.name = 'sales_data_warehouse'), 'product_id', 'INTEGER', 'DIMENSION', 'Identifier of the product that was returned.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'returns' AND ds.name = 'sales_data_warehouse'), 'return_date', 'DATE', 'DIMENSION', 'Date when the product was returned.', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
            """INSERT INTO column_schemas (table_schema_id, column_name, data_type, column_type, description, synonyms) VALUES ((SELECT ts.id FROM table_schemas ts JOIN data_sources ds ON ts.data_source_id = ds.id WHERE ts.table_name = 'returns' AND ds.name = 'sales_data_warehouse'), 'reason', 'TEXT', 'DIMENSION', 'Reason for the return (e.g., Defective, Wrong Size, Unwanted).', NULL) ON CONFLICT(table_schema_id, column_name) DO NOTHING;""",
        ]

        # Execute static SQL statements one by one
        for sql_statement in sample_data_sqls:
            cursor.execute(sql_statement)
        conn.commit()
        logger.info("Static sample data initialized successfully.")

        # Add product categories and products
        product_categories = [
            "Electronics",
            "Apparel",
            "Home Goods",
            "Books",
            "Food",
            "Sports",
            "Beauty",
            "Toys",
        ]
        products_to_insert = []
        for i in range(1, 101):  # Generate 100 products
            product_id = i
            product_name = f"Product_{i}"
            category = random.choice(product_categories)
            products_to_insert.append((product_id, product_name, category))

        # Insert products into 'products' table
        # Assuming 'products' table exists with columns: product_id, product_name, category
        for prod_id, prod_name, cat in products_to_insert:
            cursor.execute(f"""
                INSERT INTO products (product_id, product_name, category)
                VALUES ({prod_id}, '{prod_name}', '{cat}')
                ON CONFLICT(product_id) DO NOTHING;
            """)
        conn.commit()
        logger.info("Product data initialized successfully.")

        # Generate a large number of dynamic orders
        num_dynamic_orders = random.randint(
            5000, 10000
        )  # Generate between 5,000 and 10,000 orders
        start_order_id = 10000  # Start order_id from a higher number to avoid conflicts with static data

        logger.info(f"Generating {num_dynamic_orders} dynamic orders...")
        for i in range(num_dynamic_orders):
            order_id = start_order_id + i
            customer_id = random.randint(100, 500)

            # Generate random date within the last year
            random_days = random.randint(0, 365)
            order_date = (datetime.now() - timedelta(days=random_days)).strftime(
                "%Y-%m-%d"
            )

            # Use product_id from the generated products
            product_id = random.choice([p[0] for p in products_to_insert])

            quantity = random.randint(1, 10)
            price = round(random.uniform(5.0, 150.0), 2)
            sales_amount = round(quantity * price, 2)

            dynamic_order_sql = f"""
            INSERT INTO orders (order_id, customer_id, order_date, product_id, quantity, price, sales_amount) 
            VALUES ({order_id}, {customer_id}, '{order_date}', {product_id}, {quantity}, {price}, {sales_amount}) 
            ON CONFLICT(order_id) DO NOTHING;
            """
            cursor.execute(dynamic_order_sql)

            if (i + 1) % 1000 == 0:
                conn.commit()
                logger.info(f"Committed {i + 1} dynamic orders.")

        conn.commit()
        logger.info(
            f"Finished generating and committing {num_dynamic_orders} dynamic orders."
        )

        # Generate dynamic data for new tables
        num_customers = 500
        num_stores = 50
        num_employees_per_store = 10
        num_shipments = num_dynamic_orders  # One shipment per order
        num_returns = int(num_dynamic_orders * 0.1)  # 10% of orders returned

        # --- Customers Table ---
        logger.info(f"Generating {num_customers} customers...")
        customer_names = [
            "Alice",
            "Bob",
            "Charlie",
            "David",
            "Eve",
            "Frank",
            "Grace",
            "Heidi",
            "Ivan",
            "Judy",
        ]
        cities = [
            "New York",
            "Los Angeles",
            "Chicago",
            "Houston",
            "Phoenix",
            "Philadelphia",
            "San Antonio",
            "San Diego",
            "Dallas",
            "San Jose",
        ]
        countries = [
            "USA",
            "Canada",
            "Mexico",
            "UK",
            "Germany",
            "France",
            "Australia",
            "Japan",
            "China",
            "India",
        ]

        for i in range(1, num_customers + 1):
            customer_id = i
            customer_name = (
                random.choice(customer_names) + " " + str(random.randint(1, 100))
            )
            email = f"customer_{customer_id}_{customer_name.replace(' ', '').lower()}@example.com"
            city = random.choice(cities)
            country = random.choice(countries)
            cursor.execute(f"""
                INSERT INTO customers (customer_id, customer_name, email, city, country)
                VALUES ({customer_id}, '{customer_name}', '{email}', '{city}', '{country}')
                ON CONFLICT(customer_id) DO NOTHING;
            """)
        conn.commit()
        logger.info("Customer data initialized successfully.")

        # --- Stores Table ---
        logger.info(f"Generating {num_stores} stores...")
        store_cities = [
            "New York",
            "Los Angeles",
            "Chicago",
            "Houston",
            "Phoenix",
            "London",
            "Paris",
            "Berlin",
            "Tokyo",
            "Sydney",
        ]
        regions = ["North", "South", "East", "West", "Central"]

        for i in range(1, num_stores + 1):
            store_id = i
            store_name = f"Store {i}"
            city = random.choice(store_cities)
            region = random.choice(regions)
            cursor.execute(f"""
                INSERT INTO stores (store_id, store_name, city, region)
                VALUES ({store_id}, '{store_name}', '{city}', '{region}')
                ON CONFLICT(store_id) DO NOTHING;
            """)
        conn.commit()
        logger.info("Store data initialized successfully.")

        # --- Employees Table ---
        logger.info(f"Generating {num_stores * num_employees_per_store} employees...")
        employee_names = [
            "John",
            "Jane",
            "Peter",
            "Mary",
            "Robert",
            "Linda",
            "Michael",
            "Susan",
            "William",
            "Elizabeth",
        ]

        for store_id in range(1, num_stores + 1):
            for i in range(num_employees_per_store):
                employee_id = (store_id - 1) * num_employees_per_store + i + 1
                employee_name = (
                    random.choice(employee_names) + " " + str(random.randint(1, 100))
                )
                hire_date = (
                    datetime.now() - timedelta(days=random.randint(365, 1825))
                ).strftime("%Y-%m-%d")  # Hired 1-5 years ago
                cursor.execute(f"""
                    INSERT INTO employees (employee_id, employee_name, hire_date, store_id)
                    VALUES ({employee_id}, '{employee_name}', '{hire_date}', {store_id})
                    ON CONFLICT(employee_id) DO NOTHING;
                """)
        conn.commit()
        logger.info("Employee data initialized successfully.")

        # --- Shipments Table ---
        logger.info(f"Generating {num_shipments} shipments...")
        delivery_statuses = [
            "Shipped",
            "Delivered",
            "In Transit",
            "Pending",
            "Cancelled",
        ]

        # Order ids are the 7 static orders (1-7) followed by the dynamic orders,
        # which start at start_order_id, so the id space is not contiguous.
        # Adjust this list if the static or dynamic order ids change.
        all_order_ids = list(range(1, 8)) + list(
            range(start_order_id, start_order_id + num_dynamic_orders)
        )
        random.shuffle(all_order_ids)  # Shuffle to pick random orders for shipments

        for i in range(num_shipments):
            shipment_id = i + 1
            order_id = (
                all_order_ids[i]
                if i < len(all_order_ids)
                else random.choice(all_order_ids)
            )  # Ensure we don't go out of bounds
            shipment_date = (
                datetime.now() - timedelta(days=random.randint(0, 30))
            ).strftime("%Y-%m-%d")  # Shipped within last month
            delivery_status = random.choice(delivery_statuses)
            cursor.execute(f"""
                INSERT INTO shipments (shipment_id, order_id, shipment_date, delivery_status)
                VALUES ({shipment_id}, {order_id}, '{shipment_date}', '{delivery_status}')
                ON CONFLICT(shipment_id) DO NOTHING;
            """)
            if (i + 1) % 1000 == 0:
                conn.commit()
                logger.info(f"Committed {i + 1} shipments.")
        conn.commit()
        logger.info("Shipment data initialized successfully.")

        # --- Returns Table ---
        logger.info(f"Generating {num_returns} returns...")
        return_reasons = [
            "Defective",
            "Wrong Size",
            "Unwanted",
            "Damaged",
            "Late Delivery",
        ]

        # Select random product_ids from the generated products
        product_ids_for_returns = [p[0] for p in products_to_insert]

        for i in range(num_returns):
            return_id = i + 1
            order_id = random.choice(all_order_ids)  # Pick a random order to return
            product_id = random.choice(product_ids_for_returns)
            return_date = (
                datetime.now() - timedelta(days=random.randint(0, 60))
            ).strftime("%Y-%m-%d")  # Returned within last 2 months
            reason = random.choice(return_reasons)
            cursor.execute(f"""
                INSERT INTO returns (return_id, order_id, product_id, return_date, reason)
                VALUES ({return_id}, {order_id}, {product_id}, '{return_date}', '{reason}')
                ON CONFLICT(return_id) DO NOTHING;
            """)
            if (i + 1) % 1000 == 0:
                conn.commit()
                logger.info(f"Committed {i + 1} returns.")
        conn.commit()
        logger.info("Return data initialized successfully.")
        
        # Insert sample conversation history data
        logger.info("Inserting sample conversation history data...")
        try:
            # Get a sample user
            cursor.execute("SELECT id FROM users LIMIT 1")
            user_result = cursor.fetchone()
            if user_result:
                user_id = user_result[0]
                
                # Insert sample conversation history entries
                sample_history = [
                    {
                        "session_id": f"{user_id}:abc123-def456-ghi789",
                        "message_id": "msg1",
                        "author": "USER",
                        "content": "What were our sales last month?",
                        "sql_query": "SELECT SUM(sales) FROM orders WHERE order_date >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') AND order_date < DATE_TRUNC('month', CURRENT_DATE)",
                    },
                    {
                        "session_id": f"{user_id}:abc123-def456-ghi789",
                        "message_id": "msg2",
                        "author": "SYSTEM",
                        "content": "Last month's sales were $125,000.",
                        "sql_query": "SELECT SUM(sales) FROM orders WHERE order_date >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') AND order_date < DATE_TRUNC('month', CURRENT_DATE)",
                    },
                    {
                        "session_id": f"{user_id}:xyz789-uvw456-rst123",
                        "message_id": "msg3",
                        "author": "USER",
                        "content": "Show me the top 5 products by revenue.",
                        "sql_query": "SELECT product_name, SUM(revenue) as total_revenue FROM products p JOIN order_items oi ON p.product_id = oi.product_id GROUP BY p.product_name ORDER BY total_revenue DESC LIMIT 5",
                    }
                ]
                
                for entry in sample_history:
                    cursor.execute("""
                        INSERT INTO conversation_history 
                        (session_id, message_id, author, content, sql_query, timestamp)
                        VALUES (%(session_id)s, %(message_id)s, %(author)s, %(content)s, %(sql_query)s, NOW())
                        ON CONFLICT (message_id) DO NOTHING
                    """, entry)
                
                conn.commit()
                logger.info("Sample conversation history data inserted successfully.")
            else:
                logger.warning("No users found, skipping conversation history sample data insertion.")
        except Exception as e:
            logger.error(f"Error inserting sample conversation history data: {e}")
            conn.rollback()

    except psycopg2.Error as e:
        logger.error(f"PostgreSQL error during sample data initialization: {e}")
        if conn:
            conn.rollback()
    except Exception as e:
        logger.error(
            f"An unexpected error occurred during sample data initialization: {e}"
        )
        if conn:
            conn.rollback()
    finally:
        if conn:
            cursor.close()
            conn.close()


if __name__ == "__main__":
    # Script entry point: seed the sample data only when this file is run
    # directly, not when it is imported as a module.
    # NOTE: this guard performs no setup of its own. The .env file is loaded
    # (load_dotenv) and the structlog logger is created at module import time,
    # near the top of the file.
    # Database errors and other exceptions caught by the function's handlers
    # are logged and rolled back rather than re-raised, so such failures do
    # not propagate to this call site.
    initialize_sample_data()
