-- SQL DDL for InsightLink
-- Version: 1.0
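-- Target Database: PostgreSQL. The statements below rely on PostgreSQL features
-- (e.g. DROP TABLE ... CASCADE, SERIAL auto-increment keys), so the script does not
-- run on SQLite as written.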
-- This script creates the necessary tables for the metadata store.

-- Reset step: remove every metadata-store table so the CREATE TABLE statements
-- below always start from a clean slate. IF EXISTS keeps this safe on a fresh
-- database; CASCADE also removes dependent objects (such as foreign keys held by
-- other tables), so the order of the names does not matter.
-- The sample business tables (orders, customers, ...) are not dropped here; they
-- are created further down with CREATE TABLE IF NOT EXISTS.

-- Conversation, feedback, audit and report tables
DROP TABLE IF EXISTS
    message_feedback,
    conversation_history,
    conversations,
    audit_log,
    analysis_reports
CASCADE;

-- Configuration and security tables
DROP TABLE IF EXISTS
    user_roles,
    role_permissions,
    role_rls_policies,
    row_level_security_policies,
    roles,
    users,
    system_settings,
    monitoring_tasks
CASCADE;

-- Data and knowledge model tables
DROP TABLE IF EXISTS
    knowledge_source_directories,
    column_schemas,
    table_schemas,
    metric_definitions,
    data_sources
CASCADE;

-- =============================================
-- == 1. Data & Knowledge Model Tables        ==
-- =============================================
-- These tables form the core data catalog of the system.

-- data_sources: one row per external database / warehouse connection.
-- `name` is unique and is also referenced as a foreign-key target by
-- metric_definitions.data_source_name.
CREATE TABLE data_sources (
    id                SERIAL,
    name              TEXT      NOT NULL,
    type              TEXT      NOT NULL,
    connection_config TEXT      NOT NULL,  -- connection details serialized as JSON
    created_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    UNIQUE (name),
    -- Only the engine identifiers listed here are accepted.
    CHECK (type IN (
        'SNOWFLAKE', 'BIGQUERY', 'REDSHIFT', 'POSTGRESQL', 'MYSQL',
        'SQLSERVER', 'TIDB', 'OCEANBASE', 'CLICKHOUSE', 'ORACLE',
        'DB2', 'TRINO', 'DATABRICKS', 'ATHENA'
    ))
);

-- table_schemas: catalog entry for each table exposed by a data source.
CREATE TABLE table_schemas (
    id              SERIAL,
    data_source_id  INTEGER   NOT NULL REFERENCES data_sources(id),
    table_name      TEXT      NOT NULL,
    description     TEXT,
    selection_logic TEXT,                  -- guidance on when this table should be used
    created_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    -- A table name may appear only once within a given data source.
    UNIQUE (data_source_id, table_name)
);

-- column_schemas: catalog entry for each column of a cataloged table.
CREATE TABLE column_schemas (
    id              SERIAL,
    table_schema_id INTEGER   NOT NULL REFERENCES table_schemas(id),
    column_name     TEXT      NOT NULL,
    data_type       TEXT      NOT NULL,
    column_type     TEXT,                  -- nullable; restricted by the CHECK below
    description     TEXT,
    synonyms        TEXT,                  -- comma-separated list of synonyms
    created_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    -- A column name may appear only once within a given table schema.
    UNIQUE (table_schema_id, column_name),
    CHECK (column_type IN ('DIMENSION', 'METRIC'))
);

-- metric_definitions: business metrics and the SQL expression that computes each.
-- data_source_id and data_source_name are two independent, optional foreign keys
-- into data_sources; nothing here forces them to point at the same row.
CREATE TABLE metric_definitions (
    id                 SERIAL,
    metric_name        TEXT      NOT NULL,
    metric_alias       TEXT,
    data_source_id     INTEGER,
    data_source_name   TEXT,
    definition_formula TEXT      NOT NULL,  -- SQL expression
    dependencies       TEXT,                -- comma-separated dependent columns or metrics
    business_meaning   TEXT,
    monitoring_query   TEXT,                -- dedicated SQL used for monitoring
    created_at         TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at         TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    UNIQUE (metric_name),
    UNIQUE (metric_alias),
    FOREIGN KEY (data_source_id)   REFERENCES data_sources(id),
    FOREIGN KEY (data_source_name) REFERENCES data_sources(name)
);

-- knowledge_source_directories: named directories registered as knowledge
-- sources for RAG.
CREATE TABLE knowledge_source_directories (
    id          SERIAL,
    name        TEXT      NOT NULL,
    path        TEXT      NOT NULL,
    description TEXT,
    created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    UNIQUE (name)
);

-- orders: order lines. Created only when absent, so existing rows survive a
-- re-run of this script. customer_id and product_id carry no foreign-key
-- constraints in this definition.
CREATE TABLE IF NOT EXISTS orders (
    order_id     SERIAL,
    customer_id  INTEGER,
    order_date   DATE,
    product_id   INTEGER,
    quantity     INTEGER,
    price        NUMERIC(10, 2),
    sales_amount NUMERIC(10, 2),
    PRIMARY KEY (order_id)
);

-- customers: customer master data; email is optional but must be unique when set.
CREATE TABLE IF NOT EXISTS customers (
    customer_id   SERIAL,
    customer_name TEXT NOT NULL,
    email         TEXT,
    city          TEXT,
    country       TEXT,
    PRIMARY KEY (customer_id),
    UNIQUE (email)
);

-- products: product master data with an optional free-text category.
CREATE TABLE IF NOT EXISTS products (
    product_id   SERIAL,
    product_name TEXT NOT NULL,
    category     TEXT,
    PRIMARY KEY (product_id)
);

-- stores: store master data; referenced by employees.store_id.
CREATE TABLE IF NOT EXISTS stores (
    store_id   SERIAL,
    store_name TEXT NOT NULL,
    city       TEXT,
    region     TEXT,
    PRIMARY KEY (store_id)
);

-- employees: staff records, each optionally linked to a store.
CREATE TABLE IF NOT EXISTS employees (
    employee_id   SERIAL,
    employee_name TEXT    NOT NULL,
    hire_date     DATE,
    store_id      INTEGER REFERENCES stores(store_id),
    PRIMARY KEY (employee_id)
);

-- shipments: shipment records, each optionally linked to an order.
CREATE TABLE IF NOT EXISTS shipments (
    shipment_id     SERIAL,
    order_id        INTEGER REFERENCES orders(order_id),
    shipment_date   DATE,
    delivery_status TEXT,
    PRIMARY KEY (shipment_id)
);

-- returns: product returns, optionally linked to the originating order and
-- the returned product.
CREATE TABLE IF NOT EXISTS returns (
    return_id   SERIAL,
    order_id    INTEGER REFERENCES orders(order_id),
    product_id  INTEGER REFERENCES products(product_id),
    return_date DATE,
    reason      TEXT,
    PRIMARY KEY (return_id)
);

-- =============================================
-- == 2. Configuration & Security Tables      ==
-- =============================================

-- monitoring_tasks: scheduled threshold checks on a metric, with optional
-- notification settings. metric_name is stored as plain text; this definition
-- declares no foreign key to metric_definitions.
CREATE TABLE monitoring_tasks (
    id                      SERIAL,
    metric_name             VARCHAR(255) NOT NULL,
    check_frequency_minutes INTEGER      NOT NULL,
    threshold               TEXT         NOT NULL,
    comparison_operator     VARCHAR(255) NOT NULL,
    is_active               BOOLEAN      DEFAULT TRUE,
    notification_channel    VARCHAR(255),
    notification_config     JSONB,        -- JSONB for better performance and index support
    -- TIMESTAMP is TIMESTAMP WITHOUT TIME ZONE; CURRENT_TIMESTAMP is equivalent to NOW().
    created_at              TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    updated_at              TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id)
);

-- users: application accounts, including login state used for lockout handling.
CREATE TABLE users (
    id                    VARCHAR(255),          -- user ID from the identity provider (IdP)
    username              TEXT      NOT NULL,
    hashed_password       TEXT      NOT NULL,
    name                  TEXT,
    age                   INTEGER,
    email                 TEXT,
    failed_login_attempts INTEGER   DEFAULT 0,
    is_locked             BOOLEAN   DEFAULT FALSE,
    created_at            TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    UNIQUE (username),
    UNIQUE (email)
);

-- roles: the set of role names that can be assigned to users.
CREATE TABLE roles (
    id        SERIAL,
    role_name TEXT NOT NULL,
    PRIMARY KEY (id),
    UNIQUE (role_name)
);

-- user_roles: junction table assigning roles to users (many-to-many).
-- The composite primary key prevents assigning the same role to a user twice.
CREATE TABLE user_roles (
    -- Same type as users.id so the foreign key compares like with like.
    user_id VARCHAR(255) NOT NULL,
    role_id INTEGER      NOT NULL,
    PRIMARY KEY (user_id, role_id),
    -- Role assignments go away with the user. Without this, a user who holds any
    -- role could not be deleted, even though conversations already cascade on
    -- user deletion.
    FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
    -- No ON DELETE action: a role that is still assigned cannot be deleted.
    FOREIGN KEY (role_id) REFERENCES roles(id)
);

-- role_permissions: individual permissions granted to a role.
CREATE TABLE role_permissions (
    id               SERIAL,
    role_id          INTEGER NOT NULL REFERENCES roles(id),
    permission_type  TEXT    NOT NULL,
    resource_name    TEXT    NOT NULL,  -- e.g., table name, column name
    filter_condition TEXT,              -- for row-level security
    PRIMARY KEY (id),
    CHECK (permission_type IN ('TABLE_ACCESS', 'COLUMN_ACCESS', 'ROW_LEVEL_FILTER'))
);

-- row_level_security_policies: named row-level security rules per target table.
CREATE TABLE row_level_security_policies (
    id           SERIAL,
    policy_name  TEXT      NOT NULL,
    target_table TEXT      NOT NULL,
    policy_type  TEXT      NOT NULL,  -- e.g., 'FILTER', 'MASK'
    policy_rule  TEXT      NOT NULL,  -- SQL expression for the policy
    description  TEXT,
    created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    UNIQUE (policy_name)
);

-- role_rls_policies: junction table attaching row-level security policies to
-- roles (many-to-many). The composite primary key prevents duplicate pairs.
CREATE TABLE role_rls_policies (
    role_id       INTEGER NOT NULL REFERENCES roles(id),
    rls_policy_id INTEGER NOT NULL REFERENCES row_level_security_policies(id),
    PRIMARY KEY (role_id, rls_policy_id)
);

-- system_settings: key/value configuration entries, one row per unique key.
CREATE TABLE system_settings (
    id            SERIAL,
    setting_key   TEXT      NOT NULL,
    setting_value TEXT,
    setting_type  TEXT,               -- e.g., 'string', 'json', 'boolean'
    description   TEXT,
    created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    UNIQUE (setting_key)
);


-- =============================================
-- == 3. Conversation & Feedback Tables       ==
-- =============================================

-- conversations: one row per chat conversation, owned by a user.
CREATE TABLE conversations (
    id         VARCHAR(255),
    -- Deleting the owning user deletes the user's conversations as well.
    user_id    TEXT      NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    title      TEXT      NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id)
);

-- conversation_history: individual messages of a conversation, plus the
-- artifacts attached to system responses.
CREATE TABLE conversation_history (
    id                   SERIAL,
    -- Messages are removed together with their conversation.
    conversation_id      VARCHAR(255) NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    message_id           TEXT         NOT NULL,  -- unique; referenced by message_feedback
    author               TEXT         NOT NULL,
    content              TEXT         NOT NULL,

    -- Populated for system responses
    explanation          TEXT,
    sql_query            TEXT,
    data_result          JSON,                   -- the returned data as JSON
    visualization_config JSON,                   -- chart configuration as JSON

    -- Explainability details
    intent_info          JSON,                   -- intent information
    rag_context          JSON,                   -- RAG context
    sql_explanation      TEXT,                   -- explanation of the SQL query
    analysis_path        JSON,                   -- analysis steps

    share_id             TEXT,
    data_source_id       INTEGER REFERENCES data_sources(id),  -- data source tracking
    timestamp            TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    UNIQUE (message_id),
    UNIQUE (share_id),
    CHECK (author IN ('USER', 'SYSTEM'))
);

-- message_feedback: user feedback on a single message. message_id is UNIQUE,
-- so each message can carry at most one feedback row.
CREATE TABLE message_feedback (
    id SERIAL PRIMARY KEY,
    message_id TEXT NOT NULL UNIQUE,
    is_positive BOOLEAN NOT NULL,
    -- Optional 1-5 star rating. The CHECK enforces the documented range; a NULL
    -- rating still passes because a CHECK only rejects rows where it is false.
    star_rating INTEGER CHECK (star_rating BETWEEN 1 AND 5),
    correction_text TEXT,
    feedback_type TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- Feedback is removed together with the message it refers to.
    FOREIGN KEY (message_id) REFERENCES conversation_history(message_id) ON DELETE CASCADE
);

-- audit_log: record of user actions and their outcome. user_id is stored as
-- plain text; this definition declares no foreign key to users.
CREATE TABLE audit_log (
    id             SERIAL,
    user_id        TEXT      NOT NULL,
    timestamp      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    event_type     TEXT      NOT NULL,
    action_details TEXT,               -- event details serialized as JSON
    result         TEXT      NOT NULL, -- e.g., 'SUCCESS', 'FAILURE'
    PRIMARY KEY (id)
);

-- analysis_reports: generated analysis reports and the paths of their files.
CREATE TABLE analysis_reports (
    id             VARCHAR(255),        -- unique report ID (UUID)
    -- No ON DELETE action: a user who still owns reports cannot be deleted.
    user_id        TEXT      NOT NULL REFERENCES users(id),
    user_question  TEXT      NOT NULL,
    summary        TEXT      NOT NULL,
    json_file_path TEXT      NOT NULL,
    txt_file_path  TEXT      NOT NULL,
    timestamp      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id)
);


-- =============================================
-- == 5. Initial Data                         ==
-- =============================================

-- Seed the two default data sources (an inactive mock database and the sample
-- sales data warehouse). A row whose name already exists is skipped.
-- NOTE(review): connection_config embeds a plaintext database password in this
-- script; consider loading credentials from the environment or a secret store.
INSERT INTO data_sources (name, type, connection_config)
VALUES
    (
        'mock_postgres_db',
        'POSTGRESQL',
        '{"host": "localhost", "port": 5432, "database": "mock_db", "user": "postgres", "password": "ZYCBI2025", "description": "Sample data ,not use", "active": false}'
    ),
    (
        'sales_data_warehouse',
        'POSTGRESQL',
        '{"host": "localhost", "port": 5432, "database": "chatbi_forge_dev", "user": "postgres", "password": "ZYCBI2025", "description": "Sample sales data warehouse for BI analysis.", "active": true}'
    )
ON CONFLICT (name) DO NOTHING;

-- =============================================
-- == 4. Indexes for Performance              ==
-- =============================================
-- Production Note: For PostgreSQL, consider using more advanced index types like GIN for text search.
-- Moved to the end to ensure tables are created before indexes

-- Secondary indexes for lookups that are not already served by a primary-key
-- or UNIQUE index.

-- Catalog lookups by name
CREATE INDEX idx_table_schemas_table_name ON table_schemas(table_name);
CREATE INDEX idx_column_schemas_column_name ON column_schemas(column_name);
-- NOTE(review): metric_name is declared UNIQUE, which already creates an index
-- on it in PostgreSQL; this index is kept so the existing index name stays valid.
CREATE INDEX idx_metric_definitions_metric_name ON metric_definitions(metric_name);

-- Conversations
-- conversations.user_id is a foreign key with ON DELETE CASCADE from users.
-- PostgreSQL does not index referencing columns automatically, so without this
-- index every user deletion has to scan the whole conversations table.
CREATE INDEX idx_conversations_user_id ON conversations(user_id);
CREATE INDEX idx_conversation_history_conversation_id ON conversation_history(conversation_id);

-- Audit log
CREATE INDEX idx_audit_log_user_id ON audit_log(user_id);
CREATE INDEX idx_audit_log_event_type ON audit_log(event_type);

-- Analysis reports
CREATE INDEX idx_analysis_reports_user_id ON analysis_reports(user_id);
CREATE INDEX idx_analysis_reports_timestamp ON analysis_reports(timestamp);

-- =============================================
-- == 6. Notes for Production (PostgreSQL)    ==
-- =============================================

-- PostgreSQL conventions and how far this script already follows them:
-- 1. Auto-increment keys use SERIAL (or BIGSERIAL for a wider range) rather than
--    SQLite's INTEGER PRIMARY KEY AUTOINCREMENT.
--    e.g., id SERIAL PRIMARY KEY;
-- 2. Use JSONB instead of TEXT for JSON blobs for better performance and indexing.
--    Some columns still store JSON as TEXT (e.g., data_sources.connection_config).
--    e.g., connection_config JSONB NOT NULL;
-- 3. Use more specific data types like VARCHAR(255) instead of TEXT where appropriate.
-- 4. Implement more robust foreign key constraints with ON DELETE CASCADE or ON DELETE SET NULL;
--    several foreign keys above still declare no explicit ON DELETE action.
-- 5. The CHECK constraints and UNIQUE constraints are generally compatible.
-- 6. Timestamps can use `TIMESTAMPTZ` for time zone awareness.
--    e.g., created_at TIMESTAMPTZ DEFAULT now();
