"""postgresql_indexing_api_demo.py — index() with SQLRecordManager and PGVector"""

import os

from dotenv import load_dotenv
from langchain_community.indexes import SQLRecordManager
from langchain_core.documents import Document
from langchain_core.indexing import index
from langchain_openai import OpenAIEmbeddings
from langchain_postgres import PGVector
from sqlalchemy import create_engine, text

# load_dotenv() runs first so that anything it adds to the environment is
# visible to the os.environ lookup below.
load_dotenv()

# Fallback used when POSTGRES_URL is not set in the environment.
_DEFAULT_POSTGRES_URL = "postgresql://postgres:postgres@localhost:5432/chatdb"

POSTGRES_URL = os.environ.get("POSTGRES_URL", _DEFAULT_POSTGRES_URL)

# Same URL with the first "postgresql://" swapped for the psycopg driver scheme.
CONNECTION = POSTGRES_URL.replace("postgresql://", "postgresql+psycopg://", 1)

# Shared by the record manager (as its namespace) and PGVector (as its
# collection name) in main().
NAMESPACE = "indexing_demo"

# Version 1 of the corpus: two notes attributed to the same source file.
# Each document gets its own metadata dict.
DOCS_V1 = [
    Document(page_content=note, metadata={"source": "html_notes.txt"})
    for note in (
        "The <a> tag creates a hyperlink.",
        "The <title> tag sets the browser tab title.",
    )
]

# Version 2 keeps only the first note (the very same Document object), which
# models the second note having been removed from the source.
DOCS_V2 = DOCS_V1[:1]


def reset_record_manager(namespace: str) -> None:
    """Delete every row of ``upsertion_record`` stored under *namespace*.

    The function is a no-op when the ``upsertion_record`` table does not
    exist yet (for example on a fresh database where the record manager's
    schema has not been created), instead of failing with an
    undefined-table error.

    Args:
        namespace: Value matched against the ``namespace`` column.
    """
    engine = create_engine(CONNECTION)
    try:
        with engine.begin() as conn:
            # to_regclass() yields NULL for an unknown relation, so this
            # probes for the table without raising inside the transaction.
            table_oid = conn.execute(
                text("SELECT to_regclass('upsertion_record')")
            ).scalar()
            if table_oid is None:
                return
            conn.execute(
                text("DELETE FROM upsertion_record WHERE namespace = :ns"),
                {"ns": namespace},
            )
    finally:
        # The engine is created only for this call; release its pooled
        # connections rather than leaving them open until interpreter exit.
        engine.dispose()


def print_result(label: str, result: dict) -> None:
    """Print a titled four-line summary of one indexing result.

    Args:
        label: Heading printed between ``===`` markers.
        result: Mapping that provides the ``num_added``, ``num_updated``,
            ``num_skipped`` and ``num_deleted`` keys.
    """
    # (line prefix, key in *result*) pairs, in output order; the prefixes are
    # padded so the values line up in one column.
    rows = (
        ("added:   ", "num_added"),
        ("updated: ", "num_updated"),
        ("skipped: ", "num_skipped"),
        ("deleted: ", "num_deleted"),
    )
    print(f"\n=== {label} ===")
    for prefix, key in rows:
        print(f"{prefix}{result[key]}")


def main() -> None:
    """Run the indexing demo: three ``index()`` passes over the sample docs.

    The record manager and vector store are set up once, then ``index()`` is
    called with ``cleanup="incremental"`` and ``source_id_key="source"`` for:

    1. ``DOCS_V1`` (both documents),
    2. ``DOCS_V1`` again, unchanged,
    3. ``DOCS_V2`` (only the first document).

    The counters returned by each pass are printed with ``print_result``.
    """
    record_manager = SQLRecordManager(NAMESPACE, db_url=CONNECTION)
    # Create the record-manager tables before resetting: the reset issues a
    # DELETE against upsertion_record, which must exist on a fresh database.
    record_manager.create_schema()
    reset_record_manager(NAMESPACE)

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    vectorstore = PGVector(
        embeddings=embeddings,
        collection_name=NAMESPACE,
        connection=CONNECTION,
        use_jsonb=True,
        pre_delete_collection=True,
    )

    # (heading, documents) for each pass, in execution order.
    runs = (
        ("First run (2 docs)", DOCS_V1),
        ("Second run (same 2 docs)", DOCS_V1),
        ("Third run (1 doc removed)", DOCS_V2),
    )
    for label, docs in runs:
        result = index(
            docs,
            record_manager,
            vectorstore,
            cleanup="incremental",
            source_id_key="source",
        )
        print_result(label, result)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()