"""indexing_api_demo.py — index() with SQLRecordManager and Chroma"""

import shutil
from pathlib import Path

from dotenv import load_dotenv
from langchain_community.indexes import SQLRecordManager
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_core.indexing import index
from langchain_openai import OpenAIEmbeddings

# Load variables from a local .env file before any client is built
# (presumably the OpenAI API key used by OpenAIEmbeddings — confirm).
load_dotenv()

# Used both as the record-manager namespace and as the Chroma collection name.
NAMESPACE = "indexing_demo"

# Both stores sit next to this script; main() removes them at the start of a run.
DB_DIR = Path(__file__).with_name("indexing_chroma_db")
SQL_PATH = Path(__file__).with_name("indexing_record_manager.sql")

# First version of the corpus: two documents that share one "source" value.
DOCS_V1 = [
    Document(page_content=text, metadata={"source": "html_notes.txt"})
    for text in (
        "The <a> tag creates a hyperlink.",
        "The <title> tag sets the browser tab title.",
    )
]

# Second version: only the first document is kept (same Document object as in DOCS_V1).
DOCS_V2 = DOCS_V1[:1]


def print_result(label: str, result: dict) -> None:
    """Print a headed summary of one indexing run.

    Args:
        label: Text shown in the ``=== ... ===`` header line.
        result: Mapping that provides the ``num_added``, ``num_updated``,
            ``num_skipped`` and ``num_deleted`` entries.

    Raises:
        KeyError: If ``result`` lacks one of the four entries; lines for the
            entries before the missing one have already been printed.
    """
    print(f"\n=== {label} ===")
    # Captions are padded to a common width so the values line up in a column.
    rows = (
        ("added:", "num_added"),
        ("updated:", "num_updated"),
        ("skipped:", "num_skipped"),
        ("deleted:", "num_deleted"),
    )
    for caption, key in rows:
        print(f"{caption:<8} {result[key]}")


def main() -> None:
    """Run the indexing demo from a clean slate and print each run's counts.

    Removes any existing Chroma directory and record-manager SQLite file,
    recreates both stores, then calls ``index()`` three times with
    ``cleanup="incremental"`` and ``source_id_key="source"``: twice with
    ``DOCS_V1`` and once with ``DOCS_V2``. Each result is handed to
    ``print_result`` under a descriptive label.
    """
    # Wipe leftovers from a previous execution so the first run starts empty.
    if DB_DIR.exists():
        shutil.rmtree(DB_DIR)
    if SQL_PATH.exists():
        SQL_PATH.unlink()

    manager = SQLRecordManager(NAMESPACE, db_url=f"sqlite:///{SQL_PATH}")
    manager.create_schema()

    store = Chroma(
        collection_name=NAMESPACE,
        embedding_function=OpenAIEmbeddings(model="text-embedding-3-small"),
        persist_directory=str(DB_DIR),
    )

    # Every run uses identical indexing settings; only the label and the
    # document list differ.
    # NOTE(review): the labels suggest the second run should skip everything
    # and the third should report a deletion — confirm against the indexing docs.
    runs = (
        ("First run (2 docs)", DOCS_V1),
        ("Second run (same 2 docs)", DOCS_V1),
        ("Third run (1 doc removed)", DOCS_V2),
    )
    for label, docs in runs:
        outcome = index(
            docs,
            manager,
            store,
            cleanup="incremental",
            source_id_key="source",
        )
        print_result(label, outcome)


# Run the demo only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()