"""postgresql_hybrid_search_demo.py — vector-only vs hybrid (BM25 + PGVector) search"""

import os

from dotenv import load_dotenv
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_postgres import PGVector

# Load variables from a local .env file (if one exists) into the process
# environment before any os.getenv() lookups in this module run.
load_dotenv()

def to_psycopg_url(url: str) -> str:
    """Return *url* rewritten to select SQLAlchemy's psycopg (v3) driver.

    Only a leading ``postgresql://`` or ``postgres://`` scheme is rewritten.
    URLs that already name a driver (for example ``postgresql+psycopg://``)
    or that use any other scheme are returned unchanged, so an explicit
    driver choice made by the caller is never overridden.
    """
    # Match on the prefix only: a blind str.replace() could rewrite a
    # "postgresql://" that merely appears later in the URL (e.g. in a query
    # parameter), and would miss the widely used "postgres://" alias.
    for scheme in ("postgresql://", "postgres://"):
        if url.startswith(scheme):
            return "postgresql+psycopg://" + url[len(scheme):]
    return url


# Database URL. ``or`` (rather than a getenv default) makes an unset *or empty*
# POSTGRES_URL fall back to the local development database.
POSTGRES_URL = (
    os.getenv("POSTGRES_URL")
    or "postgresql://postgres:postgres@localhost:5432/chatdb"
)
# Driver-qualified URL handed to PGVector in main().
CONNECTION = to_psycopg_url(POSTGRES_URL)
# Name of the PGVector collection the demo writes to.
COLLECTION = "hybrid_search_demo"

# Tiny sample corpus. main() indexes these same strings twice: once as
# Documents in the PGVector store and once via BM25Retriever.from_texts().
TEXTS = [
    "The <a> tag creates a hyperlink. Set the href attribute to the destination URL.",
    "The <title> tag sets the browser tab title.",
    "The <h1> tag marks the main heading on a page.",
]

# Keyword-style query: "href", "attribute" and "URL" appear only in the first
# entry of TEXTS, which makes the two retrieval modes easy to compare by eye.
QUERY = "href attribute URL"


def print_docs(label: str, docs: list) -> None:
    """Print a titled, zero-indexed listing of documents to stdout.

    Args:
        label: Heading text shown between ``===`` markers above the listing.
        docs: Objects exposing a ``page_content`` attribute; one line is
            printed per object, in the order given.
    """
    heading = f"\n=== {label} ==="
    print(heading)
    for rank, hit in enumerate(docs):
        content = hit.page_content
        print(f"[{rank}] {content}")


def main() -> None:
    """Index TEXTS and print vector-only versus hybrid results for QUERY.

    Steps:
      1. Embed the sample texts with OpenAI and store them in a PGVector
         collection (``pre_delete_collection=True`` starts from a clean
         collection on every run).
      2. Build a BM25 retriever over the same texts.
      3. Combine both in an EnsembleRetriever with equal weights.
      4. Print the documents each approach returns for QUERY.
    """
    top_k = 2  # number of hits requested from each individual retriever

    corpus = [Document(page_content=text) for text in TEXTS]

    store = PGVector(
        embeddings=OpenAIEmbeddings(model="text-embedding-3-small"),
        collection_name=COLLECTION,
        connection=CONNECTION,
        use_jsonb=True,
        pre_delete_collection=True,
    )
    store.add_documents(corpus)

    # Dense (embedding similarity) retriever backed by Postgres.
    dense = store.as_retriever(search_kwargs={"k": top_k})

    # Sparse (keyword) retriever built in memory from the raw strings.
    sparse = BM25Retriever.from_texts(TEXTS)
    sparse.k = top_k

    ensemble = EnsembleRetriever(
        retrievers=[sparse, dense],
        weights=[0.5, 0.5],
    )

    print(f"Query: {QUERY}")
    # Each retriever is invoked immediately before its results are printed,
    # so output appears in the same order the searches run.
    for label, retriever in (
        ("Vector-only (PGVector)", dense),
        ("Hybrid (BM25 + PGVector)", ensemble),
    ):
        print_docs(label, retriever.invoke(QUERY))


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()