"""hybrid_search_demo.py — vector-only vs hybrid (BM25 + vector) search"""

from dotenv import load_dotenv
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Load variables from a .env file into the process environment at import time.
# NOTE(review): presumably this is where the OpenAI API key used by
# OpenAIEmbeddings in main() comes from — confirm against deployment docs.
load_dotenv()

# Sample corpus: three one-line descriptions of HTML tags. main() indexes the
# same list in both the Chroma vector store and the BM25 retriever.
TEXTS = [
    "The <a> tag creates a hyperlink. Set the href attribute to the destination URL.",
    "The <title> tag sets the browser tab title.",
    "The <h1> tag marks the main heading on a page.",
]

# Query that main() sends to both the vector-only and the hybrid retriever.
QUERY = "href attribute URL"


def print_docs(label: str, docs: list) -> None:
    """Print a labelled, zero-indexed listing of retrieved documents.

    Args:
        label: Heading text shown between ``===`` markers above the listing.
        docs: Objects exposing a ``page_content`` attribute, printed in the
            order given.
    """
    header = f"\n=== {label} ==="
    print(header)
    for position, document in enumerate(docs):
        content = document.page_content
        print(f"[{position}] {content}")


def main() -> None:
    """Run QUERY against a vector-only and a hybrid retriever and print both.

    Builds a Chroma vector store over TEXTS using OpenAI embeddings, a BM25
    retriever over the same texts, and an EnsembleRetriever that combines the
    two with equal weights. Each individual retriever is configured with
    k=2. The query is printed first, followed by the vector-only results and
    then the hybrid results.
    """
    top_k = 2  # shared "k" setting for both underlying retrievers

    # Semantic side: embed the sample texts and expose the store as a retriever.
    store = Chroma.from_texts(
        texts=TEXTS,
        embedding=OpenAIEmbeddings(model="text-embedding-3-small"),
    )
    semantic_retriever = store.as_retriever(search_kwargs={"k": top_k})

    # Keyword side: BM25 over the same raw texts.
    keyword_retriever = BM25Retriever.from_texts(TEXTS)
    keyword_retriever.k = top_k

    # Hybrid: BM25 and vector retrievers combined with equal weights.
    ensemble = EnsembleRetriever(
        retrievers=[keyword_retriever, semantic_retriever],
        weights=[0.5, 0.5],
    )

    print(f"Query: {QUERY}")

    semantic_hits = semantic_retriever.invoke(QUERY)
    print_docs("Vector-only", semantic_hits)

    hybrid_hits = ensemble.invoke(QUERY)
    print_docs("Hybrid (BM25 + vector)", hybrid_hits)


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()