"""embeddings_demo.py — OpenAIEmbeddings and cosine similarity"""

from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings

# Load environment variables from a .env file at import time, before any
# client is constructed. NOTE(review): presumably this is where the OpenAI API
# key is expected to come from — confirm against the project's setup docs.
load_dotenv()

# Sample documents that get embedded and ranked against QUERY in main().
TEXTS = [
    "The <a> tag creates a hyperlink.",
    "The <title> tag sets the browser tab title.",
    "The <h1> tag marks the main heading.",
]

# Natural-language question whose embedding is compared with each TEXTS entry.
QUERY = "How do I make a link on a page?"


def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Return the cosine similarity between vectors *a* and *b*.

    The result is the dot product of the two vectors divided by the product
    of their Euclidean norms.

    Args:
        a: First vector.
        b: Second vector; must have the same number of components as *a*.

    Returns:
        The cosine of the angle between the vectors, or ``0.0`` when either
        vector has zero magnitude (including two empty vectors), because the
        angle is undefined in that case.

    Raises:
        ValueError: If the vectors have different lengths.
    """
    if len(a) != len(b):
        # zip() would silently truncate to the shorter vector and yield a
        # plausible-looking but meaningless score.
        raise ValueError(
            f"vectors must have the same length, got {len(a)} and {len(b)}"
        )

    dot = sum(x * y for x, y in zip(a, b))
    norm_a = sum(x * x for x in a) ** 0.5
    norm_b = sum(y * y for y in b) ** 0.5

    denominator = norm_a * norm_b
    if denominator == 0.0:
        # Avoid ZeroDivisionError for zero-magnitude input.
        return 0.0
    return dot / denominator


def main() -> None:
    """Embed the sample texts and the query, then print a similarity ranking.

    Prints the number and length of the document vectors, a five-value
    preview of the first document vector and of the query vector, and finally
    every entry of ``TEXTS`` with its cosine similarity to ``QUERY``, highest
    score first.
    """
    embedder = OpenAIEmbeddings(model="text-embedding-3-small")

    # Document side: one vector per entry in TEXTS.
    vectors = embedder.embed_documents(TEXTS)
    print(f"embed_documents: {len(vectors)} vectors, length {len(vectors[0])} each")
    print(f"first vector (first 5 values): {vectors[0][:5]}")

    # Query side: a single vector for QUERY.
    query_vec = embedder.embed_query(QUERY)
    print(f"\nembed_query: length {len(query_vec)}")
    print(f"query vector (first 5 values): {query_vec[:5]}")

    print(f"\nQuery: {QUERY}\n")

    # (score, text) tuples compare on score first, so a descending sort puts
    # the closest match at the top; equal scores fall back to the text.
    ranked = sorted(
        (
            (cosine_similarity(query_vec, doc_vec), doc)
            for doc_vec, doc in zip(vectors, TEXTS)
        ),
        reverse=True,
    )
    for score, text in ranked:
        print(f"  [{score:.4f}] {text}")


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()