Initial commit: Tasteby - YouTube restaurant map service
Backend (FastAPI + Oracle ADB), Frontend (Next.js), daemon worker. Features: channel/video/restaurant management, semantic search, Google OAuth, user reviews. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
97
backend/core/vector.py
Normal file
97
backend/core/vector.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Vector embedding generation and storage for restaurant semantic search."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import array
|
||||
import os
|
||||
|
||||
import oci
|
||||
from oci.generative_ai_inference import GenerativeAiInferenceClient
|
||||
from oci.generative_ai_inference.models import (
|
||||
EmbedTextDetails,
|
||||
OnDemandServingMode,
|
||||
)
|
||||
|
||||
from core.db import conn
|
||||
|
||||
|
||||
def _embed_texts(texts: list[str]) -> list[list[float]]:
    """Embed *texts* via OCI Generative AI and return one vector per input.

    Builds a client per call from the default OCI config file.  Required
    env vars: ``OCI_GENAI_ENDPOINT`` and ``OCI_COMPARTMENT_ID``; optional
    ``OCI_EMBED_MODEL_ID`` (defaults to ``cohere.embed-v4.0``).

    Returns:
        Embedding vectors, in the same order as ``texts``.

    Raises:
        KeyError: if a required environment variable is unset.
    """
    config = oci.config.from_file()
    client = GenerativeAiInferenceClient(
        config,
        service_endpoint=os.environ["OCI_GENAI_ENDPOINT"],
    )
    model_id = os.environ.get("OCI_EMBED_MODEL_ID", "cohere.embed-v4.0")
    compartment_id = os.environ["OCI_COMPARTMENT_ID"]

    # The OCI embed-text API rejects requests with more than 96 inputs,
    # so batch long chunk lists instead of sending everything at once.
    batch_size = 96
    embeddings: list[list[float]] = []
    for start in range(0, len(texts), batch_size):
        details = EmbedTextDetails(
            inputs=texts[start:start + batch_size],
            serving_mode=OnDemandServingMode(model_id=model_id),
            compartment_id=compartment_id,
            input_type="SEARCH_DOCUMENT",
        )
        response = client.embed_text(details)
        embeddings.extend(response.data.embeddings)
    return embeddings
|
||||
|
||||
|
||||
def _to_vec(embedding: list[float]) -> array.array:
|
||||
return array.array("f", embedding)
|
||||
|
||||
|
||||
def save_restaurant_vectors(restaurant_id: str, chunks: list[str]) -> list[str]:
    """Embed and store text chunks for a restaurant.

    Args:
        restaurant_id: ID of the owning restaurant row.
        chunks: text fragments to embed; an empty list is a no-op.

    Returns:
        List of inserted row IDs, in chunk order.

    Raises:
        RuntimeError: if the embedding service returns a different number
            of vectors than chunks supplied.  (Previously ``zip`` would
            silently truncate and drop chunks on a mismatch.)
    """
    if not chunks:
        return []

    # Deferred import keeps the module importable without the DB driver;
    # hoisted to the function top so the dependency is visible at a glance.
    import oracledb

    embeddings = _embed_texts(chunks)
    if len(embeddings) != len(chunks):
        raise RuntimeError(
            f"embedding count mismatch: got {len(embeddings)} vectors "
            f"for {len(chunks)} chunks"
        )

    sql = """
    INSERT INTO restaurant_vectors (restaurant_id, chunk_text, embedding)
    VALUES (:rid, :chunk, :emb)
    RETURNING id INTO :out_id
    """
    inserted: list[str] = []
    with conn() as c:
        cur = c.cursor()
        for chunk, emb in zip(chunks, embeddings):
            # Out-bind variable to capture the generated row ID.
            out_id = cur.var(oracledb.STRING)
            cur.execute(sql, {
                "rid": restaurant_id,
                "chunk": chunk,
                "emb": _to_vec(emb),
                "out_id": out_id,
            })
            # RETURNING INTO yields a list (one value per affected row).
            inserted.append(out_id.getvalue()[0])
    return inserted
|
||||
|
||||
|
||||
def search_similar(query: str, top_k: int = 10) -> list[dict]:
    """Find restaurant chunks semantically closest to *query*.

    Embeds the query text and ranks stored chunk vectors by cosine
    distance, ascending (closest first).

    Returns:
        Up to *top_k* dicts with keys ``restaurant_id``, ``chunk_text``,
        and ``distance``.
    """
    query_vec = _to_vec(_embed_texts([query])[0])

    sql = """
    SELECT rv.restaurant_id, rv.chunk_text,
           VECTOR_DISTANCE(rv.embedding, :qvec, COSINE) AS dist
    FROM restaurant_vectors rv
    ORDER BY dist
    FETCH FIRST :k ROWS ONLY
    """
    results: list[dict] = []
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"qvec": query_vec, "k": top_k})
        for rest_id, chunk, dist in cur.fetchall():
            # chunk_text may come back as a LOB handle; materialize to str.
            text = chunk.read() if hasattr(chunk, "read") else chunk
            results.append({
                "restaurant_id": rest_id,
                "chunk_text": text,
                "distance": dist,
            })
    return results
|
||||
Reference in New Issue
Block a user