UX improvements: mobile bottom sheet, cuisine taxonomy, search enhancements

- Add BottomSheet component for Google Maps-style restaurant detail on mobile (3-snap drag: 40%/55%/92%, velocity-based close, backdrop overlay)
- Mobile map mode now full-screen with bottom sheet overlay for details
- Collapsible filter panel on mobile with active filter badge count
- Standardized cuisine taxonomy (46 categories: 한식|국밥, 일식|스시 etc.) with LLM remap endpoint and admin UI button
- Enhanced search: keyword search now includes foods_mentioned + video title
- Search results include channels array for frontend filtering
- Channel filter moved to frontend filteredRestaurants (not API-level)
- LLM extraction prompt updated for pipe-delimited region + cuisine taxonomy
- Vector rebuild endpoint with rich JSON chunks per restaurant
- Geolocation-based auto region selection on page load
- Desktop filters split into two clean rows

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 10:54:28 +09:00
parent 3694730501
commit 2bddb0f764
16 changed files with 2277 additions and 308 deletions
--- a/backend/core/vector.py
+++ b/backend/core/vector.py
@@ -3,9 +3,12 @@
 from __future__ import annotations

 import array
+import json
+import logging
 import os

 import oci
+import oracledb
 from oci.generative_ai_inference import GenerativeAiInferenceClient
 from oci.generative_ai_inference.models import (
    EmbedTextDetails,
@@ -14,6 +17,10 @@ from oci.generative_ai_inference.models import (

 from core.db import conn

+logger = logging.getLogger(__name__)
+# Maximum number of texts _embed_texts_batched passes to one _embed_texts call.
+_EMBED_BATCH_SIZE = 96  # Cohere embed v4 max batch size
+

 def _embed_texts(texts: list[str]) -> list[list[float]]:
    config = oci.config.from_file()
@@ -34,10 +41,148 @@ def _embed_texts(texts: list[str]) -> list[list[float]]:
    return response.data.embeddings


+def _embed_texts_batched(texts: list[str]) -> list[list[float]]:
+    """Embed ``texts`` slice by slice so no call exceeds ``_EMBED_BATCH_SIZE``."""
+    vectors: list[list[float]] = []
+    for start in range(0, len(texts), _EMBED_BATCH_SIZE):
+        stop = start + _EMBED_BATCH_SIZE
+        vectors += _embed_texts(texts[start:stop])
+    return vectors
+
+
 def _to_vec(embedding: list[float]) -> array.array:
    return array.array("f", embedding)


+def _parse_json_field(val, default):
+    """Decode a JSON column value, falling back to ``default`` when unusable."""
+    if isinstance(val, (list, dict)):
+        return val  # already decoded
+    if hasattr(val, "read"):
+        val = val.read()  # object exposing read() (e.g. a LOB handle): load payload
+    # json.loads accepts str, bytes and bytearray; None and other types fall through.
+    if isinstance(val, (str, bytes, bytearray)):
+        try:
+            return json.loads(val)
+        except ValueError:  # JSONDecodeError and UnicodeDecodeError both subclass it
+            return default
+    return default
+
+
+def _build_rich_chunk(rest: dict, video_links: list[dict]) -> str:
+    """Serialize one restaurant plus its linked-video details as a JSON string.
+
+    ``rest`` supplies name/cuisine_type/region/address/price_range; each entry of
+    ``video_links`` may carry title, channel_name, foods_mentioned, evaluation.
+    Keys whose value is falsy (None, "", empty list) are left out of the output.
+    """
+    all_foods: list[str] = []
+    all_evaluations: list[str] = []
+    video_titles: list[str] = []
+    channel_names: set[str] = set()
+
+    for vl in video_links:
+        if vl.get("title"):
+            video_titles.append(vl["title"])
+        if vl.get("channel_name"):
+            channel_names.add(vl["channel_name"])
+        foods = _parse_json_field(vl.get("foods_mentioned"), [])
+        # Keep only non-empty strings from a list: a str/dict would be spread into
+        # characters/keys, and unhashable items would break the dedup below.
+        if isinstance(foods, list):
+            all_foods.extend(f for f in foods if isinstance(f, str) and f)
+        ev = _parse_json_field(vl.get("evaluation"), {})
+        if isinstance(ev, dict) and ev.get("text"):
+            all_evaluations.append(ev["text"])
+        elif isinstance(ev, str) and ev:
+            all_evaluations.append(ev)
+
+    doc = {
+        **{k: rest.get(k) for k in ("name", "cuisine_type", "region", "address", "price_range")},
+        "menu": list(dict.fromkeys(all_foods)),  # deduplicate, preserve order
+        "summary": all_evaluations,
+        "video_titles": video_titles,
+        "channels": sorted(channel_names),
+    }
+    return json.dumps({k: v for k, v in doc.items() if v}, ensure_ascii=False)
+
+
+def rebuild_all_vectors():
+    """Rebuild vector embeddings for ALL restaurants.
+
+    Generator. Yields progress dicts of the form
+    {"status": "progress", "current": N, "total": M, "phase": "prepare" | "embed" | "insert"}
+    ("prepare" events also carry "name"), then finally {"status": "done", "total": M}.
+
+    Embeddings are computed before any row is deleted, so a failed embedding
+    call leaves the existing vectors in place. Raises RuntimeError if the number
+    of embeddings returned differs from the number of restaurants.
+    """
+    # Only restaurants that have coordinates and at least one linked video.
+    sql_restaurants = """
+        SELECT DISTINCT r.id, r.name, r.address, r.region, r.cuisine_type, r.price_range
+        FROM restaurants r
+        JOIN video_restaurants vr ON vr.restaurant_id = r.id
+        WHERE r.latitude IS NOT NULL
+        ORDER BY r.name
+    """
+    sql_video_links = """
+        SELECT v.title, vr.foods_mentioned, vr.evaluation, c.channel_name
+        FROM video_restaurants vr
+        JOIN videos v ON v.id = vr.video_id
+        JOIN channels c ON c.id = v.channel_id
+        WHERE vr.restaurant_id = :rid
+    """
+
+    with conn() as c:
+        cur = c.cursor()
+        cur.execute(sql_restaurants)
+        cols = [d[0].lower() for d in cur.description]
+        all_rests = [dict(zip(cols, row)) for row in cur.fetchall()]
+
+    total = len(all_rests)
+    logger.info("Rebuilding vectors for %d restaurants", total)
+
+    # 1. Build one JSON chunk per restaurant.
+    chunks: list[str] = []
+    for i, rest in enumerate(all_rests):
+        with conn() as c:
+            cur = c.cursor()
+            cur.execute(sql_video_links, {"rid": rest["id"]})
+            vl_cols = [d[0].lower() for d in cur.description]
+            video_links = [dict(zip(vl_cols, row)) for row in cur.fetchall()]
+            # Build while the connection is open: the JSON columns may arrive as
+            # LOB handles that _parse_json_field has to read().
+            chunks.append(_build_rich_chunk(rest, video_links))
+        yield {"status": "progress", "current": i + 1, "total": total, "phase": "prepare", "name": rest["name"]}
+
+    # 2. Embed everything up front; nothing has been deleted yet if this raises.
+    yield {"status": "progress", "current": 0, "total": total, "phase": "embed"}
+    embeddings = _embed_texts_batched(chunks)
+    if len(embeddings) != total:
+        raise RuntimeError(f"expected {total} embeddings, got {len(embeddings)}")
+    logger.info("Generated %d embeddings", len(embeddings))
+
+    # 3. Replace the table contents on a single connection.
+    # NOTE(review): assumes conn() commits on clean exit and rolls back on error,
+    # as the rest of this module already relies on — confirm in core.db.
+    insert_sql = """
+        INSERT INTO restaurant_vectors (restaurant_id, chunk_text, embedding)
+        VALUES (:rid, :chunk, :emb)
+    """
+    with conn() as c:
+        cur = c.cursor()
+        cur.execute("DELETE FROM restaurant_vectors")
+        logger.info("Cleared existing vectors")
+        for i, (rest, chunk, emb) in enumerate(zip(all_rests, chunks, embeddings)):
+            cur.execute(insert_sql, {"rid": rest["id"], "chunk": chunk, "emb": _to_vec(emb)})
+            if (i + 1) % 50 == 0 or i + 1 == total:
+                yield {"status": "progress", "current": i + 1, "total": total, "phase": "insert"}
+
+    logger.info("Rebuilt vectors for %d restaurants", total)
+    yield {"status": "done", "total": total}
+
+
 def save_restaurant_vectors(restaurant_id: str, chunks: list[str]) -> list[str]:
    """Embed and store text chunks for a restaurant.

@@ -54,7 +199,6 @@ def save_restaurant_vectors(restaurant_id: str, chunks: list[str]) -> list[str]:
        VALUES (:rid, :chunk, :emb)
        RETURNING id INTO :out_id
    """
-    import oracledb
    with conn() as c:
        cur = c.cursor()
        for chunk, emb in zip(chunks, embeddings):
@@ -69,10 +213,11 @@ def save_restaurant_vectors(restaurant_id: str, chunks: list[str]) -> list[str]:
    return inserted


-def search_similar(query: str, top_k: int = 10) -> list[dict]:
+def search_similar(query: str, top_k: int = 10, max_distance: float = 0.57) -> list[dict]:
    """Semantic search: find restaurants similar to query text.

    Returns list of dicts: restaurant_id, chunk_text, distance.
+    Only results with cosine distance <= max_distance are returned.
    """
    embeddings = _embed_texts([query])
    query_vec = _to_vec(embeddings[0])
@@ -81,12 +226,13 @@ def search_similar(query: str, top_k: int = 10) -> list[dict]:
        SELECT rv.restaurant_id, rv.chunk_text,
               VECTOR_DISTANCE(rv.embedding, :qvec, COSINE) AS dist
        FROM restaurant_vectors rv
+        WHERE VECTOR_DISTANCE(rv.embedding, :qvec2, COSINE) <= :max_dist
        ORDER BY dist
        FETCH FIRST :k ROWS ONLY
    """
    with conn() as c:
        cur = c.cursor()
-        cur.execute(sql, {"qvec": query_vec, "k": top_k})
+        cur.execute(sql, {"qvec": query_vec, "qvec2": query_vec, "k": top_k, "max_dist": max_distance})
        return [
            {
                "restaurant_id": r[0],