UX improvements: mobile bottom sheet, cuisine taxonomy, search enhancements

- Add BottomSheet component for Google Maps-style restaurant detail on mobile
  (3-snap drag: 40%/55%/92%, velocity-based close, backdrop overlay)
- Mobile map mode now full-screen with bottom sheet overlay for details
- Collapsible filter panel on mobile with active filter badge count
- Standardized cuisine taxonomy (46 categories: 한식|국밥, 일식|스시 etc.)
  with LLM remap endpoint and admin UI button
- Enhanced search: keyword search now includes foods_mentioned + video title
- Search results include channels array for frontend filtering
- Channel filter moved to frontend filteredRestaurants (not API-level)
- LLM extraction prompt updated for pipe-delimited region + cuisine taxonomy
- Vector rebuild endpoint with rich JSON chunks per restaurant
- Geolocation-based auto region selection on page load
- Desktop filters split into two clean rows

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 10:54:28 +09:00
parent 3694730501
commit 2bddb0f764
16 changed files with 2277 additions and 308 deletions
--- a/backend/core/cache.py
+++ b/backend/core/cache.py
@@ -0,0 +1,107 @@
+"""Redis cache layer — graceful fallback when Redis is unavailable."""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from typing import Any
+
+import redis
+
+logger = logging.getLogger(__name__)
+
+_client: redis.Redis | None = None
+_disabled = False
+
+DEFAULT_TTL = 600  # 10 minutes
+
+
+def _get_client() -> redis.Redis | None:
+    """Return the process-wide Redis client, creating it on first use.
+
+    Connection settings are read from ``REDIS_HOST``, ``REDIS_PORT`` and
+    ``REDIS_DB``. The first connection is verified with ``PING``; if that
+    fails, caching is switched off for the rest of the process and every
+    later call returns ``None`` without retrying.
+    """
+    global _client, _disabled
+    if _disabled:
+        return None
+    if _client is not None:
+        return _client
+
+    # NOTE(review): the fallback host is a private LAN address — confirm this
+    # default is intended outside the original deployment.
+    redis_host = os.environ.get("REDIS_HOST", "192.168.0.147")
+    redis_port = int(os.environ.get("REDIS_PORT", "6379"))
+    redis_db = int(os.environ.get("REDIS_DB", "0"))
+    try:
+        _client = redis.Redis(
+            host=redis_host,
+            port=redis_port,
+            db=redis_db,
+            socket_connect_timeout=2,
+            socket_timeout=2,
+            decode_responses=True,
+        )
+        # Constructing the client does not prove the server is reachable.
+        _client.ping()
+        logger.info("Redis connected: %s:%s/%s", redis_host, redis_port, redis_db)
+    except Exception as exc:
+        logger.warning("Redis unavailable (%s), caching disabled", exc)
+        _client = None
+        _disabled = True
+        return None
+    return _client
+
+
+def make_key(*parts: Any) -> str:
+    """Join *parts* into a namespaced cache key.
+
+    Parts that are ``None`` or the empty string are skipped; the rest are
+    converted with ``str`` and joined with ``:`` after the ``tasteby:``
+    namespace, e.g. ``'tasteby:restaurants:cuisine=한식:limit=100'``.
+    """
+    segments = []
+    for part in parts:
+        if part is not None and part != "":
+            segments.append(str(part))
+    return "tasteby:" + ":".join(segments)
+
+
+def get(key: str) -> Any | None:
+    """Fetch and JSON-decode the value cached under *key*.
+
+    Returns ``None`` when the key is absent, when Redis is unavailable, or
+    when any error occurs (errors are logged at DEBUG level, never raised).
+    """
+    try:
+        client = _get_client()
+        if client:
+            raw = client.get(key)
+            if raw is not None:
+                return json.loads(raw)
+    except Exception as exc:
+        logger.debug("Cache get error: %s", exc)
+    return None
+
+
+def set(key: str, value: Any, ttl: int = DEFAULT_TTL) -> None:
+    """Store *value* under *key* as JSON, expiring after *ttl* seconds.
+
+    Values that ``json`` cannot serialize natively are stringified via
+    ``default=str``. Does nothing when Redis is unavailable; errors are
+    logged at DEBUG level and never raised.
+    """
+    try:
+        client = _get_client()
+        if client:
+            payload = json.dumps(value, ensure_ascii=False, default=str)
+            client.setex(key, ttl, payload)
+    except Exception as exc:
+        logger.debug("Cache set error: %s", exc)
+
+
+def flush() -> None:
+    """Remove every cache entry under the ``tasteby:`` namespace.
+
+    Walks the keyspace with ``SCAN`` (batches of roughly 200) and deletes
+    each batch of matches. Does nothing when Redis is unavailable; errors
+    are logged at DEBUG level and never raised.
+    """
+    try:
+        client = _get_client()
+        if not client:
+            return
+        cursor = 0
+        finished = False
+        while not finished:
+            cursor, batch = client.scan(cursor, match="tasteby:*", count=200)
+            if batch:
+                client.delete(*batch)
+            # SCAN signals a completed iteration by returning cursor 0.
+            finished = cursor == 0
+        logger.info("Cache flushed")
+    except Exception as exc:
+        logger.debug("Cache flush error: %s", exc)
+
+
+def invalidate_prefix(prefix: str) -> None:
+    """Delete every key whose name starts with *prefix*.
+
+    The prefix is matched literally: glob metacharacters in it are escaped
+    before being handed to ``SCAN ... MATCH``. An empty prefix is refused,
+    because it would expand to the pattern ``*`` and delete every key in
+    the Redis database, not just this application's.
+
+    Does nothing when Redis is unavailable; errors are logged at DEBUG
+    level and never raised.
+    """
+    if not prefix:
+        logger.warning("Cache invalidate skipped: empty prefix")
+        return
+    # MATCH takes a glob pattern; backslash-escape its special characters so
+    # a prefix containing e.g. "*" or "[" cannot match unrelated keys.
+    literal = "".join("\\" + ch if ch in "*?[]\\" else ch for ch in prefix)
+    pattern = literal + "*"
+    try:
+        client = _get_client()
+        if not client:
+            return
+        cursor = 0
+        while True:
+            cursor, keys = client.scan(cursor, match=pattern, count=200)
+            if keys:
+                client.delete(*keys)
+            # SCAN signals a completed iteration by returning cursor 0.
+            if cursor == 0:
+                break
+    except Exception as e:
+        logger.debug("Cache invalidate error: %s", e)
--- a/backend/core/cuisine.py
+++ b/backend/core/cuisine.py
@@ -0,0 +1,102 @@
+"""Standardized cuisine type taxonomy and LLM remapping."""
+
+from __future__ import annotations
+
+# ── Canonical cuisine types ──
+# Each entry has the form "<major category>|<subcategory>", pipe-delimited.
+CUISINE_TYPES = [
+    # Korean
+    "한식|백반/한정식",
+    "한식|국밥/해장국",
+    "한식|찌개/전골/탕",
+    "한식|삼겹살/돼지구이",
+    "한식|소고기/한우구이",
+    "한식|곱창/막창",
+    "한식|닭/오리구이",
+    "한식|족발/보쌈",
+    "한식|회/횟집",
+    "한식|해산물",
+    "한식|분식",
+    "한식|면",
+    "한식|죽/죽집",
+    "한식|순대/순대국",
+    "한식|장어/민물",
+    "한식|주점/포차",
+    # Japanese
+    "일식|스시/오마카세",
+    "일식|라멘",
+    "일식|돈카츠",
+    "일식|텐동/튀김",
+    "일식|이자카야",
+    "일식|야키니쿠",
+    "일식|카레",
+    "일식|소바/우동",
+    # Chinese
+    "중식|중화요리",
+    "중식|마라/훠궈",
+    "중식|딤섬/만두",
+    "중식|양꼬치",
+    # Western
+    "양식|파스타/이탈리안",
+    "양식|스테이크",
+    "양식|햄버거",
+    "양식|피자",
+    "양식|프렌치",
+    "양식|바베큐",
+    "양식|브런치",
+    "양식|비건/샐러드",
+    # Other Asian
+    "아시아|베트남",
+    "아시아|태국",
+    "아시아|인도/중동",
+    "아시아|동남아기타",
+    # Miscellaneous
+    "기타|치킨",
+    "기타|카페/디저트",
+    "기타|베이커리",
+    "기타|뷔페",
+    "기타|퓨전",
+]
+
+# Bulleted rendering of CUISINE_TYPES (one "  - <type>" line per entry),
+# ready to be substituted into an LLM prompt.
+CUISINE_LIST_TEXT = "\n".join(f"  - {c}" for c in CUISINE_TYPES)
+
+_REMAP_PROMPT = """\
+아래 식당들의 cuisine_type을 표준 분류로 매핑하세요.
+
+표준 분류 목록 (반드시 이 중 하나를 선택):
+{cuisine_types}
+
+식당 목록:
+{restaurants}
+
+규칙:
+- 모든 식당에 대해 빠짐없이 결과를 반환 (총 {count}개 모두 반환해야 함)
+- 반드시 위 표준 분류 목록의 값을 그대로 복사하여 사용 (오타 금지)
+- 식당 이름, 현재 분류, 메뉴를 종합적으로 고려
+- JSON 배열만 반환, 설명 없음
+- 형식: [{{"id": "식당ID", "cuisine_type": "한식|국밥/해장국"}}, ...]
+
+JSON 배열:"""
+
+
+def build_remap_prompt(restaurants: list[dict]) -> str:
+    """Render the cuisine-remapping prompt for *restaurants*.
+
+    Each restaurant dict must contain ``id`` and ``name``; ``cuisine_type``
+    and ``foods_mentioned`` are optional and default to ``None``. The
+    entries are embedded in the prompt as a JSON array together with the
+    canonical cuisine list and the number of entries.
+    """
+    import json  # kept local: this module has no top-level json import
+
+    payload = [
+        {
+            "id": entry["id"],
+            "name": entry["name"],
+            "current_cuisine_type": entry.get("cuisine_type"),
+            "foods_mentioned": entry.get("foods_mentioned"),
+        }
+        for entry in restaurants
+    ]
+    return _REMAP_PROMPT.format(
+        cuisine_types=CUISINE_LIST_TEXT,
+        restaurants=json.dumps(payload, ensure_ascii=False),
+        count=len(payload),
+    )
+
+
+# Valid prefixes for loose validation
+VALID_PREFIXES = ("한식|", "일식|", "중식|", "양식|", "아시아|", "기타|")
--- a/backend/core/extractor.py
+++ b/backend/core/extractor.py
@@ -20,6 +20,8 @@ from oci.generative_ai_inference.models import (
    UserMessage,
 )

+from core.cuisine import CUISINE_LIST_TEXT
+
 logger = logging.getLogger(__name__)


@@ -101,18 +103,22 @@ _EXTRACT_PROMPT = """\
 필드:
 - name: 식당 이름 (string, 필수)
 - address: 주소 또는 위치 힌트 (string | null)
- region: 지역 (예: 서울 강남, 부산 해운대) (string | null)
- cuisine_type: 음식 종류 (예: 한식, 일식, 중식, 양식, 카페) (string | null)
+- region: 지역을 "나라|시/도|구/군/시" 파이프(|) 구분 형식으로 작성 (string | null)
+  - 한국 예시: "한국|서울|강남구", "한국|부산|해운대구", "한국|제주", "한국|강원|강릉시"
+  - 해외 예시: "일본|도쿄", "일본|오사카", "싱가포르", "미국|뉴욕", "태국|방콕"
+  - 나라는 한글로, 해외 도시도 한글로 표기
+- cuisine_type: 아래 목록에서 가장 적합한 것을 선택 (string, 필수). 반드시 아래 목록 중 하나를 사용:
+{cuisine_types}
 - price_range: 가격대 (예: 1만원대, 2-3만원) (string | null)
 - foods_mentioned: 언급된 메뉴들 (string[])
 - evaluation: 평가 내용 (string | null)
 - guests: 함께한 게스트 (string[])

-영상 제목: {title}
+영상 제목: {{title}}
 자막:
-{transcript}
+{{transcript}}

-JSON 배열:"""
+JSON 배열:""".format(cuisine_types=CUISINE_LIST_TEXT)


 def extract_restaurants(title: str, transcript: str, custom_prompt: str | None = None) -> tuple[list[dict], str]:
--- a/backend/core/restaurant.py
+++ b/backend/core/restaurant.py
@@ -3,12 +3,86 @@
 from __future__ import annotations

 import json
+import re

 import oracledb

 from core.db import conn


+# ── Region parser: address → "country|city|district" ──
+
+# Maps a city/province token as it appears in an address (official or short
+# form) to the short label used in region strings.
+_CITY_MAP = {
+    "서울특별시": "서울", "서울": "서울",
+    "부산광역시": "부산", "부산": "부산",
+    "대구광역시": "대구", "대구": "대구",
+    "인천광역시": "인천", "인천": "인천",
+    "광주광역시": "광주", "광주": "광주",
+    "대전광역시": "대전", "대전": "대전",
+    "울산광역시": "울산", "울산": "울산",
+    "세종특별자치시": "세종", "세종": "세종",
+    "경기도": "경기", "경기": "경기",
+    "강원특별자치도": "강원", "강원도": "강원", "강원": "강원",
+    "충청북도": "충북", "충북": "충북",
+    "충청남도": "충남", "충남": "충남",
+    "전라북도": "전북", "전북특별자치도": "전북", "전북": "전북",
+    "전라남도": "전남", "전남": "전남",
+    "경상북도": "경북", "경북": "경북",
+    "경상남도": "경남", "경남": "경남",
+    "제주특별자치도": "제주", "제주도": "제주", "제주": "제주",
+}
+
+# Suffixes (gu / gun / si) that mark an address token as a district.
+_DISTRICT_SUFFIXES = ("구", "군", "시")
+
+# (substrings to look for, Korean city label); checked in order.
+_JAPAN_CITIES = (
+    (("Tokyo", "도쿄"), "도쿄"),
+    (("Osaka", "오사카"), "오사카"),
+    (("Sapporo", "Hokkaido", "삿포로", "홋카이도"), "삿포로"),
+    (("Kyoto", "교토"), "교토"),
+    (("Fukuoka", "후쿠오카"), "후쿠오카"),
+)
+
+
+def parse_region_from_address(address: str | None) -> str | None:
+    """Derive a pipe-delimited region string from a postal address.
+
+    The result has the form ``"country|city|district"`` with trailing
+    levels omitted when they cannot be determined, e.g. ``"한국|서울|강남구"``,
+    ``"한국|세종"``, ``"일본|도쿄"`` or ``"싱가포르"``.
+
+    Returns ``None`` for an empty address or one that matches none of the
+    recognised patterns (Japan, Singapore, Korea).
+    """
+    if not address:
+        return None
+    addr = address.strip()
+
+    # Japan: recognise the city by its English or Korean name.
+    if addr.startswith("일본") or "Japan" in addr:
+        for needles, label in _JAPAN_CITIES:
+            if any(needle in addr for needle in needles):
+                return f"일본|{label}"
+        return "일본"
+
+    # Singapore
+    if "Singapore" in addr or "싱가포르" in addr:
+        return "싱가포르"
+
+    # Korean standard order: "대한민국 <city/province> <district> ..."
+    if "대한민국" in addr:
+        m = re.match(r"대한민국\s+(\S+)\s+(\S+)", addr)
+        if m:
+            city = _CITY_MAP.get(m.group(1))
+            if city:
+                district = m.group(2)
+                if district.endswith(_DISTRICT_SUFFIXES):
+                    return f"한국|{city}|{district}"
+                # Second token is not a district — city level only.
+                return f"한국|{city}"
+        # Country name is not a leading prefix: scan for the first city token.
+        parts = addr.split()
+        for i, token in enumerate(parts):
+            city = _CITY_MAP.get(token)
+            if city is None:
+                continue
+            # The district precedes the city in reversed addresses
+            # ("... 강남구 서울특별시 대한민국") and follows it in native order
+            # ("서울특별시 강남구 ... 대한민국"); try both neighbours.
+            district = None
+            for j in (i - 1, i + 1):
+                if 0 <= j < len(parts) and parts[j].endswith(_DISTRICT_SUFFIXES):
+                    district = parts[j]
+                    break
+            return f"한국|{city}|{district}" if district else f"한국|{city}"
+        return "한국"
+
+    # Korean address without a country prefix: the city must come first.
+    parts = addr.split()
+    if parts:
+        city = _CITY_MAP.get(parts[0])
+        if city and len(parts) > 1 and parts[1].endswith(_DISTRICT_SUFFIXES):
+            return f"한국|{city}|{parts[1]}"
+        if city:
+            return f"한국|{city}"
+
+    return None
+
+
 def _truncate_bytes(val: str | None, max_bytes: int) -> str | None:
    """Truncate a string to fit within max_bytes when encoded as UTF-8."""
    if not val:
@@ -19,6 +93,21 @@ def _truncate_bytes(val: str | None, max_bytes: int) -> str | None:
    return encoded[:max_bytes].decode("utf-8", errors="ignore").rstrip()


+def find_by_place_id(google_place_id: str) -> dict | None:
+    """Look up a restaurant row by its Google Place ID.
+
+    Returns a dict with ``id``, ``name``, ``address``, ``region``,
+    ``latitude`` and ``longitude``, or ``None`` when no row matches.
+    """
+    query = "SELECT id, name, address, region, latitude, longitude FROM restaurants WHERE google_place_id = :gid"
+    fields = ("id", "name", "address", "region", "latitude", "longitude")
+    with conn() as c:
+        cursor = c.cursor()
+        cursor.execute(query, {"gid": google_place_id})
+        row = cursor.fetchone()
+        if row:
+            # Column order in the SELECT matches ``fields``.
+            return dict(zip(fields, row))
+    return None
+
+
 def find_by_name(name: str) -> dict | None:
    """Find a restaurant by exact name match."""
    sql = "SELECT id, name, address, region, latitude, longitude FROM restaurants WHERE name = :n"
@@ -50,17 +139,27 @@ def upsert(
    rating_count: int | None = None,
 ) -> str:
    """Insert or update a restaurant. Returns row id."""
+    # Auto-derive region from address if not provided
+    if not region and address:
+        region = parse_region_from_address(address)
+
    # Truncate fields to fit DB column byte limits (VARCHAR2 is byte-based)
    price_range = _truncate_bytes(price_range, 50)
    cuisine_type = _truncate_bytes(cuisine_type, 100)
    region = _truncate_bytes(region, 100)
    website = _truncate_bytes(website, 500)

-    existing = find_by_name(name)
+    # 1) google_place_id로 먼저 찾고, 2) 이름으로 찾기
+    existing = None
+    if google_place_id:
+        existing = find_by_place_id(google_place_id)
+    if not existing:
+        existing = find_by_name(name)
    if existing:
        sql = """
            UPDATE restaurants
-            SET address = COALESCE(:addr, address),
+            SET name = :name,
+                address = COALESCE(:addr, address),
                region = COALESCE(:reg, region),
                latitude = COALESCE(:lat, latitude),
                longitude = COALESCE(:lng, longitude),
@@ -77,6 +176,7 @@ def upsert(
        """
        with conn() as c:
            c.cursor().execute(sql, {
+                "name": name,
                "addr": address, "reg": region,
                "lat": latitude, "lng": longitude,
                "cuisine": cuisine_type, "price": price_range,
--- a/backend/core/vector.py
+++ b/backend/core/vector.py
@@ -3,9 +3,12 @@
 from __future__ import annotations

 import array
+import json
+import logging
 import os

 import oci
+import oracledb
 from oci.generative_ai_inference import GenerativeAiInferenceClient
 from oci.generative_ai_inference.models import (
    EmbedTextDetails,
@@ -14,6 +17,10 @@ from oci.generative_ai_inference.models import (

 from core.db import conn

+logger = logging.getLogger(__name__)
+
+_EMBED_BATCH_SIZE = 96  # Cohere embed v4 max batch size
+

 def _embed_texts(texts: list[str]) -> list[list[float]]:
    config = oci.config.from_file()
@@ -34,10 +41,148 @@ def _embed_texts(texts: list[str]) -> list[list[float]]:
    return response.data.embeddings


+def _embed_texts_batched(texts: list[str]) -> list[list[float]]:
+    """Embed *texts* in slices of at most ``_EMBED_BATCH_SIZE`` items.
+
+    Each slice is sent through ``_embed_texts``; the results are
+    concatenated in input order. An empty input yields an empty list
+    without calling the embedding service.
+    """
+    vectors: list[list[float]] = []
+    for start in range(0, len(texts), _EMBED_BATCH_SIZE):
+        stop = start + _EMBED_BATCH_SIZE
+        vectors += _embed_texts(texts[start:stop])
+    return vectors
+
+
 def _to_vec(embedding: list[float]) -> array.array:
     """Pack *embedding* into an ``array.array`` of typecode ``"f"``."""
     return array.array("f", embedding)


+def _parse_json_field(val, default):
+    """Decode a JSON-valued column into a Python object.
+
+    Accepts a value that is already a ``list``/``dict`` (returned as-is), a
+    file-like object exposing ``read()`` (its content is read first), or
+    JSON text as ``str``, ``bytes`` or ``bytearray``.
+
+    Returns *default* when *val* is ``None``, is of any other type, or does
+    not contain valid JSON.
+    """
+    if val is None:
+        return default
+    if isinstance(val, (list, dict)):
+        return val
+    if hasattr(val, "read"):
+        val = val.read()
+    # read() may hand back bytes rather than str; json.loads accepts both.
+    if isinstance(val, (str, bytes, bytearray)):
+        try:
+            return json.loads(val)
+        except ValueError:
+            # Covers json.JSONDecodeError and UnicodeDecodeError.
+            return default
+    return default
+
+
+def _build_rich_chunk(rest: dict, video_links: list[dict]) -> str:
+    """Build one JSON document describing a restaurant, for embedding.
+
+    *rest* supplies ``name``, ``cuisine_type``, ``region``, ``address`` and
+    ``price_range``. Each entry of *video_links* may supply ``title``,
+    ``channel_name``, ``foods_mentioned`` and ``evaluation``; the last two
+    are decoded with ``_parse_json_field``.
+
+    The result is a JSON string with the keys ``name``, ``cuisine_type``,
+    ``region``, ``address``, ``price_range``, ``menu`` (de-duplicated foods
+    in first-seen order), ``summary`` (evaluation texts), ``video_titles``
+    and ``channels`` (sorted). Keys whose value is empty are omitted.
+    """
+    all_foods: list[str] = []
+    all_evaluations: list[str] = []
+    video_titles: list[str] = []
+    channel_names: set[str] = set()
+
+    for vl in video_links:
+        if vl.get("title"):
+            video_titles.append(vl["title"])
+        if vl.get("channel_name"):
+            channel_names.add(vl["channel_name"])
+
+        foods = _parse_json_field(vl.get("foods_mentioned"), [])
+        # A lone JSON string is one food item, not a sequence of characters.
+        if isinstance(foods, str):
+            foods = [foods]
+        # Keep only non-empty strings: anything else is not a menu name and
+        # unhashable items would break the de-duplication below.
+        if isinstance(foods, list):
+            all_foods.extend(f for f in foods if isinstance(f, str) and f)
+
+        # NOTE(review): evaluation text that is not valid JSON decodes to the
+        # default {} and is dropped here — confirm how the column is stored.
+        ev = _parse_json_field(vl.get("evaluation"), {})
+        if isinstance(ev, dict) and ev.get("text"):
+            all_evaluations.append(ev["text"])
+        elif isinstance(ev, str) and ev:
+            all_evaluations.append(ev)
+
+    doc = {
+        "name": rest.get("name"),
+        "cuisine_type": rest.get("cuisine_type"),
+        "region": rest.get("region"),
+        "address": rest.get("address"),
+        "price_range": rest.get("price_range"),
+        "menu": list(dict.fromkeys(all_foods)),  # deduplicate, preserve order
+        "summary": all_evaluations,
+        "video_titles": video_titles,
+        "channels": sorted(channel_names),
+    }
+    # Drop None/empty values so they do not add noise to the embedded text.
+    doc = {k: v for k, v in doc.items() if v}
+    return json.dumps(doc, ensure_ascii=False)
+
+
+def rebuild_all_vectors():
+    """Rebuild the vector embeddings stored in ``restaurant_vectors``.
+
+    Covers every restaurant that has at least one ``video_restaurants`` row
+    and a non-null latitude. One JSON chunk is built per restaurant, all
+    chunks are embedded, and only then are the existing vectors deleted and
+    the new ones inserted — so a failure while embedding leaves the old
+    vectors in place.
+
+    Yields progress dicts:
+        {"status": "progress", "current": N, "total": M, "phase": "prepare", "name": "..."}
+        {"status": "progress", "current": 0, "total": M, "phase": "embed"}
+        {"status": "progress", "current": N, "total": M, "phase": "insert"}
+    Final yield: {"status": "done", "total": M}
+
+    Raises:
+        RuntimeError: if the embedding service returns a different number
+            of vectors than chunks submitted (raised before anything is
+            deleted).
+    """
+    # 1. Get all restaurants with video links
+    sql_restaurants = """
+        SELECT DISTINCT r.id, r.name, r.address, r.region, r.cuisine_type, r.price_range
+        FROM restaurants r
+        JOIN video_restaurants vr ON vr.restaurant_id = r.id
+        WHERE r.latitude IS NOT NULL
+        ORDER BY r.name
+    """
+    sql_video_links = """
+        SELECT v.title, vr.foods_mentioned, vr.evaluation, c.channel_name
+        FROM video_restaurants vr
+        JOIN videos v ON v.id = vr.video_id
+        JOIN channels c ON c.id = v.channel_id
+        WHERE vr.restaurant_id = :rid
+    """
+
+    with conn() as c:
+        cur = c.cursor()
+        cur.execute(sql_restaurants)
+        cols = [d[0].lower() for d in cur.description]
+        all_rests = [dict(zip(cols, row)) for row in cur.fetchall()]
+
+    total = len(all_rests)
+    logger.info("Rebuilding vectors for %d restaurants", total)
+
+    rest_ids: list = []
+    chunks: list[str] = []
+    for done, rest in enumerate(all_rests, start=1):
+        with conn() as c:
+            cur = c.cursor()
+            cur.execute(sql_video_links, {"rid": rest["id"]})
+            vl_cols = [d[0].lower() for d in cur.description]
+            video_links = [dict(zip(vl_cols, row)) for row in cur.fetchall()]
+            # Build the chunk while the connection is still open: the JSON
+            # columns may arrive as LOB objects that are read lazily.
+            chunk = _build_rich_chunk(rest, video_links)
+
+        rest_ids.append(rest["id"])
+        chunks.append(chunk)
+        yield {"status": "progress", "current": done, "total": total, "phase": "prepare", "name": rest["name"]}
+
+    # 2. Embed everything BEFORE touching the table, so an embedding failure
+    #    cannot leave restaurant_vectors empty.
+    yield {"status": "progress", "current": 0, "total": total, "phase": "embed"}
+    embeddings = _embed_texts_batched(chunks)
+    if len(embeddings) != len(chunks):
+        # zip() below would otherwise silently drop the unmatched restaurants.
+        raise RuntimeError(
+            f"Embedding count mismatch: expected {len(chunks)}, got {len(embeddings)}"
+        )
+    logger.info("Generated %d embeddings", len(embeddings))
+
+    # 3. Delete all existing vectors
+    # NOTE(review): delete and insert use separate conn() contexts, as in the
+    # original; whether they can share one transaction depends on conn().
+    with conn() as c:
+        c.cursor().execute("DELETE FROM restaurant_vectors")
+    logger.info("Cleared existing vectors")
+
+    # 4. Insert the new vectors
+    insert_sql = """
+        INSERT INTO restaurant_vectors (restaurant_id, chunk_text, embedding)
+        VALUES (:rid, :chunk, :emb)
+    """
+    with conn() as c:
+        cur = c.cursor()
+        for done, (rid, chunk, emb) in enumerate(zip(rest_ids, chunks, embeddings), start=1):
+            cur.execute(insert_sql, {
+                "rid": rid,
+                "chunk": chunk,
+                "emb": _to_vec(emb),
+            })
+            # Report every 50 rows and once more on the final row.
+            if done % 50 == 0 or done == total:
+                yield {"status": "progress", "current": done, "total": total, "phase": "insert"}
+
+    logger.info("Rebuilt vectors for %d restaurants", total)
+    yield {"status": "done", "total": total}
+
+
 def save_restaurant_vectors(restaurant_id: str, chunks: list[str]) -> list[str]:
    """Embed and store text chunks for a restaurant.

@@ -54,7 +199,6 @@ def save_restaurant_vectors(restaurant_id: str, chunks: list[str]) -> list[str]:
        VALUES (:rid, :chunk, :emb)
        RETURNING id INTO :out_id
    """
-    import oracledb
    with conn() as c:
        cur = c.cursor()
        for chunk, emb in zip(chunks, embeddings):
@@ -69,10 +213,11 @@ def save_restaurant_vectors(restaurant_id: str, chunks: list[str]) -> list[str]:
    return inserted


-def search_similar(query: str, top_k: int = 10) -> list[dict]:
+def search_similar(query: str, top_k: int = 10, max_distance: float = 0.57) -> list[dict]:
    """Semantic search: find restaurants similar to query text.

    Returns list of dicts: restaurant_id, chunk_text, distance.
+    Only results with cosine distance <= max_distance are returned.
    """
    embeddings = _embed_texts([query])
    query_vec = _to_vec(embeddings[0])
@@ -81,12 +226,13 @@ def search_similar(query: str, top_k: int = 10) -> list[dict]:
        SELECT rv.restaurant_id, rv.chunk_text,
               VECTOR_DISTANCE(rv.embedding, :qvec, COSINE) AS dist
        FROM restaurant_vectors rv
+        WHERE VECTOR_DISTANCE(rv.embedding, :qvec2, COSINE) <= :max_dist
        ORDER BY dist
        FETCH FIRST :k ROWS ONLY
    """
    with conn() as c:
        cur = c.cursor()
-        cur.execute(sql, {"qvec": query_vec, "k": top_k})
+        cur.execute(sql, {"qvec": query_vec, "qvec2": query_vec, "k": top_k, "max_dist": max_distance})
        return [
            {
                "restaurant_id": r[0],