Initial commit: Tasteby - YouTube restaurant map service

Backend (FastAPI + Oracle ADB), Frontend (Next.js), daemon worker.
Features: channel/video/restaurant management, semantic search,
Google OAuth, user reviews.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
joungmin
2026-03-06 13:47:19 +09:00
commit 36bec10bd0
54 changed files with 9727 additions and 0 deletions

0
backend/core/__init__.py Normal file
View File

122
backend/core/auth.py Normal file
View File

@@ -0,0 +1,122 @@
"""Authentication helpers — Google OAuth2 + JWT."""
from __future__ import annotations
import os
from datetime import datetime, timedelta, timezone
import jwt
import oracledb
from google.oauth2 import id_token as google_id_token
from google.auth.transport import requests as google_requests
from core.db import conn
# JWT signing configuration.
# NOTE(review): the fallback secret is a development-only placeholder —
# deployments MUST set JWT_SECRET, otherwise tokens are forgeable.
JWT_SECRET = os.environ.get("JWT_SECRET", "tasteby-dev-secret-change-me")
JWT_ALGORITHM = "HS256"
# Access-token lifetime; consumed by create_jwt().
JWT_EXPIRE_DAYS = 7
def verify_google_token(token: str) -> dict:
    """Verify a Google ID token and return user info.

    Returns dict with keys: sub, email, name, picture.
    Raises ValueError on invalid token (bad signature, expired, or —
    when GOOGLE_CLIENT_ID is configured — audience mismatch).
    """
    # Pass our OAuth client ID as the expected audience when configured.
    # Without an audience check, a valid Google ID token issued for ANY
    # application would be accepted here.
    audience = os.environ.get("GOOGLE_CLIENT_ID")
    info = google_id_token.verify_oauth2_token(
        token,
        google_requests.Request(),
        audience=audience,
    )
    return {
        "sub": info["sub"],
        "email": info.get("email"),
        "name": info.get("name"),
        "picture": info.get("picture"),
    }
def find_or_create_user(
    provider: str,
    provider_id: str,
    email: str | None = None,
    nickname: str | None = None,
    avatar_url: str | None = None,
) -> dict:
    """Find existing user or create new one. Returns user dict.

    Lookup key is (provider, provider_id). On a hit, last_login_at is
    refreshed and any non-None optional fields overwrite stored values
    (COALESCE keeps the old value when the argument is None). On a miss,
    a new row is inserted. Either way the whole operation runs on one
    pooled connection and is committed when the `with conn()` block exits.
    """
    # Try to find existing user
    sql_find = """
        SELECT id, provider, provider_id, email, nickname, avatar_url, created_at, last_login_at
        FROM tasteby_users
        WHERE provider = :provider AND provider_id = :provider_id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql_find, {"provider": provider, "provider_id": provider_id})
        row = cur.fetchone()
        if row:
            # Update last_login and optional fields; COALESCE means a None
            # argument leaves the stored column value untouched.
            sql_update = """
                UPDATE tasteby_users
                SET last_login_at = SYSTIMESTAMP,
                    email = COALESCE(:email, email),
                    nickname = COALESCE(:nickname, nickname),
                    avatar_url = COALESCE(:avatar_url, avatar_url)
                WHERE id = :id
            """
            cur.execute(sql_update, {
                "email": email, "nickname": nickname,
                "avatar_url": avatar_url, "id": row[0],
            })
            # Mirror the COALESCE semantics in the returned dict: prefer the
            # fresh argument, fall back to the previously stored value.
            return {
                "id": row[0],
                "provider": row[1],
                "provider_id": row[2],
                "email": email or row[3],
                "nickname": nickname or row[4],
                "avatar_url": avatar_url or row[5],
            }
        # Create new user
        sql_insert = """
            INSERT INTO tasteby_users (provider, provider_id, email, nickname, avatar_url, last_login_at)
            VALUES (:provider, :provider_id, :email, :nickname, :avatar_url, SYSTIMESTAMP)
            RETURNING id INTO :out_id
        """
        # RETURNING ... INTO needs an output bind variable to receive the id.
        out_id = cur.var(oracledb.STRING)
        cur.execute(sql_insert, {
            "provider": provider,
            "provider_id": provider_id,
            "email": email,
            "nickname": nickname,
            "avatar_url": avatar_url,
            "out_id": out_id,
        })
        # Output binds are array-valued; the single inserted id is element 0.
        new_id = out_id.getvalue()[0]
        return {
            "id": new_id,
            "provider": provider,
            "provider_id": provider_id,
            "email": email,
            "nickname": nickname,
            "avatar_url": avatar_url,
        }
def create_jwt(user: dict) -> str:
    """Build a signed JWT access token for *user*.

    Claims: sub (user id), email, nickname, iat, and exp set
    JWT_EXPIRE_DAYS in the future.
    """
    issued = datetime.now(timezone.utc)
    claims = {
        "sub": user["id"],
        "email": user.get("email"),
        "nickname": user.get("nickname"),
        "exp": issued + timedelta(days=JWT_EXPIRE_DAYS),
        "iat": issued,
    }
    return jwt.encode(claims, JWT_SECRET, algorithm=JWT_ALGORITHM)
def verify_jwt(token: str) -> dict:
    """Decode *token* and return its claims.

    Raises jwt.InvalidTokenError when the signature, expiry, or format
    is invalid.
    """
    payload = jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
    return payload

44
backend/core/db.py Normal file
View File

@@ -0,0 +1,44 @@
"""Oracle ADB connection pool — shared across all modules."""
from __future__ import annotations
import os
from contextlib import contextmanager
from typing import Generator, Optional
import oracledb
_pool: Optional[oracledb.ConnectionPool] = None
def _get_pool() -> oracledb.ConnectionPool:
    """Return the process-wide connection pool, creating it on first use."""
    global _pool
    if _pool is not None:
        return _pool
    # Pool sizing: small fixed pool suited to a single app process.
    opts: dict = {
        "user": os.environ["ORACLE_USER"],
        "password": os.environ["ORACLE_PASSWORD"],
        "dsn": os.environ["ORACLE_DSN"],
        "min": 1,
        "max": 5,
        "increment": 1,
    }
    wallet_dir = os.environ.get("ORACLE_WALLET")
    if wallet_dir:
        # Wallet directory for Autonomous DB TLS configuration.
        opts["config_dir"] = wallet_dir
    _pool = oracledb.create_pool(**opts)
    return _pool
@contextmanager
def conn() -> Generator[oracledb.Connection, None, None]:
    """Yield a pooled connection; commit on success, rollback on error.

    The connection is always returned to the pool, even when the body
    (or the commit itself) raises.
    """
    pool = _get_pool()
    connection = pool.acquire()
    try:
        yield connection
        connection.commit()
    except Exception:
        connection.rollback()
        raise
    finally:
        pool.release(connection)

128
backend/core/extractor.py Normal file
View File

@@ -0,0 +1,128 @@
"""LLM-based restaurant info extraction from video transcripts.
Uses OCI GenAI (Gemini Flash) to extract structured restaurant data.
"""
from __future__ import annotations
import json
import logging
import os
import re
import oci
from oci.generative_ai_inference import GenerativeAiInferenceClient
from oci.generative_ai_inference.models import (
ChatDetails,
GenericChatRequest,
OnDemandServingMode,
TextContent,
UserMessage,
)
logger = logging.getLogger(__name__)
def _get_client() -> GenerativeAiInferenceClient:
    """Create an OCI GenAI inference client from the default config file."""
    # OCI_CHAT_ENDPOINT overrides the shared OCI_GENAI_ENDPOINT when set.
    endpoint = os.environ.get("OCI_CHAT_ENDPOINT") or os.environ["OCI_GENAI_ENDPOINT"]
    return GenerativeAiInferenceClient(oci.config.from_file(), service_endpoint=endpoint)
def _llm(prompt: str, max_tokens: int = 4096) -> str:
    """Send *prompt* to the configured OCI chat model; return its text reply.

    temperature=0 keeps extraction output as deterministic as the model allows.
    """
    chat_request = GenericChatRequest(
        messages=[UserMessage(content=[TextContent(text=prompt)])],
        max_tokens=max_tokens,
        temperature=0,
    )
    details = ChatDetails(
        compartment_id=os.environ["OCI_COMPARTMENT_ID"],
        serving_mode=OnDemandServingMode(model_id=os.environ["OCI_CHAT_MODEL_ID"]),
        chat_request=chat_request,
    )
    reply = _get_client().chat(details)
    return reply.data.chat_response.choices[0].message.content[0].text.strip()
def _parse_json(raw: str) -> dict | list:
raw = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw, flags=re.MULTILINE).strip()
raw = re.sub(r",\s*([}\]])", r"\1", raw)
try:
return json.loads(raw)
except json.JSONDecodeError:
pass
try:
return json.JSONDecoder(strict=False).decode(raw)
except json.JSONDecodeError:
pass
# recover truncated array
if raw.lstrip().startswith("["):
decoder = json.JSONDecoder(strict=False)
items: list = []
idx = raw.index("[") + 1
while idx < len(raw):
while idx < len(raw) and raw[idx] in " \t\n\r,":
idx += 1
if idx >= len(raw) or raw[idx] == "]":
break
try:
obj, end = decoder.raw_decode(raw, idx)
items.append(obj)
idx = end
except json.JSONDecodeError:
break
if items:
return items
raise ValueError(f"JSON parse failed: {raw[:80]!r}")
# Prompt template for restaurant extraction. Written in Korean because the
# source transcripts come from Korean food/mukbang videos; it instructs the
# model to return ONLY a JSON array of restaurant objects ([] when none),
# with null for uncertain fields. Placeholders: {title} = video title,
# {transcript} = (possibly truncated) subtitle text.
_EXTRACT_PROMPT = """\
다음은 유튜브 먹방/맛집 영상의 자막입니다.
이 영상에서 언급된 모든 식당 정보를 추출하세요.
규칙:
- 식당이 없으면 빈 배열 [] 반환
- 각 식당에 대해 아래 필드를 JSON 배열로 반환
- 확실하지 않은 정보는 null
- 추가 설명 없이 JSON만 반환
필드:
- name: 식당 이름 (string, 필수)
- address: 주소 또는 위치 힌트 (string | null)
- region: 지역 (예: 서울 강남, 부산 해운대) (string | null)
- cuisine_type: 음식 종류 (예: 한식, 일식, 중식, 양식, 카페) (string | null)
- price_range: 가격대 (예: 1만원대, 2-3만원) (string | null)
- foods_mentioned: 언급된 메뉴들 (string[])
- evaluation: 평가 내용 (string | null)
- guests: 함께한 게스트 (string[])
영상 제목: {title}
자막:
{transcript}
JSON 배열:"""
def extract_restaurants(title: str, transcript: str) -> tuple[list[dict], str]:
    """Extract restaurant info from a video transcript using the LLM.

    Returns (restaurant dicts, raw LLM response text). Best-effort: any
    failure yields ([], "") so the caller can move on to other videos.
    """
    # Bound the prompt size: keep the head and tail of long transcripts.
    if len(transcript) > 8000:
        transcript = transcript[:7000] + "\n...(중략)...\n" + transcript[-1000:]
    prompt = _EXTRACT_PROMPT.format(title=title, transcript=transcript)
    try:
        raw = _llm(prompt, max_tokens=4096)
        parsed = _parse_json(raw)
    except Exception as e:
        logger.error("Restaurant extraction failed: %s", e)
        return [], ""
    if isinstance(parsed, list):
        return parsed, raw
    if isinstance(parsed, dict):
        # Model occasionally returns a single object instead of an array.
        return [parsed], raw
    return [], raw

97
backend/core/geocoding.py Normal file
View File

@@ -0,0 +1,97 @@
"""Google Maps Geocoding + Place Search for restaurant location lookup."""
from __future__ import annotations
import logging
import os
import httpx
logger = logging.getLogger(__name__)
def _api_key() -> str:
return os.environ["GOOGLE_MAPS_API_KEY"]
def geocode_restaurant(name: str, address: str | None = None, region: str | None = None) -> dict | None:
    """Look up restaurant coordinates using Google Maps.

    Prefers the Places Text Search API (better at matching business names)
    and falls back to the Geocoding API.

    Returns dict with: latitude, longitude, formatted_address,
    google_place_id — or None if nothing matched.
    """
    # Query is "name address" when an address hint exists, else "name region".
    hint = address or region
    query = f"{name} {hint}" if hint else name
    hit = _places_text_search(query)
    if hit is not None:
        return hit
    return _geocode(query)
def _places_text_search(query: str) -> dict | None:
    """Search Google Places Text Search for *query*.

    Returns a location dict on success; None on API error or no results
    (failures are logged, never raised).
    """
    endpoint = "https://maps.googleapis.com/maps/api/place/textsearch/json"
    request_params = {
        "query": query,
        "key": _api_key(),
        "language": "ko",
        "type": "restaurant",
    }
    try:
        resp = httpx.get(endpoint, params=request_params, timeout=10)
        resp.raise_for_status()
        payload = resp.json()
        if payload.get("status") == "OK" and payload.get("results"):
            top = payload["results"][0]
            location = top["geometry"]["location"]
            return {
                "latitude": location["lat"],
                "longitude": location["lng"],
                "formatted_address": top.get("formatted_address", ""),
                "google_place_id": top.get("place_id", ""),
            }
    except Exception as e:
        logger.warning("Places text search failed for '%s': %s", query, e)
    return None
def _geocode(query: str) -> dict | None:
    """Geocode an address string via the Geocoding API.

    Returns a location dict (google_place_id is always "" here) or None
    on error / no match; failures are logged, never raised.
    """
    endpoint = "https://maps.googleapis.com/maps/api/geocode/json"
    request_params = {
        "address": query,
        "key": _api_key(),
        "language": "ko",
    }
    try:
        resp = httpx.get(endpoint, params=request_params, timeout=10)
        resp.raise_for_status()
        payload = resp.json()
        if payload.get("status") == "OK" and payload.get("results"):
            top = payload["results"][0]
            location = top["geometry"]["location"]
            return {
                "latitude": location["lat"],
                "longitude": location["lng"],
                "formatted_address": top.get("formatted_address", ""),
                "google_place_id": "",
            }
    except Exception as e:
        logger.warning("Geocoding failed for '%s': %s", query, e)
    return None

134
backend/core/pipeline.py Normal file
View File

@@ -0,0 +1,134 @@
"""Data pipeline: process pending videos end-to-end.
For each pending video:
1. Fetch transcript
2. Extract restaurant info via LLM
3. Geocode each restaurant
4. Save to DB + generate vector embeddings
"""
from __future__ import annotations
import json
import logging
from core import youtube, extractor, geocoding, restaurant, vector
logger = logging.getLogger(__name__)
def process_video(video: dict) -> int:
    """Process a single pending video. Returns number of restaurants found.

    Pipeline per restaurant: transcript -> LLM extraction -> geocode ->
    upsert + video link + vector embedding. The video row's status field
    tracks progress: 'processing' while working, 'done' on success or
    when there is nothing to extract, 'error' on unexpected failure.
    """
    video_db_id = video["id"]      # DB primary key of the videos row
    video_id = video["video_id"]   # YouTube video id (transcript lookup)
    title = video["title"]
    logger.info("Processing video: %s (%s)", title, video_id)
    youtube.update_video_status(video_db_id, "processing")
    try:
        # 1. Transcript — without one there is nothing to extract, so the
        # video is marked done immediately.
        transcript = youtube.get_transcript(video_id)
        if not transcript:
            logger.warning("No transcript for %s, marking done", video_id)
            youtube.update_video_status(video_db_id, "done")
            return 0
        # Persist the transcript while the row is still 'processing'.
        youtube.update_video_status(video_db_id, "processing", transcript)
        # 2. LLM extraction
        restaurants, llm_raw = extractor.extract_restaurants(title, transcript)
        if not restaurants:
            logger.info("No restaurants found in %s", video_id)
            youtube.update_video_status(video_db_id, "done", llm_raw=llm_raw)
            return 0
        # 3-4. Geocode + save each restaurant
        count = 0
        for rest_data in restaurants:
            name = rest_data.get("name")
            if not name:
                # The LLM sometimes emits partial objects; skip nameless ones.
                continue
            # Geocode — failure is tolerated: the restaurant is still saved,
            # just without coordinates.
            geo = geocoding.geocode_restaurant(
                name,
                address=rest_data.get("address"),
                region=rest_data.get("region"),
            )
            lat = geo["latitude"] if geo else None
            lng = geo["longitude"] if geo else None
            addr = geo["formatted_address"] if geo else rest_data.get("address")
            place_id = geo["google_place_id"] if geo else None
            # Upsert restaurant
            rest_id = restaurant.upsert(
                name=name,
                address=addr,
                region=rest_data.get("region"),
                latitude=lat,
                longitude=lng,
                cuisine_type=rest_data.get("cuisine_type"),
                price_range=rest_data.get("price_range"),
                google_place_id=place_id,
            )
            # Link video <-> restaurant
            restaurant.link_video_restaurant(
                video_db_id=video_db_id,
                restaurant_id=rest_id,
                foods=rest_data.get("foods_mentioned"),
                evaluation=rest_data.get("evaluation"),
                guests=rest_data.get("guests"),
            )
            # Vector embeddings for semantic search
            chunks = _build_chunks(name, rest_data, title)
            if chunks:
                vector.save_restaurant_vectors(rest_id, chunks)
            count += 1
            logger.info("Saved restaurant: %s (geocoded=%s)", name, bool(geo))
        youtube.update_video_status(video_db_id, "done", llm_raw=llm_raw)
        logger.info("Video %s done: %d restaurants", video_id, count)
        return count
    except Exception as e:
        # Any unexpected failure marks the row 'error' for inspection/retry;
        # the caller moves on to the next pending video.
        logger.error("Pipeline error for %s: %s", video_id, e, exc_info=True)
        youtube.update_video_status(video_db_id, "error")
        return 0
def _build_chunks(name: str, data: dict, video_title: str) -> list[str]:
"""Build text chunks for vector embedding."""
parts = [f"식당: {name}"]
if data.get("region"):
parts.append(f"지역: {data['region']}")
if data.get("cuisine_type"):
parts.append(f"음식 종류: {data['cuisine_type']}")
if data.get("foods_mentioned"):
foods = data["foods_mentioned"]
if isinstance(foods, list):
parts.append(f"메뉴: {', '.join(foods)}")
if data.get("evaluation"):
parts.append(f"평가: {data['evaluation']}")
if data.get("price_range"):
parts.append(f"가격대: {data['price_range']}")
parts.append(f"영상: {video_title}")
return ["\n".join(parts)]
def process_pending(limit: int = 5) -> int:
    """Process up to *limit* pending videos; return total restaurants found."""
    pending = youtube.get_pending_videos(limit)
    if not pending:
        logger.info("No pending videos")
        return 0
    return sum(process_video(item) for item in pending)

205
backend/core/restaurant.py Normal file
View File

@@ -0,0 +1,205 @@
"""Restaurant DB operations — save extracted data, link to videos."""
from __future__ import annotations
import json
import oracledb
from core.db import conn
def find_by_name(name: str) -> dict | None:
    """Return the restaurant row exactly matching *name*, or None."""
    sql = "SELECT id, name, address, region, latitude, longitude FROM restaurants WHERE name = :n"
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"n": name})
        row = cur.fetchone()
    if row is None:
        return None
    keys = ("id", "name", "address", "region", "latitude", "longitude")
    return dict(zip(keys, row))
def upsert(
    name: str,
    address: str | None = None,
    region: str | None = None,
    latitude: float | None = None,
    longitude: float | None = None,
    cuisine_type: str | None = None,
    price_range: str | None = None,
    google_place_id: str | None = None,
) -> str:
    """Insert a restaurant, or merge fields into the existing same-name row.

    COALESCE semantics: a None argument keeps the stored column value.
    Returns the restaurant row id in either case.
    """
    existing = find_by_name(name)
    if existing:
        update_sql = """
        UPDATE restaurants
        SET address = COALESCE(:addr, address),
            region = COALESCE(:reg, region),
            latitude = COALESCE(:lat, latitude),
            longitude = COALESCE(:lng, longitude),
            cuisine_type = COALESCE(:cuisine, cuisine_type),
            price_range = COALESCE(:price, price_range),
            google_place_id = COALESCE(:gid, google_place_id),
            updated_at = SYSTIMESTAMP
        WHERE id = :id
        """
        binds = {
            "addr": address, "reg": region,
            "lat": latitude, "lng": longitude,
            "cuisine": cuisine_type, "price": price_range,
            "gid": google_place_id, "id": existing["id"],
        }
        with conn() as c:
            c.cursor().execute(update_sql, binds)
        return existing["id"]
    insert_sql = """
    INSERT INTO restaurants (name, address, region, latitude, longitude,
                             cuisine_type, price_range, google_place_id)
    VALUES (:name, :addr, :reg, :lat, :lng, :cuisine, :price, :gid)
    RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        new_id = cur.var(oracledb.STRING)
        cur.execute(insert_sql, {
            "name": name, "addr": address, "reg": region,
            "lat": latitude, "lng": longitude,
            "cuisine": cuisine_type, "price": price_range,
            "gid": google_place_id, "out_id": new_id,
        })
        return new_id.getvalue()[0]
def link_video_restaurant(
    video_db_id: str,
    restaurant_id: str,
    foods: list[str] | None = None,
    evaluation: str | None = None,
    guests: list[str] | None = None,
    citation: str | None = None,
) -> str | None:
    """Create a video <-> restaurant mapping row.

    List/object payloads are stored as JSON (ensure_ascii=False keeps
    Korean text readable). Returns the new row id, or None when this
    (video, restaurant) pair already exists.
    """
    sql = """
    INSERT INTO video_restaurants
        (video_id, restaurant_id, foods_mentioned, evaluation, guests, citation_text)
    VALUES (:vid, :rid, :foods, :eval, :guests, :cite)
    RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        new_id = cur.var(oracledb.STRING)
        binds = {
            "vid": video_db_id,
            "rid": restaurant_id,
            "foods": json.dumps(foods or [], ensure_ascii=False),
            "eval": json.dumps({"text": evaluation} if evaluation else {}, ensure_ascii=False),
            "guests": json.dumps(guests or [], ensure_ascii=False),
            "cite": citation,
            "out_id": new_id,
        }
        try:
            cur.execute(sql, binds)
        except Exception as e:
            # Unique constraint on (video, restaurant): treat as a no-op.
            if "UQ_VR_VIDEO_REST" in str(e).upper():
                return None
            raise
        return new_id.getvalue()[0]
def get_all(
    limit: int = 100,
    offset: int = 0,
    cuisine: str | None = None,
    region: str | None = None,
) -> list[dict]:
    """List geocoded restaurants, most recently updated first.

    Optional filters: exact cuisine_type match, substring region match.
    Only rows with coordinates are returned (map display requirement).
    """
    clauses = ["latitude IS NOT NULL"]
    binds: dict = {"lim": limit, "off": offset}
    if cuisine:
        clauses.append("cuisine_type = :cuisine")
        binds["cuisine"] = cuisine
    if region:
        clauses.append("region LIKE :region")
        binds["region"] = f"%{region}%"
    # Filter clauses are fixed strings; all user values go through binds.
    sql = f"""
    SELECT id, name, address, region, latitude, longitude,
           cuisine_type, price_range, google_place_id
    FROM restaurants
    WHERE {" AND ".join(clauses)}
    ORDER BY updated_at DESC
    OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, binds)
        columns = [d[0].lower() for d in cur.description]
        return [dict(zip(columns, record)) for record in cur.fetchall()]
def get_by_id(restaurant_id: str) -> dict | None:
    """Fetch one restaurant as a dict keyed by lowercase column name."""
    sql = """
    SELECT r.id, r.name, r.address, r.region, r.latitude, r.longitude,
           r.cuisine_type, r.price_range, r.phone, r.website, r.google_place_id
    FROM restaurants r
    WHERE r.id = :id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"id": restaurant_id})
        record = cur.fetchone()
        if record is None:
            return None
        columns = [d[0].lower() for d in cur.description]
        return dict(zip(columns, record))
def get_video_links(restaurant_id: str) -> list[dict]:
    """Return every video appearance of a restaurant, newest first."""
    sql = """
    SELECT v.video_id, v.title, v.url, v.published_at,
           vr.foods_mentioned, vr.evaluation, vr.guests
    FROM video_restaurants vr
    JOIN videos v ON v.id = vr.video_id
    WHERE vr.restaurant_id = :rid
    ORDER BY v.published_at DESC
    """

    def _materialize(value):
        # CLOB columns may arrive as LOB handles; read them into str.
        return value.read() if hasattr(value, "read") else value

    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"rid": restaurant_id})
        links = []
        for vid, title, url, published, foods, evaluation, guests in cur.fetchall():
            links.append({
                "video_id": vid,
                "title": title,
                "url": url,
                "published_at": published.isoformat() if published else None,
                "foods_mentioned": _parse_json_field(_materialize(foods), []),
                "evaluation": _parse_json_field(_materialize(evaluation), {}),
                "guests": _parse_json_field(_materialize(guests), []),
            })
        return links
def _parse_json_field(val, default):
"""Parse a JSON field that may be a string, already-parsed object, or None."""
if val is None:
return default
if isinstance(val, (list, dict)):
return val
if isinstance(val, str):
try:
return json.loads(val)
except (json.JSONDecodeError, ValueError):
return default
return default

189
backend/core/review.py Normal file
View File

@@ -0,0 +1,189 @@
"""User review DB operations."""
from __future__ import annotations
from datetime import date
import oracledb
from core.db import conn
def create_review(
    user_id: str,
    restaurant_id: str,
    rating: float,
    review_text: str | None = None,
    visited_at: date | None = None,
) -> dict:
    """Create a new review. Returns the created review dict.

    The INSERT is committed (by exiting the conn() context) before the
    row is re-read: get_review_by_id() acquires its own pooled connection,
    which cannot see another session's uncommitted data.
    """
    sql = """
    INSERT INTO user_reviews (user_id, restaurant_id, rating, review_text, visited_at)
    VALUES (:user_id, :restaurant_id, :rating, :review_text, :visited_at)
    RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        out_id = cur.var(oracledb.STRING)
        cur.execute(sql, {
            "user_id": user_id,
            "restaurant_id": restaurant_id,
            "rating": rating,
            "review_text": review_text,
            "visited_at": visited_at,
            "out_id": out_id,
        })
        new_id = out_id.getvalue()[0]
    # Now committed — safe to fetch on a (possibly different) connection.
    return get_review_by_id(new_id)
def update_review(
    review_id: str,
    user_id: str,
    rating: float | None = None,
    review_text: str | None = None,
    visited_at: date | None = None,
) -> dict | None:
    """Update an existing review. Only the owner can update.

    Fields left as None keep their stored values (COALESCE). Returns the
    updated review dict, or None if not found / not owned by *user_id*.
    (Return annotation fixed to `dict | None` — None was already a
    possible result.)
    """
    sql = """
    UPDATE user_reviews
    SET rating = COALESCE(:rating, rating),
        review_text = COALESCE(:review_text, review_text),
        visited_at = COALESCE(:visited_at, visited_at),
        updated_at = SYSTIMESTAMP
    WHERE id = :id AND user_id = :user_id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {
            "rating": rating,
            "review_text": review_text,
            "visited_at": visited_at,
            "id": review_id,
            "user_id": user_id,
        })
        # rowcount 0 means no such review or a different owner.
        if cur.rowcount == 0:
            return None
    # Re-read after the with-block so the UPDATE is committed first:
    # get_review_by_id() may use a different pooled connection, which
    # would not see this session's uncommitted change.
    return get_review_by_id(review_id)
def delete_review(review_id: str, user_id: str) -> bool:
    """Delete a review owned by *user_id*. True when a row was removed."""
    sql = "DELETE FROM user_reviews WHERE id = :id AND user_id = :user_id"
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"id": review_id, "user_id": user_id})
        deleted = cur.rowcount > 0
    return deleted
def get_review_by_id(review_id: str) -> dict | None:
    """Fetch one review (with author nickname/avatar), or None."""
    sql = """
    SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
           r.visited_at, r.created_at, r.updated_at,
           u.nickname, u.avatar_url
    FROM user_reviews r
    JOIN tasteby_users u ON u.id = r.user_id
    WHERE r.id = :id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"id": review_id})
        record = cur.fetchone()
    return _row_to_dict(record) if record else None
def get_reviews_for_restaurant(
    restaurant_id: str,
    limit: int = 20,
    offset: int = 0,
) -> list[dict]:
    """List a restaurant's reviews (newest first) with author info."""
    sql = """
    SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
           r.visited_at, r.created_at, r.updated_at,
           u.nickname, u.avatar_url
    FROM user_reviews r
    JOIN tasteby_users u ON u.id = r.user_id
    WHERE r.restaurant_id = :restaurant_id
    ORDER BY r.created_at DESC
    OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    binds = {"restaurant_id": restaurant_id, "off": offset, "lim": limit}
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, binds)
        return [_row_to_dict(record) for record in cur.fetchall()]
def get_user_reviews(
    user_id: str,
    limit: int = 20,
    offset: int = 0,
) -> list[dict]:
    """List one user's reviews, newest first."""
    sql = """
    SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
           r.visited_at, r.created_at, r.updated_at,
           u.nickname, u.avatar_url
    FROM user_reviews r
    JOIN tasteby_users u ON u.id = r.user_id
    WHERE r.user_id = :user_id
    ORDER BY r.created_at DESC
    OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    binds = {"user_id": user_id, "off": offset, "lim": limit}
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, binds)
        return [_row_to_dict(record) for record in cur.fetchall()]
def get_restaurant_avg_rating(restaurant_id: str) -> dict:
    """Get average rating and review count for a restaurant.

    Returns {"avg_rating": float | None, "review_count": int};
    avg_rating is None only when the restaurant has no reviews.
    """
    sql = """
    SELECT ROUND(AVG(rating), 1) AS avg_rating, COUNT(*) AS review_count
    FROM user_reviews
    WHERE restaurant_id = :restaurant_id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"restaurant_id": restaurant_id})
        row = cur.fetchone()
        return {
            # Must test `is not None`, not truthiness: an average of
            # exactly 0 would otherwise be misreported as "no rating".
            "avg_rating": float(row[0]) if row[0] is not None else None,
            "review_count": int(row[1]),
        }
def _row_to_dict(row) -> dict:
"""Convert a review query row to a dict."""
review_text = row[4]
if hasattr(review_text, "read"):
review_text = review_text.read()
return {
"id": row[0],
"user_id": row[1],
"restaurant_id": row[2],
"rating": float(row[3]),
"review_text": review_text,
"visited_at": row[5].isoformat() if row[5] else None,
"created_at": row[6].isoformat() if row[6] else None,
"updated_at": row[7].isoformat() if row[7] else None,
"user_nickname": row[8],
"user_avatar_url": row[9],
}

97
backend/core/vector.py Normal file
View File

@@ -0,0 +1,97 @@
"""Vector embedding generation and storage for restaurant semantic search."""
from __future__ import annotations
import array
import os
import oci
from oci.generative_ai_inference import GenerativeAiInferenceClient
from oci.generative_ai_inference.models import (
EmbedTextDetails,
OnDemandServingMode,
)
from core.db import conn
def _embed_texts(texts: list[str]) -> list[list[float]]:
    """Embed *texts* with the configured OCI embedding model."""
    client = GenerativeAiInferenceClient(
        oci.config.from_file(),
        service_endpoint=os.environ["OCI_GENAI_ENDPOINT"],
    )
    details = EmbedTextDetails(
        inputs=texts,
        serving_mode=OnDemandServingMode(
            model_id=os.environ.get("OCI_EMBED_MODEL_ID", "cohere.embed-v4.0"),
        ),
        compartment_id=os.environ["OCI_COMPARTMENT_ID"],
        # These chunks are stored documents, not search queries.
        input_type="SEARCH_DOCUMENT",
    )
    return client.embed_text(details).data.embeddings
def _to_vec(embedding: list[float]) -> array.array:
return array.array("f", embedding)
def save_restaurant_vectors(restaurant_id: str, chunks: list[str]) -> list[str]:
    """Embed *chunks* and insert one restaurant_vectors row per chunk.

    Returns the list of inserted row IDs ([] for empty input).
    """
    if not chunks:
        return []
    embeddings = _embed_texts(chunks)
    sql = """
    INSERT INTO restaurant_vectors (restaurant_id, chunk_text, embedding)
    VALUES (:rid, :chunk, :emb)
    RETURNING id INTO :out_id
    """
    import oracledb
    row_ids: list[str] = []
    with conn() as c:
        cur = c.cursor()
        for text, embedding in zip(chunks, embeddings):
            out_id = cur.var(oracledb.STRING)
            cur.execute(sql, {
                "rid": restaurant_id,
                "chunk": text,
                "emb": _to_vec(embedding),
                "out_id": out_id,
            })
            row_ids.append(out_id.getvalue()[0])
    return row_ids
def search_similar(query: str, top_k: int = 10) -> list[dict]:
    """Semantic search: find restaurant chunks nearest to *query*.

    Returns up to top_k dicts: restaurant_id, chunk_text, distance
    (cosine distance — smaller is more similar).
    """
    query_vec = _to_vec(_embed_texts([query])[0])
    sql = """
    SELECT rv.restaurant_id, rv.chunk_text,
           VECTOR_DISTANCE(rv.embedding, :qvec, COSINE) AS dist
    FROM restaurant_vectors rv
    ORDER BY dist
    FETCH FIRST :k ROWS ONLY
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"qvec": query_vec, "k": top_k})
        hits = []
        for rid, chunk, dist in cur.fetchall():
            hits.append({
                "restaurant_id": rid,
                # CLOB handles must be read into str.
                "chunk_text": chunk.read() if hasattr(chunk, "read") else chunk,
                "distance": dist,
            })
        return hits

221
backend/core/youtube.py Normal file
View File

@@ -0,0 +1,221 @@
"""YouTube channel scanner + transcript extraction.
Uses YouTube Data API v3 for channel video listing,
youtube-transcript-api for transcript extraction.
"""
from __future__ import annotations
import logging
import os
import re
from datetime import datetime
import httpx
from youtube_transcript_api import YouTubeTranscriptApi
from core.db import conn
logger = logging.getLogger(__name__)
def _api_key() -> str:
return os.environ["YOUTUBE_DATA_API_KEY"]
def extract_video_id(url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Supports watch URLs (?v=...), youtu.be short links, and — added for
    robustness — /shorts/, /embed/ and /live/ paths.

    Raises ValueError when no video ID can be found.
    """
    match = re.search(
        r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([^&?/\s]+)", url
    )
    if not match:
        raise ValueError(f"Cannot extract video ID from URL: {url}")
    return match.group(1)
# -- Channel operations -------------------------------------------------------
def add_channel(channel_id: str, channel_name: str) -> str:
    """Register a YouTube channel; return the new DB row id."""
    import oracledb
    sql = """
    INSERT INTO channels (channel_id, channel_name, channel_url)
    VALUES (:cid, :cname, :curl)
    RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        new_id = cur.var(oracledb.STRING)
        cur.execute(sql, {
            "cid": channel_id,
            "cname": channel_name,
            "curl": f"https://www.youtube.com/channel/{channel_id}",
            "out_id": new_id,
        })
        return new_id.getvalue()[0]
def get_active_channels() -> list[dict]:
    """Return all channels flagged active (is_active = 1)."""
    sql = "SELECT id, channel_id, channel_name FROM channels WHERE is_active = 1"
    fields = ("id", "channel_id", "channel_name")
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql)
        return [dict(zip(fields, record)) for record in cur.fetchall()]
# -- Video listing via YouTube Data API v3 ------------------------------------
def fetch_channel_videos(
    channel_id: str,
    max_results: int = 50,
    published_after: str | None = None,
) -> list[dict]:
    """Fetch video list from a YouTube channel via Data API v3.

    Pages through the search endpoint (max 50 items per page) until
    max_results videos are collected or no more pages remain.

    Args:
        channel_id: YouTube channel id.
        max_results: cap on the number of videos returned.
        published_after: optional timestamp filter passed straight to the
            API's publishedAfter parameter.

    Returns list of dicts: video_id, title, published_at, url.
    """
    params: dict = {
        "key": _api_key(),
        "channelId": channel_id,
        "part": "snippet",
        "order": "date",
        "maxResults": min(max_results, 50),  # API page-size ceiling is 50
        "type": "video",
    }
    if published_after:
        params["publishedAfter"] = published_after
    videos: list[dict] = []
    next_page = None
    while True:
        if next_page:
            # Same params dict reused across pages with the continuation token.
            params["pageToken"] = next_page
        r = httpx.get(
            "https://www.googleapis.com/youtube/v3/search",
            params=params,
            timeout=15,
        )
        r.raise_for_status()
        data = r.json()
        for item in data.get("items", []):
            snippet = item["snippet"]
            vid = item["id"]["videoId"]
            videos.append({
                "video_id": vid,
                "title": snippet["title"],
                "published_at": snippet["publishedAt"],
                "url": f"https://www.youtube.com/watch?v={vid}",
            })
        next_page = data.get("nextPageToken")
        if not next_page or len(videos) >= max_results:
            break
    # The final page may overshoot max_results; trim the excess.
    return videos[:max_results]
# -- Transcript extraction ----------------------------------------------------
def get_transcript(video_id: str) -> str | None:
    """Return the video's transcript as one space-joined string.

    Prefers Korean, falls back to English; returns None (after logging)
    when no transcript is available or fetching fails.
    """
    try:
        segments = YouTubeTranscriptApi().fetch(video_id, languages=["ko", "en"])
        return " ".join(segment.text for segment in segments)
    except Exception as e:
        logger.warning("Transcript unavailable for %s: %s", video_id, e)
        return None
# -- DB operations for videos -------------------------------------------------
def save_video(channel_db_id: str, video: dict) -> str | None:
    """Insert a video row with status 'pending'.

    Returns the new row id, or None when the video already exists
    (unique constraint on video_id).
    """
    import oracledb
    sql = """
    INSERT INTO videos (channel_id, video_id, title, url, published_at, status)
    VALUES (:ch_id, :vid, :title, :url, :pub_at, 'pending')
    RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        new_id = cur.var(oracledb.STRING)
        try:
            published = video.get("published_at")
            pub_at = None
            if published:
                # API returns e.g. 2024-01-01T00:00:00Z; make it ISO-parseable.
                pub_at = datetime.fromisoformat(published.replace("Z", "+00:00"))
            cur.execute(sql, {
                "ch_id": channel_db_id,
                "vid": video["video_id"],
                "title": video["title"],
                "url": video["url"],
                "pub_at": pub_at,
                "out_id": new_id,
            })
            return new_id.getvalue()[0]
        except Exception as e:
            if "UQ_VIDEOS_VID" in str(e).upper():
                return None  # duplicate
            raise
def get_pending_videos(limit: int = 10) -> list[dict]:
    """Return up to *limit* oldest videos still awaiting processing."""
    sql = """
    SELECT id, video_id, title, url
    FROM videos
    WHERE status = 'pending'
    ORDER BY created_at
    FETCH FIRST :n ROWS ONLY
    """
    fields = ("id", "video_id", "title", "url")
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"n": limit})
        return [dict(zip(fields, record)) for record in cur.fetchall()]
def update_video_status(
    video_db_id: str,
    status: str,
    transcript: str | None = None,
    llm_raw: str | None = None,
) -> None:
    """Set a video's status (and optionally its transcript / raw LLM output).

    processed_at is refreshed on every call. Column names are fixed
    strings; all values are bound parameters.
    """
    assignments = ["status = :st", "processed_at = SYSTIMESTAMP"]
    binds: dict = {"st": status, "vid": video_db_id}
    if transcript:
        assignments.append("transcript_text = :txt")
        binds["txt"] = transcript
    if llm_raw:
        assignments.append("llm_raw_response = :llm_resp")
        binds["llm_resp"] = llm_raw
    sql = f"UPDATE videos SET {', '.join(assignments)} WHERE id = :vid"
    with conn() as c:
        c.cursor().execute(sql, binds)
# -- Scan: fetch new videos for all active channels ---------------------------
def scan_all_channels(max_per_channel: int = 50) -> int:
    """Scan all active channels for new videos.

    Returns the total count of newly inserted videos. A failure on one
    channel is logged and does not stop the remaining channels.
    """
    total_new = 0
    for ch in get_active_channels():
        try:
            videos = fetch_channel_videos(ch["channel_id"], max_per_channel)
            # Count inserts for THIS channel; save_video returns None for
            # duplicates. (Previously the cumulative total was logged as
            # the per-channel count.)
            new_here = sum(1 for v in videos if save_video(ch["id"], v))
            total_new += new_here
            logger.info(
                "Channel %s: fetched %d videos, %d new",
                ch["channel_name"], len(videos), new_here,
            )
        except Exception as e:
            logger.error("Failed to scan channel %s: %s", ch["channel_name"], e)
    return total_new