Initial commit: Tasteby - YouTube restaurant map service
Backend (FastAPI + Oracle ADB), Frontend (Next.js), daemon worker. Features: channel/video/restaurant management, semantic search, Google OAuth, user reviews. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
19
backend/.env.example
Normal file
19
backend/.env.example
Normal file
@@ -0,0 +1,19 @@
|
||||
# Oracle ADB
|
||||
ORACLE_USER=admin
|
||||
ORACLE_PASSWORD=
|
||||
ORACLE_DSN=
|
||||
ORACLE_WALLET=
|
||||
|
||||
# OCI GenAI
|
||||
OCI_COMPARTMENT_ID=
|
||||
OCI_GENAI_ENDPOINT=https://inference.generativeai.us-chicago-1.oci.oraclecloud.com
|
||||
OCI_EMBED_MODEL_ID=cohere.embed-v4.0
|
||||
OCI_CHAT_ENDPOINT=https://inference.generativeai.us-ashburn-1.oci.oraclecloud.com
|
||||
OCI_CHAT_MODEL_ID=
|
||||
|
||||
# Google
|
||||
GOOGLE_MAPS_API_KEY=
|
||||
YOUTUBE_DATA_API_KEY=
|
||||
|
||||
# Daemon
|
||||
DAEMON_INTERVAL=3600
|
||||
0
backend/api/__init__.py
Normal file
0
backend/api/__init__.py
Normal file
32
backend/api/deps.py
Normal file
32
backend/api/deps.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""FastAPI dependencies for authentication."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import Header, HTTPException
|
||||
|
||||
from core.auth import verify_jwt
|
||||
|
||||
|
||||
def get_current_user(authorization: str = Header(None)) -> dict:
    """Extract and verify a Bearer token from the Authorization header.

    Returns the decoded JWT payload for the authenticated user.
    Raises HTTPException(401) if the header is missing, not a Bearer
    scheme, or the token fails verification.
    """
    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(401, "Missing or invalid Authorization header")
    token = authorization.removeprefix("Bearer ").strip()
    try:
        return verify_jwt(token)
    except Exception:
        # `from None` drops the JWT library's traceback from the chained
        # context; the 401 response is all the caller needs.
        raise HTTPException(401, "Invalid or expired token") from None
|
||||
|
||||
|
||||
def get_optional_user(authorization: str = Header(None)) -> dict | None:
    """Like get_current_user, but yields None instead of raising 401."""
    header = authorization or ""
    if not header.startswith("Bearer "):
        return None
    raw_token = header.removeprefix("Bearer ").strip()
    try:
        payload = verify_jwt(raw_token)
    except Exception:
        return None
    return payload
|
||||
32
backend/api/main.py
Normal file
32
backend/api/main.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""FastAPI application entry point."""
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from api.routes import restaurants, channels, videos, search, auth, reviews
|
||||
|
||||
app = FastAPI(
|
||||
title="Tasteby API",
|
||||
description="YouTube restaurant map service API",
|
||||
version="0.1.0",
|
||||
)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["http://localhost:3000", "http://localhost:3001"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
app.include_router(restaurants.router, prefix="/api/restaurants", tags=["restaurants"])
|
||||
app.include_router(channels.router, prefix="/api/channels", tags=["channels"])
|
||||
app.include_router(videos.router, prefix="/api/videos", tags=["videos"])
|
||||
app.include_router(search.router, prefix="/api/search", tags=["search"])
|
||||
app.include_router(auth.router, prefix="/api/auth", tags=["auth"])
|
||||
app.include_router(reviews.router, prefix="/api", tags=["reviews"])
|
||||
|
||||
|
||||
@app.get("/api/health")
|
||||
def health():
|
||||
return {"status": "ok"}
|
||||
0
backend/api/routes/__init__.py
Normal file
0
backend/api/routes/__init__.py
Normal file
40
backend/api/routes/auth.py
Normal file
40
backend/api/routes/auth.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""Auth API routes — Google SSO login and user info."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.auth import verify_google_token, find_or_create_user, create_jwt
|
||||
from api.deps import get_current_user
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class GoogleLoginRequest(BaseModel):
    """Request body for POST /api/auth/google."""

    # Google-issued ID token obtained by the frontend OAuth flow.
    id_token: str
|
||||
|
||||
|
||||
@router.post("/google")
|
||||
def login_google(body: GoogleLoginRequest):
|
||||
"""Verify Google ID token and return JWT + user info."""
|
||||
try:
|
||||
google_info = verify_google_token(body.id_token)
|
||||
except ValueError as e:
|
||||
raise HTTPException(401, f"Invalid Google token: {e}")
|
||||
|
||||
user = find_or_create_user(
|
||||
provider="google",
|
||||
provider_id=google_info["sub"],
|
||||
email=google_info.get("email"),
|
||||
nickname=google_info.get("name"),
|
||||
avatar_url=google_info.get("picture"),
|
||||
)
|
||||
access_token = create_jwt(user)
|
||||
return {"access_token": access_token, "user": user}
|
||||
|
||||
|
||||
@router.get("/me")
|
||||
def get_me(current_user: dict = Depends(get_current_user)):
|
||||
"""Return current authenticated user info."""
|
||||
return current_user
|
||||
46
backend/api/routes/channels.py
Normal file
46
backend/api/routes/channels.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""Channel API routes."""
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core import youtube
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class ChannelCreate(BaseModel):
    """Request body for registering a YouTube channel to track."""

    # YouTube channel identifier.
    channel_id: str
    # Human-readable display name for the channel.
    channel_name: str
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_channels():
|
||||
return youtube.get_active_channels()
|
||||
|
||||
|
||||
@router.post("", status_code=201)
|
||||
def create_channel(body: ChannelCreate):
|
||||
try:
|
||||
row_id = youtube.add_channel(body.channel_id, body.channel_name)
|
||||
return {"id": row_id, "channel_id": body.channel_id}
|
||||
except Exception as e:
|
||||
if "UQ_CHANNELS_CID" in str(e).upper():
|
||||
raise HTTPException(409, "Channel already exists")
|
||||
raise
|
||||
|
||||
|
||||
@router.post("/{channel_id}/scan")
|
||||
def scan_channel(channel_id: str):
|
||||
"""Trigger a scan for new videos from this channel."""
|
||||
channels = youtube.get_active_channels()
|
||||
ch = next((c for c in channels if c["channel_id"] == channel_id), None)
|
||||
if not ch:
|
||||
raise HTTPException(404, "Channel not found")
|
||||
|
||||
videos = youtube.fetch_channel_videos(channel_id, max_results=50)
|
||||
new_count = 0
|
||||
for v in videos:
|
||||
row_id = youtube.save_video(ch["id"], v)
|
||||
if row_id:
|
||||
new_count += 1
|
||||
return {"total_fetched": len(videos), "new_videos": new_count}
|
||||
33
backend/api/routes/restaurants.py
Normal file
33
backend/api/routes/restaurants.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""Restaurant API routes."""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from core import restaurant
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_restaurants(
|
||||
limit: int = Query(100, le=500),
|
||||
offset: int = Query(0, ge=0),
|
||||
cuisine: str | None = None,
|
||||
region: str | None = None,
|
||||
):
|
||||
return restaurant.get_all(limit=limit, offset=offset, cuisine=cuisine, region=region)
|
||||
|
||||
|
||||
@router.get("/{restaurant_id}")
|
||||
def get_restaurant(restaurant_id: str):
|
||||
r = restaurant.get_by_id(restaurant_id)
|
||||
if not r:
|
||||
raise HTTPException(404, "Restaurant not found")
|
||||
return r
|
||||
|
||||
|
||||
@router.get("/{restaurant_id}/videos")
|
||||
def get_restaurant_videos(restaurant_id: str):
|
||||
r = restaurant.get_by_id(restaurant_id)
|
||||
if not r:
|
||||
raise HTTPException(404, "Restaurant not found")
|
||||
return restaurant.get_video_links(restaurant_id)
|
||||
97
backend/api/routes/reviews.py
Normal file
97
backend/api/routes/reviews.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Review API routes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from core import review
|
||||
from api.deps import get_current_user
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class ReviewCreate(BaseModel):
    """Request body for creating a review."""

    # Rating between 0.5 and 5.0 (validated by Field bounds).
    rating: float = Field(..., ge=0.5, le=5.0)
    review_text: Optional[str] = None
    # Date the user visited the restaurant, if they chose to record it.
    visited_at: Optional[date] = None
|
||||
|
||||
|
||||
class ReviewUpdate(BaseModel):
    """Request body for updating a review; all fields optional (partial update)."""

    # Rating between 0.5 and 5.0 when provided.
    rating: Optional[float] = Field(None, ge=0.5, le=5.0)
    review_text: Optional[str] = None
    visited_at: Optional[date] = None
|
||||
|
||||
|
||||
# --- Restaurant reviews ---
|
||||
|
||||
@router.get("/restaurants/{restaurant_id}/reviews")
|
||||
def list_restaurant_reviews(
|
||||
restaurant_id: str,
|
||||
limit: int = Query(20, le=100),
|
||||
offset: int = Query(0, ge=0),
|
||||
):
|
||||
"""List reviews for a restaurant (public)."""
|
||||
reviews = review.get_reviews_for_restaurant(restaurant_id, limit=limit, offset=offset)
|
||||
stats = review.get_restaurant_avg_rating(restaurant_id)
|
||||
return {"reviews": reviews, **stats}
|
||||
|
||||
|
||||
@router.post("/restaurants/{restaurant_id}/reviews", status_code=201)
|
||||
def create_restaurant_review(
|
||||
restaurant_id: str,
|
||||
body: ReviewCreate,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
):
|
||||
"""Create a review for a restaurant (requires auth)."""
|
||||
created = review.create_review(
|
||||
user_id=current_user["sub"],
|
||||
restaurant_id=restaurant_id,
|
||||
rating=body.rating,
|
||||
review_text=body.review_text,
|
||||
visited_at=body.visited_at,
|
||||
)
|
||||
return created
|
||||
|
||||
|
||||
@router.put("/reviews/{review_id}")
|
||||
def update_review_route(
|
||||
review_id: str,
|
||||
body: ReviewUpdate,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
):
|
||||
"""Update a review (own review only)."""
|
||||
updated = review.update_review(
|
||||
review_id=review_id,
|
||||
user_id=current_user["sub"],
|
||||
rating=body.rating,
|
||||
review_text=body.review_text,
|
||||
visited_at=body.visited_at,
|
||||
)
|
||||
if not updated:
|
||||
raise HTTPException(404, "Review not found or not yours")
|
||||
return updated
|
||||
|
||||
|
||||
@router.delete("/reviews/{review_id}", status_code=204)
|
||||
def delete_review_route(
|
||||
review_id: str,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
):
|
||||
"""Delete a review (own review only)."""
|
||||
deleted = review.delete_review(review_id, current_user["sub"])
|
||||
if not deleted:
|
||||
raise HTTPException(404, "Review not found or not yours")
|
||||
|
||||
|
||||
@router.get("/users/me/reviews")
|
||||
def list_my_reviews(
|
||||
limit: int = Query(20, le=100),
|
||||
offset: int = Query(0, ge=0),
|
||||
current_user: dict = Depends(get_current_user),
|
||||
):
|
||||
"""List current user's reviews."""
|
||||
return review.get_user_reviews(current_user["sub"], limit=limit, offset=offset)
|
||||
66
backend/api/routes/search.py
Normal file
66
backend/api/routes/search.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Search API routes — keyword + semantic search."""
|
||||
|
||||
from fastapi import APIRouter, Query
|
||||
|
||||
from core import restaurant, vector
|
||||
from core.db import conn
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("")
|
||||
def search_restaurants(
|
||||
q: str = Query(..., min_length=1),
|
||||
mode: str = Query("keyword", pattern="^(keyword|semantic|hybrid)$"),
|
||||
limit: int = Query(20, le=100),
|
||||
):
|
||||
"""Search restaurants by keyword, semantic similarity, or hybrid."""
|
||||
if mode == "semantic":
|
||||
return _semantic_search(q, limit)
|
||||
elif mode == "hybrid":
|
||||
kw = _keyword_search(q, limit)
|
||||
sem = _semantic_search(q, limit)
|
||||
# merge: keyword results first, then semantic results not already in keyword
|
||||
seen = {r["id"] for r in kw}
|
||||
merged = list(kw)
|
||||
for r in sem:
|
||||
if r["id"] not in seen:
|
||||
merged.append(r)
|
||||
seen.add(r["id"])
|
||||
return merged[:limit]
|
||||
else:
|
||||
return _keyword_search(q, limit)
|
||||
|
||||
|
||||
def _keyword_search(q: str, limit: int) -> list[dict]:
    """Case-insensitive LIKE search over name/address/region/cuisine.

    Only rows with coordinates (latitude IS NOT NULL) are returned, since
    results are meant for the map.
    NOTE(review): `%` and `_` in the user's query act as LIKE wildcards —
    confirm whether escaping is needed.
    """
    sql = """
        SELECT id, name, address, region, latitude, longitude,
               cuisine_type, price_range
        FROM restaurants
        WHERE latitude IS NOT NULL
          AND (UPPER(name) LIKE UPPER(:q)
               OR UPPER(address) LIKE UPPER(:q)
               OR UPPER(region) LIKE UPPER(:q)
               OR UPPER(cuisine_type) LIKE UPPER(:q))
        FETCH FIRST :lim ROWS ONLY
    """
    pattern = f"%{q}%"
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"q": pattern, "lim": limit})
        cols = [d[0].lower() for d in cur.description]
        return [dict(zip(cols, row)) for row in cur.fetchall()]
|
||||
|
||||
|
||||
def _semantic_search(q: str, limit: int) -> list[dict]:
    """Semantic search: find restaurants by vector similarity to the query.

    Results keep the ranking returned by vector.search_similar; multiple
    chunks of the same restaurant collapse to the first (best) occurrence.
    Restaurants without coordinates are dropped (map results only).
    """
    similar = vector.search_similar(q, top_k=limit)
    if not similar:
        return []

    # dict.fromkeys de-duplicates while preserving order — the original
    # set comprehension discarded the similarity ranking.
    rest_ids = list(dict.fromkeys(s["restaurant_id"] for s in similar))
    results = []
    for rid in rest_ids[:limit]:
        r = restaurant.get_by_id(rid)
        if r and r.get("latitude"):
            results.append(r)
    return results
|
||||
52
backend/api/routes/videos.py
Normal file
52
backend/api/routes/videos.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Video API routes."""
|
||||
|
||||
from fastapi import APIRouter, Query
|
||||
|
||||
from core.db import conn
|
||||
from core.pipeline import process_pending
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_videos(
|
||||
status: str | None = None,
|
||||
limit: int = Query(50, le=200),
|
||||
offset: int = Query(0, ge=0),
|
||||
):
|
||||
conditions = []
|
||||
params: dict = {"lim": limit, "off": offset}
|
||||
if status:
|
||||
conditions.append("v.status = :st")
|
||||
params["st"] = status
|
||||
|
||||
where = ("WHERE " + " AND ".join(conditions)) if conditions else ""
|
||||
sql = f"""
|
||||
SELECT v.id, v.video_id, v.title, v.url, v.status,
|
||||
v.published_at, c.channel_name
|
||||
FROM videos v
|
||||
JOIN channels c ON c.id = v.channel_id
|
||||
{where}
|
||||
ORDER BY v.published_at DESC NULLS LAST
|
||||
OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
|
||||
"""
|
||||
with conn() as c:
|
||||
cur = c.cursor()
|
||||
cur.execute(sql, params)
|
||||
cols = [d[0].lower() for d in cur.description]
|
||||
rows = cur.fetchall()
|
||||
|
||||
results = []
|
||||
for row in rows:
|
||||
d = dict(zip(cols, row))
|
||||
if d.get("published_at"):
|
||||
d["published_at"] = d["published_at"].isoformat()
|
||||
results.append(d)
|
||||
return results
|
||||
|
||||
|
||||
@router.post("/process")
|
||||
def trigger_processing(limit: int = Query(5, le=20)):
|
||||
"""Manually trigger processing of pending videos."""
|
||||
count = process_pending(limit)
|
||||
return {"restaurants_extracted": count}
|
||||
0
backend/core/__init__.py
Normal file
0
backend/core/__init__.py
Normal file
122
backend/core/auth.py
Normal file
122
backend/core/auth.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""Authentication helpers — Google OAuth2 + JWT."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import jwt
|
||||
import oracledb
|
||||
from google.oauth2 import id_token as google_id_token
|
||||
from google.auth.transport import requests as google_requests
|
||||
|
||||
from core.db import conn
|
||||
|
||||
JWT_SECRET = os.environ.get("JWT_SECRET", "tasteby-dev-secret-change-me")
|
||||
JWT_ALGORITHM = "HS256"
|
||||
JWT_EXPIRE_DAYS = 7
|
||||
|
||||
|
||||
def verify_google_token(token: str) -> dict:
    """Verify a Google ID token and return user info.

    Returns dict with keys: sub, email, name, picture.
    Raises ValueError on an invalid token.
    """
    # Pin the audience to our OAuth client id when GOOGLE_CLIENT_ID is set.
    # With audience=None the library accepts tokens minted for ANY client,
    # which would let a token from an unrelated app log in here.
    info = google_id_token.verify_oauth2_token(
        token,
        google_requests.Request(),
        audience=os.environ.get("GOOGLE_CLIENT_ID"),
    )
    return {
        "sub": info["sub"],
        "email": info.get("email"),
        "name": info.get("name"),
        "picture": info.get("picture"),
    }
|
||||
|
||||
|
||||
def find_or_create_user(
    provider: str,
    provider_id: str,
    email: str | None = None,
    nickname: str | None = None,
    avatar_url: str | None = None,
) -> dict:
    """Find existing user or create new one. Returns user dict.

    Users are keyed by (provider, provider_id). On a hit, last_login_at is
    bumped and email/nickname/avatar_url refreshed when new values are
    given (COALESCE keeps existing values otherwise). On a miss, a new row
    is inserted and its generated id returned.
    """
    # Try to find existing user
    sql_find = """
        SELECT id, provider, provider_id, email, nickname, avatar_url, created_at, last_login_at
        FROM tasteby_users
        WHERE provider = :provider AND provider_id = :provider_id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql_find, {"provider": provider, "provider_id": provider_id})
        row = cur.fetchone()

        if row:
            # Update last_login and optional fields
            sql_update = """
                UPDATE tasteby_users
                SET last_login_at = SYSTIMESTAMP,
                    email = COALESCE(:email, email),
                    nickname = COALESCE(:nickname, nickname),
                    avatar_url = COALESCE(:avatar_url, avatar_url)
                WHERE id = :id
            """
            cur.execute(sql_update, {
                "email": email, "nickname": nickname,
                "avatar_url": avatar_url, "id": row[0],
            })
            # Echo the refreshed values without re-querying the row.
            return {
                "id": row[0],
                "provider": row[1],
                "provider_id": row[2],
                "email": email or row[3],
                "nickname": nickname or row[4],
                "avatar_url": avatar_url or row[5],
            }

        # Create new user
        sql_insert = """
            INSERT INTO tasteby_users (provider, provider_id, email, nickname, avatar_url, last_login_at)
            VALUES (:provider, :provider_id, :email, :nickname, :avatar_url, SYSTIMESTAMP)
            RETURNING id INTO :out_id
        """
        # RETURNING ... INTO needs an out bind; STRING suggests the id column
        # is text/GUID rather than numeric — TODO confirm against the schema.
        out_id = cur.var(oracledb.STRING)
        cur.execute(sql_insert, {
            "provider": provider,
            "provider_id": provider_id,
            "email": email,
            "nickname": nickname,
            "avatar_url": avatar_url,
            "out_id": out_id,
        })
        # getvalue() returns a list for RETURNING binds; take the single row.
        new_id = out_id.getvalue()[0]
        return {
            "id": new_id,
            "provider": provider,
            "provider_id": provider_id,
            "email": email,
            "nickname": nickname,
            "avatar_url": avatar_url,
        }
|
||||
|
||||
|
||||
def create_jwt(user: dict) -> str:
    """Create a signed JWT access token for the given user.

    The payload carries the user id as `sub`, plus email/nickname for
    display. Tokens expire after JWT_EXPIRE_DAYS.
    """
    # Capture the timestamp once so exp is exactly iat + lifetime (the
    # original called now() twice, making the two claims drift slightly).
    now = datetime.now(timezone.utc)
    payload = {
        "sub": user["id"],
        "email": user.get("email"),
        "nickname": user.get("nickname"),
        "iat": now,
        "exp": now + timedelta(days=JWT_EXPIRE_DAYS),
    }
    return jwt.encode(payload, JWT_SECRET, algorithm=JWT_ALGORITHM)
|
||||
|
||||
|
||||
def verify_jwt(token: str) -> dict:
    """Verify a JWT's signature and expiry; return the decoded payload.

    Raises jwt.InvalidTokenError (or a subclass) on failure.
    """
    return jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
|
||||
44
backend/core/db.py
Normal file
44
backend/core/db.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""Oracle ADB connection pool — shared across all modules."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from contextlib import contextmanager
|
||||
from typing import Generator, Optional
|
||||
|
||||
import oracledb
|
||||
|
||||
_pool: Optional[oracledb.ConnectionPool] = None
|
||||
|
||||
|
||||
def _get_pool() -> oracledb.ConnectionPool:
    """Lazily create and cache the module-wide Oracle connection pool.

    NOTE(review): initialization is not lock-guarded — concurrent first
    calls could each build a pool; confirm behavior under multi-threaded
    server startup.
    """
    global _pool
    if _pool is None:
        kwargs: dict = dict(
            user=os.environ["ORACLE_USER"],
            password=os.environ["ORACLE_PASSWORD"],
            dsn=os.environ["ORACLE_DSN"],
            min=1,
            max=5,
            increment=1,
        )
        # ORACLE_WALLET is passed as config_dir — presumably the wallet
        # directory for ADB mTLS connections; confirm against deployment.
        wallet = os.environ.get("ORACLE_WALLET")
        if wallet:
            kwargs["config_dir"] = wallet
        _pool = oracledb.create_pool(**kwargs)
    return _pool
|
||||
|
||||
|
||||
@contextmanager
def conn() -> Generator[oracledb.Connection, None, None]:
    """Yield a pooled connection; commit on success, rollback on error."""
    pool = _get_pool()
    connection = pool.acquire()
    try:
        yield connection
        connection.commit()
    except Exception:
        connection.rollback()
        raise
    finally:
        # Always hand the connection back to the pool, even after rollback.
        pool.release(connection)
|
||||
128
backend/core/extractor.py
Normal file
128
backend/core/extractor.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""LLM-based restaurant info extraction from video transcripts.
|
||||
|
||||
Uses OCI GenAI (Gemini Flash) to extract structured restaurant data.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
||||
import oci
|
||||
from oci.generative_ai_inference import GenerativeAiInferenceClient
|
||||
from oci.generative_ai_inference.models import (
|
||||
ChatDetails,
|
||||
GenericChatRequest,
|
||||
OnDemandServingMode,
|
||||
TextContent,
|
||||
UserMessage,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_client() -> GenerativeAiInferenceClient:
    """Build an OCI GenAI inference client from the default OCI config file.

    Prefers OCI_CHAT_ENDPOINT; falls back to OCI_GENAI_ENDPOINT.
    """
    config = oci.config.from_file()
    endpoint = os.environ.get("OCI_CHAT_ENDPOINT") or os.environ["OCI_GENAI_ENDPOINT"]
    return GenerativeAiInferenceClient(config, service_endpoint=endpoint)
|
||||
|
||||
|
||||
def _llm(prompt: str, max_tokens: int = 4096) -> str:
    """Send a single-turn prompt to the configured OCI chat model.

    Returns the first choice's text content, stripped. temperature=0 keeps
    extraction output as deterministic as the service allows.
    """
    client = _get_client()
    req = GenericChatRequest(
        messages=[UserMessage(content=[TextContent(text=prompt)])],
        max_tokens=max_tokens,
        temperature=0,
    )
    det = ChatDetails(
        compartment_id=os.environ["OCI_COMPARTMENT_ID"],
        serving_mode=OnDemandServingMode(model_id=os.environ["OCI_CHAT_MODEL_ID"]),
        chat_request=req,
    )
    resp = client.chat(det)
    return resp.data.chat_response.choices[0].message.content[0].text.strip()
|
||||
|
||||
|
||||
def _parse_json(raw: str) -> dict | list:
|
||||
raw = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw, flags=re.MULTILINE).strip()
|
||||
raw = re.sub(r",\s*([}\]])", r"\1", raw)
|
||||
try:
|
||||
return json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
try:
|
||||
return json.JSONDecoder(strict=False).decode(raw)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
# recover truncated array
|
||||
if raw.lstrip().startswith("["):
|
||||
decoder = json.JSONDecoder(strict=False)
|
||||
items: list = []
|
||||
idx = raw.index("[") + 1
|
||||
while idx < len(raw):
|
||||
while idx < len(raw) and raw[idx] in " \t\n\r,":
|
||||
idx += 1
|
||||
if idx >= len(raw) or raw[idx] == "]":
|
||||
break
|
||||
try:
|
||||
obj, end = decoder.raw_decode(raw, idx)
|
||||
items.append(obj)
|
||||
idx = end
|
||||
except json.JSONDecodeError:
|
||||
break
|
||||
if items:
|
||||
return items
|
||||
raise ValueError(f"JSON parse failed: {raw[:80]!r}")
|
||||
|
||||
|
||||
_EXTRACT_PROMPT = """\
|
||||
다음은 유튜브 먹방/맛집 영상의 자막입니다.
|
||||
이 영상에서 언급된 모든 식당 정보를 추출하세요.
|
||||
|
||||
규칙:
|
||||
- 식당이 없으면 빈 배열 [] 반환
|
||||
- 각 식당에 대해 아래 필드를 JSON 배열로 반환
|
||||
- 확실하지 않은 정보는 null
|
||||
- 추가 설명 없이 JSON만 반환
|
||||
|
||||
필드:
|
||||
- name: 식당 이름 (string, 필수)
|
||||
- address: 주소 또는 위치 힌트 (string | null)
|
||||
- region: 지역 (예: 서울 강남, 부산 해운대) (string | null)
|
||||
- cuisine_type: 음식 종류 (예: 한식, 일식, 중식, 양식, 카페) (string | null)
|
||||
- price_range: 가격대 (예: 1만원대, 2-3만원) (string | null)
|
||||
- foods_mentioned: 언급된 메뉴들 (string[])
|
||||
- evaluation: 평가 내용 (string | null)
|
||||
- guests: 함께한 게스트 (string[])
|
||||
|
||||
영상 제목: {title}
|
||||
자막:
|
||||
{transcript}
|
||||
|
||||
JSON 배열:"""
|
||||
|
||||
|
||||
def extract_restaurants(title: str, transcript: str) -> tuple[list[dict], str]:
    """Extract restaurant info from a video transcript using the LLM.

    Returns (list of restaurant dicts, raw LLM response text); on any
    failure, logs the error and returns ([], "").
    """
    # Keep the prompt within budget: head + tail of very long transcripts.
    if len(transcript) > 8000:
        transcript = transcript[:7000] + "\n...(중략)...\n" + transcript[-1000:]

    try:
        raw = _llm(_EXTRACT_PROMPT.format(title=title, transcript=transcript), max_tokens=4096)
        parsed = _parse_json(raw)
    except Exception as e:
        logger.error("Restaurant extraction failed: %s", e)
        return [], ""

    # Normalize: a single object becomes a one-element list.
    if isinstance(parsed, dict):
        return [parsed], raw
    if isinstance(parsed, list):
        return parsed, raw
    return [], raw
|
||||
97
backend/core/geocoding.py
Normal file
97
backend/core/geocoding.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Google Maps Geocoding + Place Search for restaurant location lookup."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _api_key() -> str:
|
||||
return os.environ["GOOGLE_MAPS_API_KEY"]
|
||||
|
||||
|
||||
def geocode_restaurant(name: str, address: str | None = None, region: str | None = None) -> dict | None:
    """Look up restaurant coordinates using Google Maps.

    Places Text Search is tried first (more accurate for business names),
    with the Geocoding API as a fallback.

    Returns a dict with latitude, longitude, formatted_address,
    google_place_id — or None when nothing is found.
    """
    # Prefer the address over the broader region as the location hint.
    hint = address or region
    query = f"{name} {hint}" if hint else name
    return _places_text_search(query) or _geocode(query)
|
||||
|
||||
|
||||
def _places_text_search(query: str) -> dict | None:
|
||||
"""Search for a place using Google Places Text Search API."""
|
||||
try:
|
||||
r = httpx.get(
|
||||
"https://maps.googleapis.com/maps/api/place/textsearch/json",
|
||||
params={
|
||||
"query": query,
|
||||
"key": _api_key(),
|
||||
"language": "ko",
|
||||
"type": "restaurant",
|
||||
},
|
||||
timeout=10,
|
||||
)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
|
||||
if data.get("status") == "OK" and data.get("results"):
|
||||
place = data["results"][0]
|
||||
loc = place["geometry"]["location"]
|
||||
return {
|
||||
"latitude": loc["lat"],
|
||||
"longitude": loc["lng"],
|
||||
"formatted_address": place.get("formatted_address", ""),
|
||||
"google_place_id": place.get("place_id", ""),
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning("Places text search failed for '%s': %s", query, e)
|
||||
return None
|
||||
|
||||
|
||||
def _geocode(query: str) -> dict | None:
|
||||
"""Geocode an address string."""
|
||||
try:
|
||||
r = httpx.get(
|
||||
"https://maps.googleapis.com/maps/api/geocode/json",
|
||||
params={
|
||||
"address": query,
|
||||
"key": _api_key(),
|
||||
"language": "ko",
|
||||
},
|
||||
timeout=10,
|
||||
)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
|
||||
if data.get("status") == "OK" and data.get("results"):
|
||||
result = data["results"][0]
|
||||
loc = result["geometry"]["location"]
|
||||
return {
|
||||
"latitude": loc["lat"],
|
||||
"longitude": loc["lng"],
|
||||
"formatted_address": result.get("formatted_address", ""),
|
||||
"google_place_id": "",
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning("Geocoding failed for '%s': %s", query, e)
|
||||
return None
|
||||
134
backend/core/pipeline.py
Normal file
134
backend/core/pipeline.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""Data pipeline: process pending videos end-to-end.
|
||||
|
||||
For each pending video:
|
||||
1. Fetch transcript
|
||||
2. Extract restaurant info via LLM
|
||||
3. Geocode each restaurant
|
||||
4. Save to DB + generate vector embeddings
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from core import youtube, extractor, geocoding, restaurant, vector
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def process_video(video: dict) -> int:
    """Process a single pending video. Returns number of restaurants found.

    Steps: fetch transcript → LLM extraction → geocode each restaurant →
    upsert + link to the video + save embedding vectors. On failure the
    video is marked "error"; videos with no transcript or no extracted
    restaurants are marked "done" so they are not retried.
    """
    video_db_id = video["id"]
    video_id = video["video_id"]
    title = video["title"]

    logger.info("Processing video: %s (%s)", title, video_id)
    youtube.update_video_status(video_db_id, "processing")

    try:
        # 1. Transcript
        transcript = youtube.get_transcript(video_id)
        if not transcript:
            logger.warning("No transcript for %s, marking done", video_id)
            youtube.update_video_status(video_db_id, "done")
            return 0

        # Persist the transcript before the slow, fallible LLM step.
        youtube.update_video_status(video_db_id, "processing", transcript)

        # 2. LLM extraction
        restaurants, llm_raw = extractor.extract_restaurants(title, transcript)
        if not restaurants:
            logger.info("No restaurants found in %s", video_id)
            youtube.update_video_status(video_db_id, "done", llm_raw=llm_raw)
            return 0

        # 3-4. Geocode + save each restaurant
        count = 0
        for rest_data in restaurants:
            name = rest_data.get("name")
            if not name:
                # LLM output without a name is unusable — skip it.
                continue

            # Geocode (best-effort: None just leaves coordinates empty)
            geo = geocoding.geocode_restaurant(
                name,
                address=rest_data.get("address"),
                region=rest_data.get("region"),
            )

            lat = geo["latitude"] if geo else None
            lng = geo["longitude"] if geo else None
            # Prefer Google's canonical address; fall back to the LLM's hint.
            addr = geo["formatted_address"] if geo else rest_data.get("address")
            place_id = geo["google_place_id"] if geo else None

            # Upsert restaurant
            rest_id = restaurant.upsert(
                name=name,
                address=addr,
                region=rest_data.get("region"),
                latitude=lat,
                longitude=lng,
                cuisine_type=rest_data.get("cuisine_type"),
                price_range=rest_data.get("price_range"),
                google_place_id=place_id,
            )

            # Link video <-> restaurant
            restaurant.link_video_restaurant(
                video_db_id=video_db_id,
                restaurant_id=rest_id,
                foods=rest_data.get("foods_mentioned"),
                evaluation=rest_data.get("evaluation"),
                guests=rest_data.get("guests"),
            )

            # Vector embeddings
            chunks = _build_chunks(name, rest_data, title)
            if chunks:
                vector.save_restaurant_vectors(rest_id, chunks)

            count += 1
            logger.info("Saved restaurant: %s (geocoded=%s)", name, bool(geo))

        youtube.update_video_status(video_db_id, "done", llm_raw=llm_raw)
        logger.info("Video %s done: %d restaurants", video_id, count)
        return count

    except Exception as e:
        # Broad catch is deliberate: one bad video must not kill the daemon.
        logger.error("Pipeline error for %s: %s", video_id, e, exc_info=True)
        youtube.update_video_status(video_db_id, "error")
        return 0
|
||||
|
||||
|
||||
def _build_chunks(name: str, data: dict, video_title: str) -> list[str]:
    """Build the text chunk(s) to embed for one extracted restaurant."""
    lines = [f"식당: {name}"]
    region = data.get("region")
    if region:
        lines.append(f"지역: {region}")
    cuisine = data.get("cuisine_type")
    if cuisine:
        lines.append(f"음식 종류: {cuisine}")
    foods = data.get("foods_mentioned")
    if foods and isinstance(foods, list):
        lines.append(f"메뉴: {', '.join(foods)}")
    evaluation = data.get("evaluation")
    if evaluation:
        lines.append(f"평가: {evaluation}")
    price = data.get("price_range")
    if price:
        lines.append(f"가격대: {price}")
    lines.append(f"영상: {video_title}")

    # Single chunk per restaurant for now.
    return ["\n".join(lines)]
|
||||
|
||||
|
||||
def process_pending(limit: int = 5) -> int:
    """Process up to `limit` pending videos. Returns total restaurants found."""
    pending = youtube.get_pending_videos(limit)
    if not pending:
        logger.info("No pending videos")
        return 0
    return sum(process_video(v) for v in pending)
|
||||
205
backend/core/restaurant.py
Normal file
205
backend/core/restaurant.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""Restaurant DB operations — save extracted data, link to videos."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import oracledb
|
||||
|
||||
from core.db import conn
|
||||
|
||||
|
||||
def find_by_name(name: str) -> dict | None:
    """Look up a restaurant row by exact name; None when absent."""
    sql = "SELECT id, name, address, region, latitude, longitude FROM restaurants WHERE name = :n"
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"n": name})
        row = cur.fetchone()
        if row is None:
            return None
        keys = ("id", "name", "address", "region", "latitude", "longitude")
        return dict(zip(keys, row))
|
||||
|
||||
|
||||
def upsert(
    name: str,
    address: str | None = None,
    region: str | None = None,
    latitude: float | None = None,
    longitude: float | None = None,
    cuisine_type: str | None = None,
    price_range: str | None = None,
    google_place_id: str | None = None,
) -> str:
    """Insert or update a restaurant row, keyed by exact name match.

    On update, COALESCE keeps the stored value whenever the incoming
    argument is None, so callers may pass partial data without erasing
    existing fields. Returns the restaurant row id (existing or new).

    NOTE(review): find_by_name + INSERT is not atomic, and the schema has
    no unique constraint on restaurants.name — concurrent upserts of the
    same name can create duplicates; confirm whether that is acceptable.
    """
    existing = find_by_name(name)
    if existing:
        # Partial update: NULL binds fall back to the current column value.
        sql = """
            UPDATE restaurants
            SET address = COALESCE(:addr, address),
                region = COALESCE(:reg, region),
                latitude = COALESCE(:lat, latitude),
                longitude = COALESCE(:lng, longitude),
                cuisine_type = COALESCE(:cuisine, cuisine_type),
                price_range = COALESCE(:price, price_range),
                google_place_id = COALESCE(:gid, google_place_id),
                updated_at = SYSTIMESTAMP
            WHERE id = :id
        """
        with conn() as c:
            c.cursor().execute(sql, {
                "addr": address, "reg": region,
                "lat": latitude, "lng": longitude,
                "cuisine": cuisine_type, "price": price_range,
                "gid": google_place_id, "id": existing["id"],
            })
        return existing["id"]

    sql = """
        INSERT INTO restaurants (name, address, region, latitude, longitude,
                                 cuisine_type, price_range, google_place_id)
        VALUES (:name, :addr, :reg, :lat, :lng, :cuisine, :price, :gid)
        RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        # Out-bind captures the SYS_GUID() id generated by the insert.
        out_id = cur.var(oracledb.STRING)
        cur.execute(sql, {
            "name": name, "addr": address, "reg": region,
            "lat": latitude, "lng": longitude,
            "cuisine": cuisine_type, "price": price_range,
            "gid": google_place_id, "out_id": out_id,
        })
        # RETURNING INTO yields a list even for single-row DML.
        return out_id.getvalue()[0]
|
||||
|
||||
|
||||
def link_video_restaurant(
    video_db_id: str,
    restaurant_id: str,
    foods: list[str] | None = None,
    evaluation: str | None = None,
    guests: list[str] | None = None,
    citation: str | None = None,
) -> str | None:
    """Create a video-restaurant mapping row.

    foods/evaluation/guests are serialized to JSON text to satisfy the
    IS JSON check constraints on the CLOB columns. Returns the new row id,
    or None when the (video, restaurant) pair already exists (unique
    constraint UQ_VR_VIDEO_REST).
    """
    sql = """
        INSERT INTO video_restaurants
            (video_id, restaurant_id, foods_mentioned, evaluation, guests, citation_text)
        VALUES (:vid, :rid, :foods, :eval, :guests, :cite)
        RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        out_id = cur.var(oracledb.STRING)
        try:
            cur.execute(sql, {
                "vid": video_db_id,
                "rid": restaurant_id,
                # ensure_ascii=False keeps Korean text readable in the DB.
                "foods": json.dumps(foods or [], ensure_ascii=False),
                # evaluation is wrapped as {"text": ...} so the column is
                # always a JSON object, never a bare string.
                "eval": json.dumps({"text": evaluation} if evaluation else {}, ensure_ascii=False),
                "guests": json.dumps(guests or [], ensure_ascii=False),
                "cite": citation,
                "out_id": out_id,
            })
            return out_id.getvalue()[0]
        except Exception as e:
            # Duplicate (video, restaurant) pair -> treat as a no-op.
            # NOTE(review): matching the constraint name in the message is
            # brittle; catching oracledb.IntegrityError would be tighter.
            if "UQ_VR_VIDEO_REST" in str(e).upper():
                return None
            raise
|
||||
|
||||
|
||||
def get_all(
    limit: int = 100,
    offset: int = 0,
    cuisine: str | None = None,
    region: str | None = None,
) -> list[dict]:
    """List restaurants that have coordinates, with optional filters.

    Supports paging via limit/offset; cuisine is an exact match, region
    a substring match.
    """
    # Only rows with coordinates are returned (usable on the map).
    filters = ["latitude IS NOT NULL"]
    binds: dict = {"lim": limit, "off": offset}

    if cuisine:
        filters.append("cuisine_type = :cuisine")
        binds["cuisine"] = cuisine
    if region:
        filters.append("region LIKE :region")
        binds["region"] = f"%{region}%"

    where = " AND ".join(filters)
    sql = f"""
        SELECT id, name, address, region, latitude, longitude,
               cuisine_type, price_range, google_place_id
        FROM restaurants
        WHERE {where}
        ORDER BY updated_at DESC
        OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, binds)
        columns = [d[0].lower() for d in cur.description]
        return [dict(zip(columns, record)) for record in cur.fetchall()]
|
||||
|
||||
|
||||
def get_by_id(restaurant_id: str) -> dict | None:
    """Fetch a single restaurant row as a dict, or None if missing."""
    sql = """
        SELECT r.id, r.name, r.address, r.region, r.latitude, r.longitude,
               r.cuisine_type, r.price_range, r.phone, r.website, r.google_place_id
        FROM restaurants r
        WHERE r.id = :id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"id": restaurant_id})
        record = cur.fetchone()
        if record is None:
            return None
        names = [d[0].lower() for d in cur.description]
        return dict(zip(names, record))
|
||||
|
||||
|
||||
def get_video_links(restaurant_id: str) -> list[dict]:
    """Return every video appearance of a restaurant, newest first."""
    sql = """
        SELECT v.video_id, v.title, v.url, v.published_at,
               vr.foods_mentioned, vr.evaluation, vr.guests
        FROM video_restaurants vr
        JOIN videos v ON v.id = vr.video_id
        WHERE vr.restaurant_id = :rid
        ORDER BY v.published_at DESC
    """

    def _materialize(value):
        # Oracle CLOBs come back as LOB handles; read() yields a str.
        return value.read() if hasattr(value, "read") else value

    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"rid": restaurant_id})
        links = []
        for vid, title, url, published, foods, evaluation, guests in cur.fetchall():
            links.append({
                "video_id": vid,
                "title": title,
                "url": url,
                "published_at": published.isoformat() if published else None,
                "foods_mentioned": _parse_json_field(_materialize(foods), []),
                "evaluation": _parse_json_field(_materialize(evaluation), {}),
                "guests": _parse_json_field(_materialize(guests), []),
            })
        return links
|
||||
|
||||
|
||||
def _parse_json_field(val, default):
|
||||
"""Parse a JSON field that may be a string, already-parsed object, or None."""
|
||||
if val is None:
|
||||
return default
|
||||
if isinstance(val, (list, dict)):
|
||||
return val
|
||||
if isinstance(val, str):
|
||||
try:
|
||||
return json.loads(val)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
return default
|
||||
return default
|
||||
189
backend/core/review.py
Normal file
189
backend/core/review.py
Normal file
@@ -0,0 +1,189 @@
|
||||
"""User review DB operations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
|
||||
import oracledb
|
||||
|
||||
from core.db import conn
|
||||
|
||||
|
||||
def create_review(
    user_id: str,
    restaurant_id: str,
    rating: float,
    review_text: str | None = None,
    visited_at: date | None = None,
) -> dict:
    """Create a new review and return it with author info joined in.

    The database enforces the rating range 0.5-5.0 (ck_ur_rating) and
    valid user/restaurant foreign keys; violations surface as database
    errors from the driver.
    """
    sql = """
        INSERT INTO user_reviews (user_id, restaurant_id, rating, review_text, visited_at)
        VALUES (:user_id, :restaurant_id, :rating, :review_text, :visited_at)
        RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        # Out-bind captures the SYS_GUID() id generated by the insert.
        out_id = cur.var(oracledb.STRING)
        cur.execute(sql, {
            "user_id": user_id,
            "restaurant_id": restaurant_id,
            "rating": rating,
            "review_text": review_text,
            "visited_at": visited_at,
            "out_id": out_id,
        })
        new_id = out_id.getvalue()[0]

    # Re-read the row so the response includes nickname/avatar from the join.
    return get_review_by_id(new_id)
|
||||
|
||||
|
||||
def update_review(
    review_id: str,
    user_id: str,
    rating: float | None = None,
    review_text: str | None = None,
    visited_at: date | None = None,
) -> dict | None:
    """Update an existing review. Only the owner can update.

    None-valued arguments leave the corresponding column unchanged
    (COALESCE keeps the stored value).

    Returns the updated review dict, or None if not found / not owner.
    """
    sql = """
        UPDATE user_reviews
        SET rating = COALESCE(:rating, rating),
            review_text = COALESCE(:review_text, review_text),
            visited_at = COALESCE(:visited_at, visited_at),
            updated_at = SYSTIMESTAMP
        WHERE id = :id AND user_id = :user_id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {
            "rating": rating,
            "review_text": review_text,
            "visited_at": visited_at,
            "id": review_id,
            "user_id": user_id,
        })
        # Zero rows touched -> review missing or owned by someone else.
        if cur.rowcount == 0:
            return None

    return get_review_by_id(review_id)
|
||||
|
||||
|
||||
def delete_review(review_id: str, user_id: str) -> bool:
    """Remove a review owned by *user_id*. True when a row was deleted."""
    with conn() as c:
        cur = c.cursor()
        # The user_id predicate enforces ownership at the SQL level.
        cur.execute(
            "DELETE FROM user_reviews WHERE id = :id AND user_id = :user_id",
            {"id": review_id, "user_id": user_id},
        )
        return cur.rowcount > 0
|
||||
|
||||
|
||||
def get_review_by_id(review_id: str) -> dict | None:
    """Fetch one review (with author nickname/avatar) or None."""
    sql = """
        SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
               r.visited_at, r.created_at, r.updated_at,
               u.nickname, u.avatar_url
        FROM user_reviews r
        JOIN tasteby_users u ON u.id = r.user_id
        WHERE r.id = :id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"id": review_id})
        record = cur.fetchone()
        # Convert inside the with-block: the CLOB column must be read
        # before the connection closes.
        return _row_to_dict(record) if record else None
|
||||
|
||||
|
||||
def get_reviews_for_restaurant(
    restaurant_id: str,
    limit: int = 20,
    offset: int = 0,
) -> list[dict]:
    """Page through a restaurant's reviews (newest first), with author info."""
    sql = """
        SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
               r.visited_at, r.created_at, r.updated_at,
               u.nickname, u.avatar_url
        FROM user_reviews r
        JOIN tasteby_users u ON u.id = r.user_id
        WHERE r.restaurant_id = :restaurant_id
        ORDER BY r.created_at DESC
        OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    binds = {"restaurant_id": restaurant_id, "off": offset, "lim": limit}
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, binds)
        return [_row_to_dict(record) for record in cur.fetchall()]
|
||||
|
||||
|
||||
def get_user_reviews(
    user_id: str,
    limit: int = 20,
    offset: int = 0,
) -> list[dict]:
    """Page through one user's reviews, newest first."""
    sql = """
        SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
               r.visited_at, r.created_at, r.updated_at,
               u.nickname, u.avatar_url
        FROM user_reviews r
        JOIN tasteby_users u ON u.id = r.user_id
        WHERE r.user_id = :user_id
        ORDER BY r.created_at DESC
        OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    binds = {"user_id": user_id, "off": offset, "lim": limit}
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, binds)
        return [_row_to_dict(record) for record in cur.fetchall()]
|
||||
|
||||
|
||||
def get_restaurant_avg_rating(restaurant_id: str) -> dict:
    """Return {"avg_rating": float | None, "review_count": int}.

    avg_rating is None when the restaurant has no reviews (AVG over zero
    rows is NULL); review_count is always an int.
    """
    sql = """
        SELECT ROUND(AVG(rating), 1) AS avg_rating, COUNT(*) AS review_count
        FROM user_reviews
        WHERE restaurant_id = :restaurant_id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"restaurant_id": restaurant_id})
        row = cur.fetchone()
        return {
            # `is not None` rather than truthiness: the original dropped a
            # numeric 0 average to None (harmless today only because the
            # schema forbids ratings below 0.5).
            "avg_rating": float(row[0]) if row[0] is not None else None,
            "review_count": int(row[1]),
        }
|
||||
|
||||
|
||||
def _row_to_dict(row) -> dict:
|
||||
"""Convert a review query row to a dict."""
|
||||
review_text = row[4]
|
||||
if hasattr(review_text, "read"):
|
||||
review_text = review_text.read()
|
||||
|
||||
return {
|
||||
"id": row[0],
|
||||
"user_id": row[1],
|
||||
"restaurant_id": row[2],
|
||||
"rating": float(row[3]),
|
||||
"review_text": review_text,
|
||||
"visited_at": row[5].isoformat() if row[5] else None,
|
||||
"created_at": row[6].isoformat() if row[6] else None,
|
||||
"updated_at": row[7].isoformat() if row[7] else None,
|
||||
"user_nickname": row[8],
|
||||
"user_avatar_url": row[9],
|
||||
}
|
||||
97
backend/core/vector.py
Normal file
97
backend/core/vector.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Vector embedding generation and storage for restaurant semantic search."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import array
|
||||
import os
|
||||
|
||||
import oci
|
||||
from oci.generative_ai_inference import GenerativeAiInferenceClient
|
||||
from oci.generative_ai_inference.models import (
|
||||
EmbedTextDetails,
|
||||
OnDemandServingMode,
|
||||
)
|
||||
|
||||
from core.db import conn
|
||||
|
||||
|
||||
def _embed_texts(texts: list[str]) -> list[list[float]]:
    """Embed a batch of texts via OCI GenAI (Cohere embed model).

    Returns one embedding (list of floats) per input text, in order.
    NOTE(review): a new client is constructed on every call; consider
    caching it at module level if this becomes a hot path.
    """
    # Credentials come from the default OCI config file (~/.oci/config).
    config = oci.config.from_file()
    client = GenerativeAiInferenceClient(
        config,
        service_endpoint=os.environ["OCI_GENAI_ENDPOINT"],
    )
    model_id = os.environ.get("OCI_EMBED_MODEL_ID", "cohere.embed-v4.0")
    compartment_id = os.environ["OCI_COMPARTMENT_ID"]

    details = EmbedTextDetails(
        inputs=texts,
        serving_mode=OnDemandServingMode(model_id=model_id),
        compartment_id=compartment_id,
        # SEARCH_DOCUMENT: these vectors are stored for later retrieval.
        input_type="SEARCH_DOCUMENT",
    )
    response = client.embed_text(details)
    return response.data.embeddings
|
||||
|
||||
|
||||
def _to_vec(embedding: list[float]) -> array.array:
|
||||
return array.array("f", embedding)
|
||||
|
||||
|
||||
def save_restaurant_vectors(restaurant_id: str, chunks: list[str]) -> list[str]:
    """Embed and store text chunks for a restaurant.

    Each chunk becomes one restaurant_vectors row with its embedding bound
    as a float32 array. Returns the list of inserted row IDs (empty when
    `chunks` is empty — no embedding API call is made in that case).
    """
    if not chunks:
        return []

    embeddings = _embed_texts(chunks)
    inserted: list[str] = []

    sql = """
        INSERT INTO restaurant_vectors (restaurant_id, chunk_text, embedding)
        VALUES (:rid, :chunk, :emb)
        RETURNING id INTO :out_id
    """
    import oracledb
    with conn() as c:
        cur = c.cursor()
        for chunk, emb in zip(chunks, embeddings):
            # Fresh out-bind per row; RETURNING INTO fills it on execute.
            out_id = cur.var(oracledb.STRING)
            cur.execute(sql, {
                "rid": restaurant_id,
                "chunk": chunk,
                "emb": _to_vec(emb),
                "out_id": out_id,
            })
            inserted.append(out_id.getvalue()[0])
    return inserted
|
||||
|
||||
|
||||
def search_similar(query: str, top_k: int = 10) -> list[dict]:
    """Semantic search: rank stored chunks by cosine distance to the query.

    Returns a list of dicts: restaurant_id, chunk_text, distance
    (smaller distance = more similar).

    NOTE(review): the query is embedded through the same code path as
    documents (input_type SEARCH_DOCUMENT in _embed_texts); Cohere's API
    distinguishes SEARCH_QUERY for queries — confirm whether retrieval
    quality is affected.
    """
    embeddings = _embed_texts([query])
    query_vec = _to_vec(embeddings[0])

    sql = """
        SELECT rv.restaurant_id, rv.chunk_text,
               VECTOR_DISTANCE(rv.embedding, :qvec, COSINE) AS dist
        FROM restaurant_vectors rv
        ORDER BY dist
        FETCH FIRST :k ROWS ONLY
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"qvec": query_vec, "k": top_k})
        return [
            {
                "restaurant_id": r[0],
                # chunk_text is a CLOB: read while the connection is open.
                "chunk_text": r[1].read() if hasattr(r[1], "read") else r[1],
                "distance": r[2],
            }
            for r in cur.fetchall()
        ]
|
||||
221
backend/core/youtube.py
Normal file
221
backend/core/youtube.py
Normal file
@@ -0,0 +1,221 @@
|
||||
"""YouTube channel scanner + transcript extraction.
|
||||
|
||||
Uses YouTube Data API v3 for channel video listing,
|
||||
youtube-transcript-api for transcript extraction.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
import httpx
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
|
||||
from core.db import conn
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _api_key() -> str:
|
||||
return os.environ["YOUTUBE_DATA_API_KEY"]
|
||||
|
||||
|
||||
def extract_video_id(url: str) -> str:
    """Extract the video ID from a watch-page or youtu.be URL.

    Raises ValueError when neither a ``v=`` query parameter nor a
    youtu.be path segment is present.
    """
    found = re.search(r"(?:v=|youtu\.be/)([^&?/\s]+)", url)
    if found is None:
        raise ValueError(f"Cannot extract video ID from URL: {url}")
    return found.group(1)
|
||||
|
||||
|
||||
# -- Channel operations -------------------------------------------------------
|
||||
|
||||
def add_channel(channel_id: str, channel_name: str) -> str:
    """Register a YouTube channel. Returns the DB row id."""
    import oracledb

    sql = """
        INSERT INTO channels (channel_id, channel_name, channel_url)
        VALUES (:cid, :cname, :curl)
        RETURNING id INTO :out_id
    """
    binds = {
        "cid": channel_id,
        "cname": channel_name,
        "curl": f"https://www.youtube.com/channel/{channel_id}",
    }
    with conn() as c:
        cur = c.cursor()
        # Out-bind receives the generated row id from RETURNING INTO.
        new_id = cur.var(oracledb.STRING)
        binds["out_id"] = new_id
        cur.execute(sql, binds)
        return new_id.getvalue()[0]
|
||||
|
||||
|
||||
def get_active_channels() -> list[dict]:
    """Return id/channel_id/channel_name for every active channel."""
    sql = "SELECT id, channel_id, channel_name FROM channels WHERE is_active = 1"
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql)
        keys = ("id", "channel_id", "channel_name")
        return [dict(zip(keys, row)) for row in cur.fetchall()]
|
||||
|
||||
|
||||
# -- Video listing via YouTube Data API v3 ------------------------------------
|
||||
|
||||
def fetch_channel_videos(
    channel_id: str,
    max_results: int = 50,
    published_after: str | None = None,
) -> list[dict]:
    """Fetch video list from a YouTube channel via Data API v3.

    Paginates through search.list results (50 per page, the API page cap)
    until `max_results` videos are collected or pages run out.

    Args:
        channel_id: YouTube channel ID (passed to the API's channelId).
        max_results: Upper bound on the number of videos returned.
        published_after: Timestamp filter, passed through verbatim as
            the API's publishedAfter parameter.

    Returns:
        List of dicts: video_id, title, published_at, url.

    Raises:
        httpx.HTTPStatusError: on a non-2xx API response.
        KeyError: if YOUTUBE_DATA_API_KEY is not set.
    """
    params: dict = {
        "key": _api_key(),
        "channelId": channel_id,
        "part": "snippet",
        "order": "date",
        "maxResults": min(max_results, 50),
        "type": "video",
    }
    if published_after:
        params["publishedAfter"] = published_after

    videos: list[dict] = []
    next_page = None

    while True:
        # Same params dict is reused across pages; only the token changes.
        if next_page:
            params["pageToken"] = next_page

        r = httpx.get(
            "https://www.googleapis.com/youtube/v3/search",
            params=params,
            timeout=15,
        )
        r.raise_for_status()
        data = r.json()

        for item in data.get("items", []):
            snippet = item["snippet"]
            # id.videoId is present because type=video is requested above.
            vid = item["id"]["videoId"]
            videos.append({
                "video_id": vid,
                "title": snippet["title"],
                "published_at": snippet["publishedAt"],
                "url": f"https://www.youtube.com/watch?v={vid}",
            })

        next_page = data.get("nextPageToken")
        if not next_page or len(videos) >= max_results:
            break

    # The last page may overshoot; trim to the requested bound.
    return videos[:max_results]
|
||||
|
||||
|
||||
# -- Transcript extraction ----------------------------------------------------
|
||||
|
||||
def get_transcript(video_id: str) -> str | None:
    """Fetch transcript text for a video. Returns None if unavailable.

    Prefers Korean, falls back to English. The broad except is deliberate:
    transcripts are best-effort, so any failure (no captions, API error)
    downgrades to None with a warning instead of aborting the pipeline.
    """
    try:
        fetched = YouTubeTranscriptApi().fetch(video_id, languages=["ko", "en"])
        # Flatten the timed segments into one space-joined string.
        return " ".join(seg.text for seg in fetched)
    except Exception as e:
        logger.warning("Transcript unavailable for %s: %s", video_id, e)
        return None
|
||||
|
||||
|
||||
# -- DB operations for videos -------------------------------------------------
|
||||
|
||||
def save_video(channel_db_id: str, video: dict) -> str | None:
    """Insert a video row if not exists. Returns row id or None if duplicate.

    `video` is a dict as produced by fetch_channel_videos
    (video_id, title, url, published_at). New rows start in status
    'pending' so the pipeline picks them up.
    """
    sql = """
        INSERT INTO videos (channel_id, video_id, title, url, published_at, status)
        VALUES (:ch_id, :vid, :title, :url, :pub_at, 'pending')
        RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        import oracledb
        out_id = cur.var(oracledb.STRING)
        try:
            pub_at = None
            if video.get("published_at"):
                # The API timestamp ends in 'Z'; rewrite to +00:00 so
                # datetime.fromisoformat accepts it.
                pub_at = datetime.fromisoformat(
                    video["published_at"].replace("Z", "+00:00")
                )
            cur.execute(sql, {
                "ch_id": channel_db_id,
                "vid": video["video_id"],
                "title": video["title"],
                "url": video["url"],
                "pub_at": pub_at,
                "out_id": out_id,
            })
            return out_id.getvalue()[0]
        except Exception as e:
            # Unique constraint on video_id -> video already stored.
            # NOTE(review): string-matching the constraint name is brittle;
            # oracledb.IntegrityError would be a tighter catch.
            if "UQ_VIDEOS_VID" in str(e).upper():
                return None  # duplicate
            raise
|
||||
|
||||
|
||||
def get_pending_videos(limit: int = 10) -> list[dict]:
    """Return an oldest-first batch of videos still awaiting processing."""
    sql = """
        SELECT id, video_id, title, url
        FROM videos
        WHERE status = 'pending'
        ORDER BY created_at
        FETCH FIRST :n ROWS ONLY
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"n": limit})
        fields = ("id", "video_id", "title", "url")
        return [dict(zip(fields, row)) for row in cur.fetchall()]
|
||||
|
||||
|
||||
def update_video_status(
    video_db_id: str,
    status: str,
    transcript: str | None = None,
    llm_raw: str | None = None,
) -> None:
    """Set a video's status and processed_at, optionally storing the
    transcript and/or raw LLM response when provided (non-empty)."""
    assignments = ["status = :st", "processed_at = SYSTIMESTAMP"]
    binds: dict = {"st": status, "vid": video_db_id}

    if transcript:
        assignments.append("transcript_text = :txt")
        binds["txt"] = transcript
    if llm_raw:
        assignments.append("llm_raw_response = :llm_resp")
        binds["llm_resp"] = llm_raw

    statement = f"UPDATE videos SET {', '.join(assignments)} WHERE id = :vid"
    with conn() as c:
        c.cursor().execute(statement, binds)
|
||||
|
||||
|
||||
# -- Scan: fetch new videos for all active channels ---------------------------
|
||||
|
||||
def scan_all_channels(max_per_channel: int = 50) -> int:
    """Scan all active channels for new videos. Returns count of new videos.

    A failure on one channel is logged and does not stop the others.
    """
    channels = get_active_channels()
    total_new = 0
    for ch in channels:
        try:
            videos = fetch_channel_videos(ch["channel_id"], max_per_channel)
            # Count new rows per channel: the original passed the running
            # cumulative total to the log line, misreporting every channel
            # after the first.
            channel_new = sum(1 for v in videos if save_video(ch["id"], v))
            total_new += channel_new
            logger.info(
                "Channel %s: fetched %d videos, %d new",
                ch["channel_name"], len(videos), channel_new,
            )
        except Exception as e:
            logger.error("Failed to scan channel %s: %s", ch["channel_name"], e)
    return total_new
|
||||
0
backend/daemon/__init__.py
Normal file
0
backend/daemon/__init__.py
Normal file
37
backend/daemon/worker.py
Normal file
37
backend/daemon/worker.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""Daemon worker: periodic channel scan + video processing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
from core.youtube import scan_all_channels
|
||||
from core.pipeline import process_pending
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_once() -> None:
    """Single daemon cycle: scan channels, then process pending videos."""
    logger.info("=== Daemon cycle start ===")

    try:
        discovered = scan_all_channels()
    except Exception as e:
        # One failed stage must not abort the cycle.
        logger.error("Channel scan failed: %s", e)
    else:
        logger.info("Scan complete: %d new videos", discovered)

    try:
        extracted = process_pending(limit=10)
    except Exception as e:
        logger.error("Video processing failed: %s", e)
    else:
        logger.info("Processing complete: %d restaurants extracted", extracted)

    logger.info("=== Daemon cycle end ===")
|
||||
|
||||
|
||||
def run_loop(interval: int = 3600) -> None:
    """Run the daemon forever with a fixed sleep between cycles.

    Blocks the calling thread. run_once() catches its own stage errors,
    so a failed cycle does not kill the loop.
    """
    logger.info("Daemon started (interval=%ds)", interval)
    while True:
        run_once()
        # Fixed delay (not aligned to wall clock); cycle duration adds up.
        time.sleep(interval)
|
||||
10
backend/requirements.txt
Normal file
10
backend/requirements.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
fastapi>=0.115
|
||||
uvicorn[standard]>=0.34
|
||||
python-dotenv>=1.0
|
||||
oracledb>=2.5
|
||||
oci>=2.140
|
||||
httpx>=0.28
|
||||
youtube-transcript-api>=1.0
|
||||
pydantic>=2.0
|
||||
pyjwt[crypto]>=2.8
|
||||
google-auth>=2.28
|
||||
9
backend/run_api.py
Normal file
9
backend/run_api.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""Run the FastAPI server."""
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
import uvicorn
|
||||
|
||||
if __name__ == "__main__":
|
||||
uvicorn.run("api.main:app", host="0.0.0.0", port=8000, reload=True)
|
||||
18
backend/run_daemon.py
Normal file
18
backend/run_daemon.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""Run the daemon worker."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
from daemon.worker import run_loop
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
interval = int(os.environ.get("DAEMON_INTERVAL", "3600"))
|
||||
run_loop(interval)
|
||||
127
backend/sql/schema.sql
Normal file
127
backend/sql/schema.sql
Normal file
@@ -0,0 +1,127 @@
|
||||
-- Tasteby: Oracle ADB (23ai) Schema
-- Run against Oracle ADB before starting the backend.

-- 1. channels
CREATE TABLE channels (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    channel_id VARCHAR2(50) NOT NULL,
    channel_name VARCHAR2(200) NOT NULL,
    channel_url VARCHAR2(500),
    is_active NUMBER(1) DEFAULT 1 NOT NULL,
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    CONSTRAINT pk_channels PRIMARY KEY (id),
    CONSTRAINT uq_channels_cid UNIQUE (channel_id)
);

-- 2. videos
CREATE TABLE videos (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    channel_id VARCHAR2(36) NOT NULL,
    video_id VARCHAR2(20) NOT NULL,
    title VARCHAR2(500) NOT NULL,
    url VARCHAR2(500) NOT NULL,
    published_at TIMESTAMP,
    transcript_text CLOB,
    -- Raw LLM output: written by core/youtube.py update_video_status
    -- (llm_raw_response bind); column was missing from the original schema
    -- and the UPDATE would fail with ORA-00904.
    llm_raw_response CLOB,
    status VARCHAR2(20) DEFAULT 'pending' NOT NULL,
    processed_at TIMESTAMP,
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    CONSTRAINT pk_videos PRIMARY KEY (id),
    CONSTRAINT uq_videos_vid UNIQUE (video_id),
    CONSTRAINT fk_videos_channel FOREIGN KEY (channel_id) REFERENCES channels(id),
    CONSTRAINT ck_videos_status CHECK (status IN ('pending','processing','done','error'))
);

-- 3. restaurants
-- NOTE(review): name has no unique constraint, but the backend's upsert
-- keys on exact name; concurrent upserts of the same name can duplicate.
CREATE TABLE restaurants (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    name VARCHAR2(200) NOT NULL,
    address VARCHAR2(500),
    region VARCHAR2(100),
    latitude NUMBER(10,7),
    longitude NUMBER(10,7),
    cuisine_type VARCHAR2(100),
    price_range VARCHAR2(50),
    phone VARCHAR2(50),
    website VARCHAR2(500),
    google_place_id VARCHAR2(200),
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    updated_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    CONSTRAINT pk_restaurants PRIMARY KEY (id)
);

-- 4. video_restaurants (N:M)
CREATE TABLE video_restaurants (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    video_id VARCHAR2(36) NOT NULL,
    restaurant_id VARCHAR2(36) NOT NULL,
    foods_mentioned CLOB,
    evaluation CLOB,
    guests CLOB,
    air_date DATE,
    citation_text CLOB,
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    CONSTRAINT pk_video_restaurants PRIMARY KEY (id),
    CONSTRAINT fk_vr_video FOREIGN KEY (video_id) REFERENCES videos(id),
    CONSTRAINT fk_vr_restaurant FOREIGN KEY (restaurant_id) REFERENCES restaurants(id),
    -- The backend relies on this constraint name to detect duplicates.
    CONSTRAINT uq_vr_video_rest UNIQUE (video_id, restaurant_id)
);

-- JSON check constraints for CLOB JSON columns
-- (Oracle 23ai supports IS JSON on CLOB)
ALTER TABLE video_restaurants ADD CONSTRAINT ck_vr_foods CHECK (foods_mentioned IS JSON);
ALTER TABLE video_restaurants ADD CONSTRAINT ck_vr_eval CHECK (evaluation IS JSON);
ALTER TABLE video_restaurants ADD CONSTRAINT ck_vr_guests CHECK (guests IS JSON);

-- 5. tasteby_users (NOT "users" — that table belongs to another project)
CREATE TABLE tasteby_users (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    provider VARCHAR2(20) NOT NULL,
    provider_id VARCHAR2(200) NOT NULL,
    email VARCHAR2(200),
    nickname VARCHAR2(100),
    avatar_url VARCHAR2(500),
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    last_login_at TIMESTAMP,
    CONSTRAINT pk_tasteby_users PRIMARY KEY (id),
    CONSTRAINT uq_tasteby_users_provider UNIQUE (provider, provider_id),
    CONSTRAINT ck_tasteby_users_provider CHECK (provider IN ('google','apple'))
);

-- 6. user_reviews
CREATE TABLE user_reviews (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    user_id VARCHAR2(36) NOT NULL,
    restaurant_id VARCHAR2(36) NOT NULL,
    -- Half-star granularity, 0.5 to 5.0 (enforced below).
    rating NUMBER(2,1) NOT NULL,
    review_text CLOB,
    visited_at DATE,
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    updated_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    CONSTRAINT pk_user_reviews PRIMARY KEY (id),
    CONSTRAINT fk_ur_user FOREIGN KEY (user_id) REFERENCES tasteby_users(id),
    CONSTRAINT fk_ur_restaurant FOREIGN KEY (restaurant_id) REFERENCES restaurants(id),
    CONSTRAINT ck_ur_rating CHECK (rating BETWEEN 0.5 AND 5.0)
);

-- 7. restaurant_vectors (semantic search)
CREATE TABLE restaurant_vectors (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    restaurant_id VARCHAR2(36) NOT NULL,
    chunk_text CLOB NOT NULL,
    -- Dimensionality must match the embedding model's output
    -- (OCI_EMBED_MODEL_ID, default cohere.embed-v4.0) — verify on model change.
    embedding VECTOR(1536, FLOAT32),
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    CONSTRAINT pk_restaurant_vectors PRIMARY KEY (id),
    CONSTRAINT fk_rv_restaurant FOREIGN KEY (restaurant_id) REFERENCES restaurants(id)
);

-- COSINE here matches the VECTOR_DISTANCE(..., COSINE) used by search.
CREATE VECTOR INDEX idx_rv_embedding
    ON restaurant_vectors (embedding)
    ORGANIZATION NEIGHBOR PARTITIONS
    WITH DISTANCE COSINE;

-- Useful indexes
CREATE INDEX idx_videos_status ON videos(status);
CREATE INDEX idx_videos_channel ON videos(channel_id);
CREATE INDEX idx_vr_restaurant ON video_restaurants(restaurant_id);
CREATE INDEX idx_ur_restaurant ON user_reviews(restaurant_id);
CREATE INDEX idx_ur_user ON user_reviews(user_id);
||||
Reference in New Issue
Block a user