Initial commit: Tasteby - YouTube restaurant map service
Backend (FastAPI + Oracle ADB), Frontend (Next.js), daemon worker. Features: channel/video/restaurant management, semantic search, Google OAuth, user reviews. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
19
backend/.env.example
Normal file
19
backend/.env.example
Normal file
@@ -0,0 +1,19 @@
|
||||
# Oracle ADB
|
||||
ORACLE_USER=admin
|
||||
ORACLE_PASSWORD=
|
||||
ORACLE_DSN=
|
||||
ORACLE_WALLET=
|
||||
|
||||
# OCI GenAI
|
||||
OCI_COMPARTMENT_ID=
|
||||
OCI_GENAI_ENDPOINT=https://inference.generativeai.us-chicago-1.oci.oraclecloud.com
|
||||
OCI_EMBED_MODEL_ID=cohere.embed-v4.0
|
||||
OCI_CHAT_ENDPOINT=https://inference.generativeai.us-ashburn-1.oci.oraclecloud.com
|
||||
OCI_CHAT_MODEL_ID=
|
||||
|
||||
# Google
|
||||
GOOGLE_MAPS_API_KEY=
|
||||
YOUTUBE_DATA_API_KEY=
|
||||
|
||||
# Daemon
|
||||
DAEMON_INTERVAL=3600
|
||||
0
backend/api/__init__.py
Normal file
0
backend/api/__init__.py
Normal file
32
backend/api/deps.py
Normal file
32
backend/api/deps.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""FastAPI dependencies for authentication."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import Header, HTTPException
|
||||
|
||||
from core.auth import verify_jwt
|
||||
|
||||
|
||||
def get_current_user(authorization: str = Header(None)) -> dict:
    """Extract and verify a Bearer token from the Authorization header.

    Returns the decoded JWT payload for the authenticated user.
    Raises HTTPException(401) if the header is missing, not a Bearer
    scheme, or the token fails verification.
    """
    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(401, "Missing or invalid Authorization header")
    token = authorization.removeprefix("Bearer ").strip()
    try:
        return verify_jwt(token)
    except Exception:
        # `from None` drops the JWT library's traceback from the chained
        # context; the 401 response is all the caller needs.
        raise HTTPException(401, "Invalid or expired token") from None
|
||||
|
||||
|
||||
def get_optional_user(authorization: str = Header(None)) -> dict | None:
    """Like get_current_user, but yields None instead of raising 401."""
    header = authorization or ""
    if not header.startswith("Bearer "):
        return None
    raw_token = header.removeprefix("Bearer ").strip()
    try:
        payload = verify_jwt(raw_token)
    except Exception:
        return None
    return payload
|
||||
32
backend/api/main.py
Normal file
32
backend/api/main.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""FastAPI application entry point."""
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from api.routes import restaurants, channels, videos, search, auth, reviews
|
||||
|
||||
app = FastAPI(
|
||||
title="Tasteby API",
|
||||
description="YouTube restaurant map service API",
|
||||
version="0.1.0",
|
||||
)
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["http://localhost:3000", "http://localhost:3001"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
app.include_router(restaurants.router, prefix="/api/restaurants", tags=["restaurants"])
|
||||
app.include_router(channels.router, prefix="/api/channels", tags=["channels"])
|
||||
app.include_router(videos.router, prefix="/api/videos", tags=["videos"])
|
||||
app.include_router(search.router, prefix="/api/search", tags=["search"])
|
||||
app.include_router(auth.router, prefix="/api/auth", tags=["auth"])
|
||||
app.include_router(reviews.router, prefix="/api", tags=["reviews"])
|
||||
|
||||
|
||||
@app.get("/api/health")
|
||||
def health():
|
||||
return {"status": "ok"}
|
||||
0
backend/api/routes/__init__.py
Normal file
0
backend/api/routes/__init__.py
Normal file
40
backend/api/routes/auth.py
Normal file
40
backend/api/routes/auth.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""Auth API routes — Google SSO login and user info."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.auth import verify_google_token, find_or_create_user, create_jwt
|
||||
from api.deps import get_current_user
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class GoogleLoginRequest(BaseModel):
    """Request body for POST /api/auth/google."""

    # Google-issued ID token obtained by the frontend OAuth flow.
    id_token: str
|
||||
|
||||
|
||||
@router.post("/google")
|
||||
def login_google(body: GoogleLoginRequest):
|
||||
"""Verify Google ID token and return JWT + user info."""
|
||||
try:
|
||||
google_info = verify_google_token(body.id_token)
|
||||
except ValueError as e:
|
||||
raise HTTPException(401, f"Invalid Google token: {e}")
|
||||
|
||||
user = find_or_create_user(
|
||||
provider="google",
|
||||
provider_id=google_info["sub"],
|
||||
email=google_info.get("email"),
|
||||
nickname=google_info.get("name"),
|
||||
avatar_url=google_info.get("picture"),
|
||||
)
|
||||
access_token = create_jwt(user)
|
||||
return {"access_token": access_token, "user": user}
|
||||
|
||||
|
||||
@router.get("/me")
|
||||
def get_me(current_user: dict = Depends(get_current_user)):
|
||||
"""Return current authenticated user info."""
|
||||
return current_user
|
||||
46
backend/api/routes/channels.py
Normal file
46
backend/api/routes/channels.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""Channel API routes."""
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core import youtube
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class ChannelCreate(BaseModel):
    """Request body for registering a YouTube channel to track."""

    # YouTube channel identifier.
    channel_id: str
    # Human-readable display name for the channel.
    channel_name: str
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_channels():
|
||||
return youtube.get_active_channels()
|
||||
|
||||
|
||||
@router.post("", status_code=201)
|
||||
def create_channel(body: ChannelCreate):
|
||||
try:
|
||||
row_id = youtube.add_channel(body.channel_id, body.channel_name)
|
||||
return {"id": row_id, "channel_id": body.channel_id}
|
||||
except Exception as e:
|
||||
if "UQ_CHANNELS_CID" in str(e).upper():
|
||||
raise HTTPException(409, "Channel already exists")
|
||||
raise
|
||||
|
||||
|
||||
@router.post("/{channel_id}/scan")
|
||||
def scan_channel(channel_id: str):
|
||||
"""Trigger a scan for new videos from this channel."""
|
||||
channels = youtube.get_active_channels()
|
||||
ch = next((c for c in channels if c["channel_id"] == channel_id), None)
|
||||
if not ch:
|
||||
raise HTTPException(404, "Channel not found")
|
||||
|
||||
videos = youtube.fetch_channel_videos(channel_id, max_results=50)
|
||||
new_count = 0
|
||||
for v in videos:
|
||||
row_id = youtube.save_video(ch["id"], v)
|
||||
if row_id:
|
||||
new_count += 1
|
||||
return {"total_fetched": len(videos), "new_videos": new_count}
|
||||
33
backend/api/routes/restaurants.py
Normal file
33
backend/api/routes/restaurants.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""Restaurant API routes."""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from core import restaurant
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_restaurants(
|
||||
limit: int = Query(100, le=500),
|
||||
offset: int = Query(0, ge=0),
|
||||
cuisine: str | None = None,
|
||||
region: str | None = None,
|
||||
):
|
||||
return restaurant.get_all(limit=limit, offset=offset, cuisine=cuisine, region=region)
|
||||
|
||||
|
||||
@router.get("/{restaurant_id}")
|
||||
def get_restaurant(restaurant_id: str):
|
||||
r = restaurant.get_by_id(restaurant_id)
|
||||
if not r:
|
||||
raise HTTPException(404, "Restaurant not found")
|
||||
return r
|
||||
|
||||
|
||||
@router.get("/{restaurant_id}/videos")
|
||||
def get_restaurant_videos(restaurant_id: str):
|
||||
r = restaurant.get_by_id(restaurant_id)
|
||||
if not r:
|
||||
raise HTTPException(404, "Restaurant not found")
|
||||
return restaurant.get_video_links(restaurant_id)
|
||||
97
backend/api/routes/reviews.py
Normal file
97
backend/api/routes/reviews.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Review API routes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from core import review
|
||||
from api.deps import get_current_user
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class ReviewCreate(BaseModel):
    """Request body for creating a review."""

    # Rating between 0.5 and 5.0 (validated by Field bounds).
    rating: float = Field(..., ge=0.5, le=5.0)
    review_text: Optional[str] = None
    # Date the user visited the restaurant, if they chose to record it.
    visited_at: Optional[date] = None
|
||||
|
||||
|
||||
class ReviewUpdate(BaseModel):
    """Request body for updating a review; all fields optional (partial update)."""

    # Rating between 0.5 and 5.0 when provided.
    rating: Optional[float] = Field(None, ge=0.5, le=5.0)
    review_text: Optional[str] = None
    visited_at: Optional[date] = None
|
||||
|
||||
|
||||
# --- Restaurant reviews ---
|
||||
|
||||
@router.get("/restaurants/{restaurant_id}/reviews")
|
||||
def list_restaurant_reviews(
|
||||
restaurant_id: str,
|
||||
limit: int = Query(20, le=100),
|
||||
offset: int = Query(0, ge=0),
|
||||
):
|
||||
"""List reviews for a restaurant (public)."""
|
||||
reviews = review.get_reviews_for_restaurant(restaurant_id, limit=limit, offset=offset)
|
||||
stats = review.get_restaurant_avg_rating(restaurant_id)
|
||||
return {"reviews": reviews, **stats}
|
||||
|
||||
|
||||
@router.post("/restaurants/{restaurant_id}/reviews", status_code=201)
|
||||
def create_restaurant_review(
|
||||
restaurant_id: str,
|
||||
body: ReviewCreate,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
):
|
||||
"""Create a review for a restaurant (requires auth)."""
|
||||
created = review.create_review(
|
||||
user_id=current_user["sub"],
|
||||
restaurant_id=restaurant_id,
|
||||
rating=body.rating,
|
||||
review_text=body.review_text,
|
||||
visited_at=body.visited_at,
|
||||
)
|
||||
return created
|
||||
|
||||
|
||||
@router.put("/reviews/{review_id}")
|
||||
def update_review_route(
|
||||
review_id: str,
|
||||
body: ReviewUpdate,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
):
|
||||
"""Update a review (own review only)."""
|
||||
updated = review.update_review(
|
||||
review_id=review_id,
|
||||
user_id=current_user["sub"],
|
||||
rating=body.rating,
|
||||
review_text=body.review_text,
|
||||
visited_at=body.visited_at,
|
||||
)
|
||||
if not updated:
|
||||
raise HTTPException(404, "Review not found or not yours")
|
||||
return updated
|
||||
|
||||
|
||||
@router.delete("/reviews/{review_id}", status_code=204)
|
||||
def delete_review_route(
|
||||
review_id: str,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
):
|
||||
"""Delete a review (own review only)."""
|
||||
deleted = review.delete_review(review_id, current_user["sub"])
|
||||
if not deleted:
|
||||
raise HTTPException(404, "Review not found or not yours")
|
||||
|
||||
|
||||
@router.get("/users/me/reviews")
|
||||
def list_my_reviews(
|
||||
limit: int = Query(20, le=100),
|
||||
offset: int = Query(0, ge=0),
|
||||
current_user: dict = Depends(get_current_user),
|
||||
):
|
||||
"""List current user's reviews."""
|
||||
return review.get_user_reviews(current_user["sub"], limit=limit, offset=offset)
|
||||
66
backend/api/routes/search.py
Normal file
66
backend/api/routes/search.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Search API routes — keyword + semantic search."""
|
||||
|
||||
from fastapi import APIRouter, Query
|
||||
|
||||
from core import restaurant, vector
|
||||
from core.db import conn
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("")
|
||||
def search_restaurants(
|
||||
q: str = Query(..., min_length=1),
|
||||
mode: str = Query("keyword", pattern="^(keyword|semantic|hybrid)$"),
|
||||
limit: int = Query(20, le=100),
|
||||
):
|
||||
"""Search restaurants by keyword, semantic similarity, or hybrid."""
|
||||
if mode == "semantic":
|
||||
return _semantic_search(q, limit)
|
||||
elif mode == "hybrid":
|
||||
kw = _keyword_search(q, limit)
|
||||
sem = _semantic_search(q, limit)
|
||||
# merge: keyword results first, then semantic results not already in keyword
|
||||
seen = {r["id"] for r in kw}
|
||||
merged = list(kw)
|
||||
for r in sem:
|
||||
if r["id"] not in seen:
|
||||
merged.append(r)
|
||||
seen.add(r["id"])
|
||||
return merged[:limit]
|
||||
else:
|
||||
return _keyword_search(q, limit)
|
||||
|
||||
|
||||
def _keyword_search(q: str, limit: int) -> list[dict]:
    """Case-insensitive LIKE search over name/address/region/cuisine.

    Only rows with coordinates (latitude IS NOT NULL) are returned, since
    results are meant for the map.
    NOTE(review): `%` and `_` in the user's query act as LIKE wildcards —
    confirm whether escaping is needed.
    """
    sql = """
        SELECT id, name, address, region, latitude, longitude,
               cuisine_type, price_range
        FROM restaurants
        WHERE latitude IS NOT NULL
          AND (UPPER(name) LIKE UPPER(:q)
               OR UPPER(address) LIKE UPPER(:q)
               OR UPPER(region) LIKE UPPER(:q)
               OR UPPER(cuisine_type) LIKE UPPER(:q))
        FETCH FIRST :lim ROWS ONLY
    """
    pattern = f"%{q}%"
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"q": pattern, "lim": limit})
        cols = [d[0].lower() for d in cur.description]
        return [dict(zip(cols, row)) for row in cur.fetchall()]
|
||||
|
||||
|
||||
def _semantic_search(q: str, limit: int) -> list[dict]:
    """Semantic search: find restaurants by vector similarity to the query.

    Results keep the ranking returned by vector.search_similar; multiple
    chunks of the same restaurant collapse to the first (best) occurrence.
    Restaurants without coordinates are dropped (map results only).
    """
    similar = vector.search_similar(q, top_k=limit)
    if not similar:
        return []

    # dict.fromkeys de-duplicates while preserving order — the original
    # set comprehension discarded the similarity ranking.
    rest_ids = list(dict.fromkeys(s["restaurant_id"] for s in similar))
    results = []
    for rid in rest_ids[:limit]:
        r = restaurant.get_by_id(rid)
        if r and r.get("latitude"):
            results.append(r)
    return results
|
||||
52
backend/api/routes/videos.py
Normal file
52
backend/api/routes/videos.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Video API routes."""
|
||||
|
||||
from fastapi import APIRouter, Query
|
||||
|
||||
from core.db import conn
|
||||
from core.pipeline import process_pending
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_videos(
|
||||
status: str | None = None,
|
||||
limit: int = Query(50, le=200),
|
||||
offset: int = Query(0, ge=0),
|
||||
):
|
||||
conditions = []
|
||||
params: dict = {"lim": limit, "off": offset}
|
||||
if status:
|
||||
conditions.append("v.status = :st")
|
||||
params["st"] = status
|
||||
|
||||
where = ("WHERE " + " AND ".join(conditions)) if conditions else ""
|
||||
sql = f"""
|
||||
SELECT v.id, v.video_id, v.title, v.url, v.status,
|
||||
v.published_at, c.channel_name
|
||||
FROM videos v
|
||||
JOIN channels c ON c.id = v.channel_id
|
||||
{where}
|
||||
ORDER BY v.published_at DESC NULLS LAST
|
||||
OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
|
||||
"""
|
||||
with conn() as c:
|
||||
cur = c.cursor()
|
||||
cur.execute(sql, params)
|
||||
cols = [d[0].lower() for d in cur.description]
|
||||
rows = cur.fetchall()
|
||||
|
||||
results = []
|
||||
for row in rows:
|
||||
d = dict(zip(cols, row))
|
||||
if d.get("published_at"):
|
||||
d["published_at"] = d["published_at"].isoformat()
|
||||
results.append(d)
|
||||
return results
|
||||
|
||||
|
||||
@router.post("/process")
|
||||
def trigger_processing(limit: int = Query(5, le=20)):
|
||||
"""Manually trigger processing of pending videos."""
|
||||
count = process_pending(limit)
|
||||
return {"restaurants_extracted": count}
|
||||
0
backend/core/__init__.py
Normal file
0
backend/core/__init__.py
Normal file
122
backend/core/auth.py
Normal file
122
backend/core/auth.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""Authentication helpers — Google OAuth2 + JWT."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import jwt
|
||||
import oracledb
|
||||
from google.oauth2 import id_token as google_id_token
|
||||
from google.auth.transport import requests as google_requests
|
||||
|
||||
from core.db import conn
|
||||
|
||||
JWT_SECRET = os.environ.get("JWT_SECRET", "tasteby-dev-secret-change-me")
|
||||
JWT_ALGORITHM = "HS256"
|
||||
JWT_EXPIRE_DAYS = 7
|
||||
|
||||
|
||||
def verify_google_token(token: str) -> dict:
    """Verify a Google ID token and return user info.

    Returns dict with keys: sub, email, name, picture.
    Raises ValueError on an invalid token.
    """
    # Pin the audience to our OAuth client id when GOOGLE_CLIENT_ID is set.
    # With audience=None the library accepts tokens minted for ANY client,
    # which would let a token from an unrelated app log in here.
    info = google_id_token.verify_oauth2_token(
        token,
        google_requests.Request(),
        audience=os.environ.get("GOOGLE_CLIENT_ID"),
    )
    return {
        "sub": info["sub"],
        "email": info.get("email"),
        "name": info.get("name"),
        "picture": info.get("picture"),
    }
|
||||
|
||||
|
||||
def find_or_create_user(
    provider: str,
    provider_id: str,
    email: str | None = None,
    nickname: str | None = None,
    avatar_url: str | None = None,
) -> dict:
    """Find existing user or create new one. Returns user dict.

    Users are keyed by (provider, provider_id). On a hit, last_login_at is
    bumped and email/nickname/avatar_url refreshed when new values are
    given (COALESCE keeps existing values otherwise). On a miss, a new row
    is inserted and its generated id returned.
    """
    # Try to find existing user
    sql_find = """
        SELECT id, provider, provider_id, email, nickname, avatar_url, created_at, last_login_at
        FROM tasteby_users
        WHERE provider = :provider AND provider_id = :provider_id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql_find, {"provider": provider, "provider_id": provider_id})
        row = cur.fetchone()

        if row:
            # Update last_login and optional fields
            sql_update = """
                UPDATE tasteby_users
                SET last_login_at = SYSTIMESTAMP,
                    email = COALESCE(:email, email),
                    nickname = COALESCE(:nickname, nickname),
                    avatar_url = COALESCE(:avatar_url, avatar_url)
                WHERE id = :id
            """
            cur.execute(sql_update, {
                "email": email, "nickname": nickname,
                "avatar_url": avatar_url, "id": row[0],
            })
            # Echo the refreshed values without re-querying the row.
            return {
                "id": row[0],
                "provider": row[1],
                "provider_id": row[2],
                "email": email or row[3],
                "nickname": nickname or row[4],
                "avatar_url": avatar_url or row[5],
            }

        # Create new user
        sql_insert = """
            INSERT INTO tasteby_users (provider, provider_id, email, nickname, avatar_url, last_login_at)
            VALUES (:provider, :provider_id, :email, :nickname, :avatar_url, SYSTIMESTAMP)
            RETURNING id INTO :out_id
        """
        # RETURNING ... INTO needs an out bind; STRING suggests the id column
        # is text/GUID rather than numeric — TODO confirm against the schema.
        out_id = cur.var(oracledb.STRING)
        cur.execute(sql_insert, {
            "provider": provider,
            "provider_id": provider_id,
            "email": email,
            "nickname": nickname,
            "avatar_url": avatar_url,
            "out_id": out_id,
        })
        # getvalue() returns a list for RETURNING binds; take the single row.
        new_id = out_id.getvalue()[0]
        return {
            "id": new_id,
            "provider": provider,
            "provider_id": provider_id,
            "email": email,
            "nickname": nickname,
            "avatar_url": avatar_url,
        }
|
||||
|
||||
|
||||
def create_jwt(user: dict) -> str:
    """Create a signed JWT access token for the given user.

    The payload carries the user id as `sub`, plus email/nickname for
    display. Tokens expire after JWT_EXPIRE_DAYS.
    """
    # Capture the timestamp once so exp is exactly iat + lifetime (the
    # original called now() twice, making the two claims drift slightly).
    now = datetime.now(timezone.utc)
    payload = {
        "sub": user["id"],
        "email": user.get("email"),
        "nickname": user.get("nickname"),
        "iat": now,
        "exp": now + timedelta(days=JWT_EXPIRE_DAYS),
    }
    return jwt.encode(payload, JWT_SECRET, algorithm=JWT_ALGORITHM)
|
||||
|
||||
|
||||
def verify_jwt(token: str) -> dict:
    """Verify a JWT's signature and expiry; return the decoded payload.

    Raises jwt.InvalidTokenError (or a subclass) on failure.
    """
    return jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
|
||||
44
backend/core/db.py
Normal file
44
backend/core/db.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""Oracle ADB connection pool — shared across all modules."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from contextlib import contextmanager
|
||||
from typing import Generator, Optional
|
||||
|
||||
import oracledb
|
||||
|
||||
_pool: Optional[oracledb.ConnectionPool] = None
|
||||
|
||||
|
||||
def _get_pool() -> oracledb.ConnectionPool:
    """Lazily create and cache the module-wide Oracle connection pool.

    NOTE(review): initialization is not lock-guarded — concurrent first
    calls could each build a pool; confirm behavior under multi-threaded
    server startup.
    """
    global _pool
    if _pool is None:
        kwargs: dict = dict(
            user=os.environ["ORACLE_USER"],
            password=os.environ["ORACLE_PASSWORD"],
            dsn=os.environ["ORACLE_DSN"],
            min=1,
            max=5,
            increment=1,
        )
        # ORACLE_WALLET is passed as config_dir — presumably the wallet
        # directory for ADB mTLS connections; confirm against deployment.
        wallet = os.environ.get("ORACLE_WALLET")
        if wallet:
            kwargs["config_dir"] = wallet
        _pool = oracledb.create_pool(**kwargs)
    return _pool
|
||||
|
||||
|
||||
@contextmanager
def conn() -> Generator[oracledb.Connection, None, None]:
    """Yield a pooled connection; commit on success, rollback on error."""
    pool = _get_pool()
    connection = pool.acquire()
    try:
        yield connection
        connection.commit()
    except Exception:
        connection.rollback()
        raise
    finally:
        # Always hand the connection back to the pool, even after rollback.
        pool.release(connection)
|
||||
128
backend/core/extractor.py
Normal file
128
backend/core/extractor.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""LLM-based restaurant info extraction from video transcripts.
|
||||
|
||||
Uses OCI GenAI (Gemini Flash) to extract structured restaurant data.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
||||
import oci
|
||||
from oci.generative_ai_inference import GenerativeAiInferenceClient
|
||||
from oci.generative_ai_inference.models import (
|
||||
ChatDetails,
|
||||
GenericChatRequest,
|
||||
OnDemandServingMode,
|
||||
TextContent,
|
||||
UserMessage,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_client() -> GenerativeAiInferenceClient:
    """Build an OCI GenAI inference client from the default OCI config file.

    Prefers OCI_CHAT_ENDPOINT; falls back to OCI_GENAI_ENDPOINT.
    """
    config = oci.config.from_file()
    endpoint = os.environ.get("OCI_CHAT_ENDPOINT") or os.environ["OCI_GENAI_ENDPOINT"]
    return GenerativeAiInferenceClient(config, service_endpoint=endpoint)
|
||||
|
||||
|
||||
def _llm(prompt: str, max_tokens: int = 4096) -> str:
    """Send a single-turn prompt to the configured OCI chat model.

    Returns the first choice's text content, stripped. temperature=0 keeps
    extraction output as deterministic as the service allows.
    """
    client = _get_client()
    req = GenericChatRequest(
        messages=[UserMessage(content=[TextContent(text=prompt)])],
        max_tokens=max_tokens,
        temperature=0,
    )
    det = ChatDetails(
        compartment_id=os.environ["OCI_COMPARTMENT_ID"],
        serving_mode=OnDemandServingMode(model_id=os.environ["OCI_CHAT_MODEL_ID"]),
        chat_request=req,
    )
    resp = client.chat(det)
    return resp.data.chat_response.choices[0].message.content[0].text.strip()
|
||||
|
||||
|
||||
def _parse_json(raw: str) -> dict | list:
|
||||
raw = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw, flags=re.MULTILINE).strip()
|
||||
raw = re.sub(r",\s*([}\]])", r"\1", raw)
|
||||
try:
|
||||
return json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
try:
|
||||
return json.JSONDecoder(strict=False).decode(raw)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
# recover truncated array
|
||||
if raw.lstrip().startswith("["):
|
||||
decoder = json.JSONDecoder(strict=False)
|
||||
items: list = []
|
||||
idx = raw.index("[") + 1
|
||||
while idx < len(raw):
|
||||
while idx < len(raw) and raw[idx] in " \t\n\r,":
|
||||
idx += 1
|
||||
if idx >= len(raw) or raw[idx] == "]":
|
||||
break
|
||||
try:
|
||||
obj, end = decoder.raw_decode(raw, idx)
|
||||
items.append(obj)
|
||||
idx = end
|
||||
except json.JSONDecodeError:
|
||||
break
|
||||
if items:
|
||||
return items
|
||||
raise ValueError(f"JSON parse failed: {raw[:80]!r}")
|
||||
|
||||
|
||||
_EXTRACT_PROMPT = """\
|
||||
다음은 유튜브 먹방/맛집 영상의 자막입니다.
|
||||
이 영상에서 언급된 모든 식당 정보를 추출하세요.
|
||||
|
||||
규칙:
|
||||
- 식당이 없으면 빈 배열 [] 반환
|
||||
- 각 식당에 대해 아래 필드를 JSON 배열로 반환
|
||||
- 확실하지 않은 정보는 null
|
||||
- 추가 설명 없이 JSON만 반환
|
||||
|
||||
필드:
|
||||
- name: 식당 이름 (string, 필수)
|
||||
- address: 주소 또는 위치 힌트 (string | null)
|
||||
- region: 지역 (예: 서울 강남, 부산 해운대) (string | null)
|
||||
- cuisine_type: 음식 종류 (예: 한식, 일식, 중식, 양식, 카페) (string | null)
|
||||
- price_range: 가격대 (예: 1만원대, 2-3만원) (string | null)
|
||||
- foods_mentioned: 언급된 메뉴들 (string[])
|
||||
- evaluation: 평가 내용 (string | null)
|
||||
- guests: 함께한 게스트 (string[])
|
||||
|
||||
영상 제목: {title}
|
||||
자막:
|
||||
{transcript}
|
||||
|
||||
JSON 배열:"""
|
||||
|
||||
|
||||
def extract_restaurants(title: str, transcript: str) -> tuple[list[dict], str]:
    """Extract restaurant info from a video transcript using the LLM.

    Returns (list of restaurant dicts, raw LLM response text); on any
    failure, logs the error and returns ([], "").
    """
    # Keep the prompt within budget: head + tail of very long transcripts.
    if len(transcript) > 8000:
        transcript = transcript[:7000] + "\n...(중략)...\n" + transcript[-1000:]

    try:
        raw = _llm(_EXTRACT_PROMPT.format(title=title, transcript=transcript), max_tokens=4096)
        parsed = _parse_json(raw)
    except Exception as e:
        logger.error("Restaurant extraction failed: %s", e)
        return [], ""

    # Normalize: a single object becomes a one-element list.
    if isinstance(parsed, dict):
        return [parsed], raw
    if isinstance(parsed, list):
        return parsed, raw
    return [], raw
|
||||
97
backend/core/geocoding.py
Normal file
97
backend/core/geocoding.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Google Maps Geocoding + Place Search for restaurant location lookup."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _api_key() -> str:
|
||||
return os.environ["GOOGLE_MAPS_API_KEY"]
|
||||
|
||||
|
||||
def geocode_restaurant(name: str, address: str | None = None, region: str | None = None) -> dict | None:
    """Look up restaurant coordinates using Google Maps.

    Places Text Search is tried first (more accurate for business names),
    with the Geocoding API as a fallback.

    Returns a dict with latitude, longitude, formatted_address,
    google_place_id — or None when nothing is found.
    """
    # Prefer the address over the broader region as the location hint.
    hint = address or region
    query = f"{name} {hint}" if hint else name
    return _places_text_search(query) or _geocode(query)
|
||||
|
||||
|
||||
def _places_text_search(query: str) -> dict | None:
|
||||
"""Search for a place using Google Places Text Search API."""
|
||||
try:
|
||||
r = httpx.get(
|
||||
"https://maps.googleapis.com/maps/api/place/textsearch/json",
|
||||
params={
|
||||
"query": query,
|
||||
"key": _api_key(),
|
||||
"language": "ko",
|
||||
"type": "restaurant",
|
||||
},
|
||||
timeout=10,
|
||||
)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
|
||||
if data.get("status") == "OK" and data.get("results"):
|
||||
place = data["results"][0]
|
||||
loc = place["geometry"]["location"]
|
||||
return {
|
||||
"latitude": loc["lat"],
|
||||
"longitude": loc["lng"],
|
||||
"formatted_address": place.get("formatted_address", ""),
|
||||
"google_place_id": place.get("place_id", ""),
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning("Places text search failed for '%s': %s", query, e)
|
||||
return None
|
||||
|
||||
|
||||
def _geocode(query: str) -> dict | None:
|
||||
"""Geocode an address string."""
|
||||
try:
|
||||
r = httpx.get(
|
||||
"https://maps.googleapis.com/maps/api/geocode/json",
|
||||
params={
|
||||
"address": query,
|
||||
"key": _api_key(),
|
||||
"language": "ko",
|
||||
},
|
||||
timeout=10,
|
||||
)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
|
||||
if data.get("status") == "OK" and data.get("results"):
|
||||
result = data["results"][0]
|
||||
loc = result["geometry"]["location"]
|
||||
return {
|
||||
"latitude": loc["lat"],
|
||||
"longitude": loc["lng"],
|
||||
"formatted_address": result.get("formatted_address", ""),
|
||||
"google_place_id": "",
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning("Geocoding failed for '%s': %s", query, e)
|
||||
return None
|
||||
134
backend/core/pipeline.py
Normal file
134
backend/core/pipeline.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""Data pipeline: process pending videos end-to-end.
|
||||
|
||||
For each pending video:
|
||||
1. Fetch transcript
|
||||
2. Extract restaurant info via LLM
|
||||
3. Geocode each restaurant
|
||||
4. Save to DB + generate vector embeddings
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from core import youtube, extractor, geocoding, restaurant, vector
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def process_video(video: dict) -> int:
    """Process a single pending video. Returns number of restaurants found.

    Steps: fetch transcript → LLM extraction → geocode each restaurant →
    upsert + link to the video + save embedding vectors. On failure the
    video is marked "error"; videos with no transcript or no extracted
    restaurants are marked "done" so they are not retried.
    """
    video_db_id = video["id"]
    video_id = video["video_id"]
    title = video["title"]

    logger.info("Processing video: %s (%s)", title, video_id)
    youtube.update_video_status(video_db_id, "processing")

    try:
        # 1. Transcript
        transcript = youtube.get_transcript(video_id)
        if not transcript:
            logger.warning("No transcript for %s, marking done", video_id)
            youtube.update_video_status(video_db_id, "done")
            return 0

        # Persist the transcript before the slow, fallible LLM step.
        youtube.update_video_status(video_db_id, "processing", transcript)

        # 2. LLM extraction
        restaurants, llm_raw = extractor.extract_restaurants(title, transcript)
        if not restaurants:
            logger.info("No restaurants found in %s", video_id)
            youtube.update_video_status(video_db_id, "done", llm_raw=llm_raw)
            return 0

        # 3-4. Geocode + save each restaurant
        count = 0
        for rest_data in restaurants:
            name = rest_data.get("name")
            if not name:
                # LLM output without a name is unusable — skip it.
                continue

            # Geocode (best-effort: None just leaves coordinates empty)
            geo = geocoding.geocode_restaurant(
                name,
                address=rest_data.get("address"),
                region=rest_data.get("region"),
            )

            lat = geo["latitude"] if geo else None
            lng = geo["longitude"] if geo else None
            # Prefer Google's canonical address; fall back to the LLM's hint.
            addr = geo["formatted_address"] if geo else rest_data.get("address")
            place_id = geo["google_place_id"] if geo else None

            # Upsert restaurant
            rest_id = restaurant.upsert(
                name=name,
                address=addr,
                region=rest_data.get("region"),
                latitude=lat,
                longitude=lng,
                cuisine_type=rest_data.get("cuisine_type"),
                price_range=rest_data.get("price_range"),
                google_place_id=place_id,
            )

            # Link video <-> restaurant
            restaurant.link_video_restaurant(
                video_db_id=video_db_id,
                restaurant_id=rest_id,
                foods=rest_data.get("foods_mentioned"),
                evaluation=rest_data.get("evaluation"),
                guests=rest_data.get("guests"),
            )

            # Vector embeddings
            chunks = _build_chunks(name, rest_data, title)
            if chunks:
                vector.save_restaurant_vectors(rest_id, chunks)

            count += 1
            logger.info("Saved restaurant: %s (geocoded=%s)", name, bool(geo))

        youtube.update_video_status(video_db_id, "done", llm_raw=llm_raw)
        logger.info("Video %s done: %d restaurants", video_id, count)
        return count

    except Exception as e:
        # Broad catch is deliberate: one bad video must not kill the daemon.
        logger.error("Pipeline error for %s: %s", video_id, e, exc_info=True)
        youtube.update_video_status(video_db_id, "error")
        return 0
|
||||
|
||||
|
||||
def _build_chunks(name: str, data: dict, video_title: str) -> list[str]:
    """Build the text chunk(s) to embed for one extracted restaurant."""
    lines = [f"식당: {name}"]
    region = data.get("region")
    if region:
        lines.append(f"지역: {region}")
    cuisine = data.get("cuisine_type")
    if cuisine:
        lines.append(f"음식 종류: {cuisine}")
    foods = data.get("foods_mentioned")
    if foods and isinstance(foods, list):
        lines.append(f"메뉴: {', '.join(foods)}")
    evaluation = data.get("evaluation")
    if evaluation:
        lines.append(f"평가: {evaluation}")
    price = data.get("price_range")
    if price:
        lines.append(f"가격대: {price}")
    lines.append(f"영상: {video_title}")

    # Single chunk per restaurant for now.
    return ["\n".join(lines)]
|
||||
|
||||
|
||||
def process_pending(limit: int = 5) -> int:
    """Process up to `limit` pending videos. Returns total restaurants found."""
    pending = youtube.get_pending_videos(limit)
    if not pending:
        logger.info("No pending videos")
        return 0
    return sum(process_video(v) for v in pending)
|
||||
205
backend/core/restaurant.py
Normal file
205
backend/core/restaurant.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""Restaurant DB operations — save extracted data, link to videos."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import oracledb
|
||||
|
||||
from core.db import conn
|
||||
|
||||
|
||||
def find_by_name(name: str) -> dict | None:
    """Look up a restaurant row by exact name; None when absent."""
    sql = "SELECT id, name, address, region, latitude, longitude FROM restaurants WHERE name = :n"
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"n": name})
        row = cur.fetchone()
        if row is None:
            return None
        keys = ("id", "name", "address", "region", "latitude", "longitude")
        return dict(zip(keys, row))
|
||||
|
||||
|
||||
def upsert(
    name: str,
    address: str | None = None,
    region: str | None = None,
    latitude: float | None = None,
    longitude: float | None = None,
    cuisine_type: str | None = None,
    price_range: str | None = None,
    google_place_id: str | None = None,
) -> str:
    """Insert or update a restaurant row, keyed by exact name match.

    On update, COALESCE keeps the stored value whenever the incoming
    argument is None, so callers may pass partial data without erasing
    existing fields. Returns the restaurant row id (existing or new).

    NOTE(review): find_by_name + INSERT is not atomic, and the schema has
    no unique constraint on restaurants.name — concurrent upserts of the
    same name can create duplicates; confirm whether that is acceptable.
    """
    existing = find_by_name(name)
    if existing:
        # Partial update: NULL binds fall back to the current column value.
        sql = """
            UPDATE restaurants
            SET address = COALESCE(:addr, address),
                region = COALESCE(:reg, region),
                latitude = COALESCE(:lat, latitude),
                longitude = COALESCE(:lng, longitude),
                cuisine_type = COALESCE(:cuisine, cuisine_type),
                price_range = COALESCE(:price, price_range),
                google_place_id = COALESCE(:gid, google_place_id),
                updated_at = SYSTIMESTAMP
            WHERE id = :id
        """
        with conn() as c:
            c.cursor().execute(sql, {
                "addr": address, "reg": region,
                "lat": latitude, "lng": longitude,
                "cuisine": cuisine_type, "price": price_range,
                "gid": google_place_id, "id": existing["id"],
            })
        return existing["id"]

    sql = """
        INSERT INTO restaurants (name, address, region, latitude, longitude,
                                 cuisine_type, price_range, google_place_id)
        VALUES (:name, :addr, :reg, :lat, :lng, :cuisine, :price, :gid)
        RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        # Out-bind captures the SYS_GUID() id generated by the insert.
        out_id = cur.var(oracledb.STRING)
        cur.execute(sql, {
            "name": name, "addr": address, "reg": region,
            "lat": latitude, "lng": longitude,
            "cuisine": cuisine_type, "price": price_range,
            "gid": google_place_id, "out_id": out_id,
        })
        # RETURNING INTO yields a list even for single-row DML.
        return out_id.getvalue()[0]
|
||||
|
||||
|
||||
def link_video_restaurant(
    video_db_id: str,
    restaurant_id: str,
    foods: list[str] | None = None,
    evaluation: str | None = None,
    guests: list[str] | None = None,
    citation: str | None = None,
) -> str | None:
    """Create a video-restaurant mapping row.

    foods/evaluation/guests are serialized to JSON text to satisfy the
    IS JSON check constraints on the CLOB columns. Returns the new row id,
    or None when the (video, restaurant) pair already exists (unique
    constraint UQ_VR_VIDEO_REST).
    """
    sql = """
        INSERT INTO video_restaurants
            (video_id, restaurant_id, foods_mentioned, evaluation, guests, citation_text)
        VALUES (:vid, :rid, :foods, :eval, :guests, :cite)
        RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        out_id = cur.var(oracledb.STRING)
        try:
            cur.execute(sql, {
                "vid": video_db_id,
                "rid": restaurant_id,
                # ensure_ascii=False keeps Korean text readable in the DB.
                "foods": json.dumps(foods or [], ensure_ascii=False),
                # evaluation is wrapped as {"text": ...} so the column is
                # always a JSON object, never a bare string.
                "eval": json.dumps({"text": evaluation} if evaluation else {}, ensure_ascii=False),
                "guests": json.dumps(guests or [], ensure_ascii=False),
                "cite": citation,
                "out_id": out_id,
            })
            return out_id.getvalue()[0]
        except Exception as e:
            # Duplicate (video, restaurant) pair -> treat as a no-op.
            # NOTE(review): matching the constraint name in the message is
            # brittle; catching oracledb.IntegrityError would be tighter.
            if "UQ_VR_VIDEO_REST" in str(e).upper():
                return None
            raise
|
||||
|
||||
|
||||
def get_all(
    limit: int = 100,
    offset: int = 0,
    cuisine: str | None = None,
    region: str | None = None,
) -> list[dict]:
    """List restaurants that have coordinates, with optional filters.

    Supports paging via limit/offset; cuisine is an exact match, region
    a substring match.
    """
    # Only rows with coordinates are returned (usable on the map).
    filters = ["latitude IS NOT NULL"]
    binds: dict = {"lim": limit, "off": offset}

    if cuisine:
        filters.append("cuisine_type = :cuisine")
        binds["cuisine"] = cuisine
    if region:
        filters.append("region LIKE :region")
        binds["region"] = f"%{region}%"

    where = " AND ".join(filters)
    sql = f"""
        SELECT id, name, address, region, latitude, longitude,
               cuisine_type, price_range, google_place_id
        FROM restaurants
        WHERE {where}
        ORDER BY updated_at DESC
        OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, binds)
        columns = [d[0].lower() for d in cur.description]
        return [dict(zip(columns, record)) for record in cur.fetchall()]
|
||||
|
||||
|
||||
def get_by_id(restaurant_id: str) -> dict | None:
    """Fetch a single restaurant row as a dict, or None if missing."""
    sql = """
        SELECT r.id, r.name, r.address, r.region, r.latitude, r.longitude,
               r.cuisine_type, r.price_range, r.phone, r.website, r.google_place_id
        FROM restaurants r
        WHERE r.id = :id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"id": restaurant_id})
        record = cur.fetchone()
        if record is None:
            return None
        names = [d[0].lower() for d in cur.description]
        return dict(zip(names, record))
|
||||
|
||||
|
||||
def get_video_links(restaurant_id: str) -> list[dict]:
    """Return every video appearance of a restaurant, newest first."""
    sql = """
        SELECT v.video_id, v.title, v.url, v.published_at,
               vr.foods_mentioned, vr.evaluation, vr.guests
        FROM video_restaurants vr
        JOIN videos v ON v.id = vr.video_id
        WHERE vr.restaurant_id = :rid
        ORDER BY v.published_at DESC
    """

    def _materialize(value):
        # Oracle CLOBs come back as LOB handles; read() yields a str.
        return value.read() if hasattr(value, "read") else value

    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"rid": restaurant_id})
        links = []
        for vid, title, url, published, foods, evaluation, guests in cur.fetchall():
            links.append({
                "video_id": vid,
                "title": title,
                "url": url,
                "published_at": published.isoformat() if published else None,
                "foods_mentioned": _parse_json_field(_materialize(foods), []),
                "evaluation": _parse_json_field(_materialize(evaluation), {}),
                "guests": _parse_json_field(_materialize(guests), []),
            })
        return links
|
||||
|
||||
|
||||
def _parse_json_field(val, default):
|
||||
"""Parse a JSON field that may be a string, already-parsed object, or None."""
|
||||
if val is None:
|
||||
return default
|
||||
if isinstance(val, (list, dict)):
|
||||
return val
|
||||
if isinstance(val, str):
|
||||
try:
|
||||
return json.loads(val)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
return default
|
||||
return default
|
||||
189
backend/core/review.py
Normal file
189
backend/core/review.py
Normal file
@@ -0,0 +1,189 @@
|
||||
"""User review DB operations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
|
||||
import oracledb
|
||||
|
||||
from core.db import conn
|
||||
|
||||
|
||||
def create_review(
    user_id: str,
    restaurant_id: str,
    rating: float,
    review_text: str | None = None,
    visited_at: date | None = None,
) -> dict:
    """Create a new review and return it with author info joined in.

    The database enforces the rating range 0.5-5.0 (ck_ur_rating) and
    valid user/restaurant foreign keys; violations surface as database
    errors from the driver.
    """
    sql = """
        INSERT INTO user_reviews (user_id, restaurant_id, rating, review_text, visited_at)
        VALUES (:user_id, :restaurant_id, :rating, :review_text, :visited_at)
        RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        # Out-bind captures the SYS_GUID() id generated by the insert.
        out_id = cur.var(oracledb.STRING)
        cur.execute(sql, {
            "user_id": user_id,
            "restaurant_id": restaurant_id,
            "rating": rating,
            "review_text": review_text,
            "visited_at": visited_at,
            "out_id": out_id,
        })
        new_id = out_id.getvalue()[0]

    # Re-read the row so the response includes nickname/avatar from the join.
    return get_review_by_id(new_id)
|
||||
|
||||
|
||||
def update_review(
    review_id: str,
    user_id: str,
    rating: float | None = None,
    review_text: str | None = None,
    visited_at: date | None = None,
) -> dict | None:
    """Update an existing review. Only the owner can update.

    None-valued arguments leave the corresponding column unchanged
    (COALESCE keeps the stored value).

    Returns the updated review dict, or None if not found / not owner.
    """
    sql = """
        UPDATE user_reviews
        SET rating = COALESCE(:rating, rating),
            review_text = COALESCE(:review_text, review_text),
            visited_at = COALESCE(:visited_at, visited_at),
            updated_at = SYSTIMESTAMP
        WHERE id = :id AND user_id = :user_id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {
            "rating": rating,
            "review_text": review_text,
            "visited_at": visited_at,
            "id": review_id,
            "user_id": user_id,
        })
        # Zero rows touched -> review missing or owned by someone else.
        if cur.rowcount == 0:
            return None

    return get_review_by_id(review_id)
|
||||
|
||||
|
||||
def delete_review(review_id: str, user_id: str) -> bool:
    """Remove a review owned by *user_id*. True when a row was deleted."""
    with conn() as c:
        cur = c.cursor()
        # The user_id predicate enforces ownership at the SQL level.
        cur.execute(
            "DELETE FROM user_reviews WHERE id = :id AND user_id = :user_id",
            {"id": review_id, "user_id": user_id},
        )
        return cur.rowcount > 0
|
||||
|
||||
|
||||
def get_review_by_id(review_id: str) -> dict | None:
    """Fetch one review (with author nickname/avatar) or None."""
    sql = """
        SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
               r.visited_at, r.created_at, r.updated_at,
               u.nickname, u.avatar_url
        FROM user_reviews r
        JOIN tasteby_users u ON u.id = r.user_id
        WHERE r.id = :id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"id": review_id})
        record = cur.fetchone()
        # Convert inside the with-block: the CLOB column must be read
        # before the connection closes.
        return _row_to_dict(record) if record else None
|
||||
|
||||
|
||||
def get_reviews_for_restaurant(
    restaurant_id: str,
    limit: int = 20,
    offset: int = 0,
) -> list[dict]:
    """Page through a restaurant's reviews (newest first), with author info."""
    sql = """
        SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
               r.visited_at, r.created_at, r.updated_at,
               u.nickname, u.avatar_url
        FROM user_reviews r
        JOIN tasteby_users u ON u.id = r.user_id
        WHERE r.restaurant_id = :restaurant_id
        ORDER BY r.created_at DESC
        OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    binds = {"restaurant_id": restaurant_id, "off": offset, "lim": limit}
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, binds)
        return [_row_to_dict(record) for record in cur.fetchall()]
|
||||
|
||||
|
||||
def get_user_reviews(
    user_id: str,
    limit: int = 20,
    offset: int = 0,
) -> list[dict]:
    """Page through one user's reviews, newest first."""
    sql = """
        SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
               r.visited_at, r.created_at, r.updated_at,
               u.nickname, u.avatar_url
        FROM user_reviews r
        JOIN tasteby_users u ON u.id = r.user_id
        WHERE r.user_id = :user_id
        ORDER BY r.created_at DESC
        OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    binds = {"user_id": user_id, "off": offset, "lim": limit}
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, binds)
        return [_row_to_dict(record) for record in cur.fetchall()]
|
||||
|
||||
|
||||
def get_restaurant_avg_rating(restaurant_id: str) -> dict:
    """Return {"avg_rating": float | None, "review_count": int}.

    avg_rating is None when the restaurant has no reviews (AVG over zero
    rows is NULL); review_count is always an int.
    """
    sql = """
        SELECT ROUND(AVG(rating), 1) AS avg_rating, COUNT(*) AS review_count
        FROM user_reviews
        WHERE restaurant_id = :restaurant_id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"restaurant_id": restaurant_id})
        row = cur.fetchone()
        return {
            # `is not None` rather than truthiness: the original dropped a
            # numeric 0 average to None (harmless today only because the
            # schema forbids ratings below 0.5).
            "avg_rating": float(row[0]) if row[0] is not None else None,
            "review_count": int(row[1]),
        }
|
||||
|
||||
|
||||
def _row_to_dict(row) -> dict:
|
||||
"""Convert a review query row to a dict."""
|
||||
review_text = row[4]
|
||||
if hasattr(review_text, "read"):
|
||||
review_text = review_text.read()
|
||||
|
||||
return {
|
||||
"id": row[0],
|
||||
"user_id": row[1],
|
||||
"restaurant_id": row[2],
|
||||
"rating": float(row[3]),
|
||||
"review_text": review_text,
|
||||
"visited_at": row[5].isoformat() if row[5] else None,
|
||||
"created_at": row[6].isoformat() if row[6] else None,
|
||||
"updated_at": row[7].isoformat() if row[7] else None,
|
||||
"user_nickname": row[8],
|
||||
"user_avatar_url": row[9],
|
||||
}
|
||||
97
backend/core/vector.py
Normal file
97
backend/core/vector.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Vector embedding generation and storage for restaurant semantic search."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import array
|
||||
import os
|
||||
|
||||
import oci
|
||||
from oci.generative_ai_inference import GenerativeAiInferenceClient
|
||||
from oci.generative_ai_inference.models import (
|
||||
EmbedTextDetails,
|
||||
OnDemandServingMode,
|
||||
)
|
||||
|
||||
from core.db import conn
|
||||
|
||||
|
||||
def _embed_texts(texts: list[str]) -> list[list[float]]:
    """Embed a batch of texts via OCI GenAI (Cohere embed model).

    Returns one embedding (list of floats) per input text, in order.
    NOTE(review): a new client is constructed on every call; consider
    caching it at module level if this becomes a hot path.
    """
    # Credentials come from the default OCI config file (~/.oci/config).
    config = oci.config.from_file()
    client = GenerativeAiInferenceClient(
        config,
        service_endpoint=os.environ["OCI_GENAI_ENDPOINT"],
    )
    model_id = os.environ.get("OCI_EMBED_MODEL_ID", "cohere.embed-v4.0")
    compartment_id = os.environ["OCI_COMPARTMENT_ID"]

    details = EmbedTextDetails(
        inputs=texts,
        serving_mode=OnDemandServingMode(model_id=model_id),
        compartment_id=compartment_id,
        # SEARCH_DOCUMENT: these vectors are stored for later retrieval.
        input_type="SEARCH_DOCUMENT",
    )
    response = client.embed_text(details)
    return response.data.embeddings
|
||||
|
||||
|
||||
def _to_vec(embedding: list[float]) -> array.array:
|
||||
return array.array("f", embedding)
|
||||
|
||||
|
||||
def save_restaurant_vectors(restaurant_id: str, chunks: list[str]) -> list[str]:
    """Embed and store text chunks for a restaurant.

    Each chunk becomes one restaurant_vectors row with its embedding bound
    as a float32 array. Returns the list of inserted row IDs (empty when
    `chunks` is empty — no embedding API call is made in that case).
    """
    if not chunks:
        return []

    embeddings = _embed_texts(chunks)
    inserted: list[str] = []

    sql = """
        INSERT INTO restaurant_vectors (restaurant_id, chunk_text, embedding)
        VALUES (:rid, :chunk, :emb)
        RETURNING id INTO :out_id
    """
    import oracledb
    with conn() as c:
        cur = c.cursor()
        for chunk, emb in zip(chunks, embeddings):
            # Fresh out-bind per row; RETURNING INTO fills it on execute.
            out_id = cur.var(oracledb.STRING)
            cur.execute(sql, {
                "rid": restaurant_id,
                "chunk": chunk,
                "emb": _to_vec(emb),
                "out_id": out_id,
            })
            inserted.append(out_id.getvalue()[0])
    return inserted
|
||||
|
||||
|
||||
def search_similar(query: str, top_k: int = 10) -> list[dict]:
    """Semantic search: rank stored chunks by cosine distance to the query.

    Returns a list of dicts: restaurant_id, chunk_text, distance
    (smaller distance = more similar).

    NOTE(review): the query is embedded through the same code path as
    documents (input_type SEARCH_DOCUMENT in _embed_texts); Cohere's API
    distinguishes SEARCH_QUERY for queries — confirm whether retrieval
    quality is affected.
    """
    embeddings = _embed_texts([query])
    query_vec = _to_vec(embeddings[0])

    sql = """
        SELECT rv.restaurant_id, rv.chunk_text,
               VECTOR_DISTANCE(rv.embedding, :qvec, COSINE) AS dist
        FROM restaurant_vectors rv
        ORDER BY dist
        FETCH FIRST :k ROWS ONLY
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"qvec": query_vec, "k": top_k})
        return [
            {
                "restaurant_id": r[0],
                # chunk_text is a CLOB: read while the connection is open.
                "chunk_text": r[1].read() if hasattr(r[1], "read") else r[1],
                "distance": r[2],
            }
            for r in cur.fetchall()
        ]
|
||||
221
backend/core/youtube.py
Normal file
221
backend/core/youtube.py
Normal file
@@ -0,0 +1,221 @@
|
||||
"""YouTube channel scanner + transcript extraction.
|
||||
|
||||
Uses YouTube Data API v3 for channel video listing,
|
||||
youtube-transcript-api for transcript extraction.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
import httpx
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
|
||||
from core.db import conn
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _api_key() -> str:
|
||||
return os.environ["YOUTUBE_DATA_API_KEY"]
|
||||
|
||||
|
||||
def extract_video_id(url: str) -> str:
    """Extract the video ID from a watch-page or youtu.be URL.

    Raises ValueError when neither a ``v=`` query parameter nor a
    youtu.be path segment is present.
    """
    found = re.search(r"(?:v=|youtu\.be/)([^&?/\s]+)", url)
    if found is None:
        raise ValueError(f"Cannot extract video ID from URL: {url}")
    return found.group(1)
|
||||
|
||||
|
||||
# -- Channel operations -------------------------------------------------------
|
||||
|
||||
def add_channel(channel_id: str, channel_name: str) -> str:
    """Register a YouTube channel. Returns the DB row id."""
    import oracledb

    sql = """
        INSERT INTO channels (channel_id, channel_name, channel_url)
        VALUES (:cid, :cname, :curl)
        RETURNING id INTO :out_id
    """
    binds = {
        "cid": channel_id,
        "cname": channel_name,
        "curl": f"https://www.youtube.com/channel/{channel_id}",
    }
    with conn() as c:
        cur = c.cursor()
        # Out-bind receives the generated row id from RETURNING INTO.
        new_id = cur.var(oracledb.STRING)
        binds["out_id"] = new_id
        cur.execute(sql, binds)
        return new_id.getvalue()[0]
|
||||
|
||||
|
||||
def get_active_channels() -> list[dict]:
    """Return id/channel_id/channel_name for every active channel."""
    sql = "SELECT id, channel_id, channel_name FROM channels WHERE is_active = 1"
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql)
        keys = ("id", "channel_id", "channel_name")
        return [dict(zip(keys, row)) for row in cur.fetchall()]
|
||||
|
||||
|
||||
# -- Video listing via YouTube Data API v3 ------------------------------------
|
||||
|
||||
def fetch_channel_videos(
    channel_id: str,
    max_results: int = 50,
    published_after: str | None = None,
) -> list[dict]:
    """Fetch video list from a YouTube channel via Data API v3.

    Paginates through search.list results (50 per page, the API page cap)
    until `max_results` videos are collected or pages run out.

    Args:
        channel_id: YouTube channel ID (passed to the API's channelId).
        max_results: Upper bound on the number of videos returned.
        published_after: Timestamp filter, passed through verbatim as
            the API's publishedAfter parameter.

    Returns:
        List of dicts: video_id, title, published_at, url.

    Raises:
        httpx.HTTPStatusError: on a non-2xx API response.
        KeyError: if YOUTUBE_DATA_API_KEY is not set.
    """
    params: dict = {
        "key": _api_key(),
        "channelId": channel_id,
        "part": "snippet",
        "order": "date",
        "maxResults": min(max_results, 50),
        "type": "video",
    }
    if published_after:
        params["publishedAfter"] = published_after

    videos: list[dict] = []
    next_page = None

    while True:
        # Same params dict is reused across pages; only the token changes.
        if next_page:
            params["pageToken"] = next_page

        r = httpx.get(
            "https://www.googleapis.com/youtube/v3/search",
            params=params,
            timeout=15,
        )
        r.raise_for_status()
        data = r.json()

        for item in data.get("items", []):
            snippet = item["snippet"]
            # id.videoId is present because type=video is requested above.
            vid = item["id"]["videoId"]
            videos.append({
                "video_id": vid,
                "title": snippet["title"],
                "published_at": snippet["publishedAt"],
                "url": f"https://www.youtube.com/watch?v={vid}",
            })

        next_page = data.get("nextPageToken")
        if not next_page or len(videos) >= max_results:
            break

    # The last page may overshoot; trim to the requested bound.
    return videos[:max_results]
|
||||
|
||||
|
||||
# -- Transcript extraction ----------------------------------------------------
|
||||
|
||||
def get_transcript(video_id: str) -> str | None:
    """Fetch transcript text for a video. Returns None if unavailable.

    Prefers Korean, falls back to English. The broad except is deliberate:
    transcripts are best-effort, so any failure (no captions, API error)
    downgrades to None with a warning instead of aborting the pipeline.
    """
    try:
        fetched = YouTubeTranscriptApi().fetch(video_id, languages=["ko", "en"])
        # Flatten the timed segments into one space-joined string.
        return " ".join(seg.text for seg in fetched)
    except Exception as e:
        logger.warning("Transcript unavailable for %s: %s", video_id, e)
        return None
|
||||
|
||||
|
||||
# -- DB operations for videos -------------------------------------------------
|
||||
|
||||
def save_video(channel_db_id: str, video: dict) -> str | None:
    """Insert a video row if not exists. Returns row id or None if duplicate.

    `video` is a dict as produced by fetch_channel_videos
    (video_id, title, url, published_at). New rows start in status
    'pending' so the pipeline picks them up.
    """
    sql = """
        INSERT INTO videos (channel_id, video_id, title, url, published_at, status)
        VALUES (:ch_id, :vid, :title, :url, :pub_at, 'pending')
        RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        import oracledb
        out_id = cur.var(oracledb.STRING)
        try:
            pub_at = None
            if video.get("published_at"):
                # The API timestamp ends in 'Z'; rewrite to +00:00 so
                # datetime.fromisoformat accepts it.
                pub_at = datetime.fromisoformat(
                    video["published_at"].replace("Z", "+00:00")
                )
            cur.execute(sql, {
                "ch_id": channel_db_id,
                "vid": video["video_id"],
                "title": video["title"],
                "url": video["url"],
                "pub_at": pub_at,
                "out_id": out_id,
            })
            return out_id.getvalue()[0]
        except Exception as e:
            # Unique constraint on video_id -> video already stored.
            # NOTE(review): string-matching the constraint name is brittle;
            # oracledb.IntegrityError would be a tighter catch.
            if "UQ_VIDEOS_VID" in str(e).upper():
                return None  # duplicate
            raise
|
||||
|
||||
|
||||
def get_pending_videos(limit: int = 10) -> list[dict]:
    """Return an oldest-first batch of videos still awaiting processing."""
    sql = """
        SELECT id, video_id, title, url
        FROM videos
        WHERE status = 'pending'
        ORDER BY created_at
        FETCH FIRST :n ROWS ONLY
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"n": limit})
        fields = ("id", "video_id", "title", "url")
        return [dict(zip(fields, row)) for row in cur.fetchall()]
|
||||
|
||||
|
||||
def update_video_status(
    video_db_id: str,
    status: str,
    transcript: str | None = None,
    llm_raw: str | None = None,
) -> None:
    """Set a video's status and processed_at, optionally storing the
    transcript and/or raw LLM response when provided (non-empty)."""
    assignments = ["status = :st", "processed_at = SYSTIMESTAMP"]
    binds: dict = {"st": status, "vid": video_db_id}

    if transcript:
        assignments.append("transcript_text = :txt")
        binds["txt"] = transcript
    if llm_raw:
        assignments.append("llm_raw_response = :llm_resp")
        binds["llm_resp"] = llm_raw

    statement = f"UPDATE videos SET {', '.join(assignments)} WHERE id = :vid"
    with conn() as c:
        c.cursor().execute(statement, binds)
|
||||
|
||||
|
||||
# -- Scan: fetch new videos for all active channels ---------------------------
|
||||
|
||||
def scan_all_channels(max_per_channel: int = 50) -> int:
    """Scan all active channels for new videos. Returns count of new videos.

    A failure on one channel is logged and does not stop the others.
    """
    channels = get_active_channels()
    total_new = 0
    for ch in channels:
        try:
            videos = fetch_channel_videos(ch["channel_id"], max_per_channel)
            # Count new rows per channel: the original passed the running
            # cumulative total to the log line, misreporting every channel
            # after the first.
            channel_new = sum(1 for v in videos if save_video(ch["id"], v))
            total_new += channel_new
            logger.info(
                "Channel %s: fetched %d videos, %d new",
                ch["channel_name"], len(videos), channel_new,
            )
        except Exception as e:
            logger.error("Failed to scan channel %s: %s", ch["channel_name"], e)
    return total_new
|
||||
0
backend/daemon/__init__.py
Normal file
0
backend/daemon/__init__.py
Normal file
37
backend/daemon/worker.py
Normal file
37
backend/daemon/worker.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""Daemon worker: periodic channel scan + video processing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
from core.youtube import scan_all_channels
|
||||
from core.pipeline import process_pending
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_once() -> None:
    """Single daemon cycle: scan channels, then process pending videos."""
    logger.info("=== Daemon cycle start ===")

    try:
        discovered = scan_all_channels()
    except Exception as e:
        # One failed stage must not abort the cycle.
        logger.error("Channel scan failed: %s", e)
    else:
        logger.info("Scan complete: %d new videos", discovered)

    try:
        extracted = process_pending(limit=10)
    except Exception as e:
        logger.error("Video processing failed: %s", e)
    else:
        logger.info("Processing complete: %d restaurants extracted", extracted)

    logger.info("=== Daemon cycle end ===")
|
||||
|
||||
|
||||
def run_loop(interval: int = 3600) -> None:
    """Run the daemon forever with a fixed sleep between cycles.

    Blocks the calling thread. run_once() catches its own stage errors,
    so a failed cycle does not kill the loop.
    """
    logger.info("Daemon started (interval=%ds)", interval)
    while True:
        run_once()
        # Fixed delay (not aligned to wall clock); cycle duration adds up.
        time.sleep(interval)
|
||||
10
backend/requirements.txt
Normal file
10
backend/requirements.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
fastapi>=0.115
|
||||
uvicorn[standard]>=0.34
|
||||
python-dotenv>=1.0
|
||||
oracledb>=2.5
|
||||
oci>=2.140
|
||||
httpx>=0.28
|
||||
youtube-transcript-api>=1.0
|
||||
pydantic>=2.0
|
||||
pyjwt[crypto]>=2.8
|
||||
google-auth>=2.28
|
||||
9
backend/run_api.py
Normal file
9
backend/run_api.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""Run the FastAPI server."""
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
import uvicorn
|
||||
|
||||
if __name__ == "__main__":
|
||||
uvicorn.run("api.main:app", host="0.0.0.0", port=8000, reload=True)
|
||||
18
backend/run_daemon.py
Normal file
18
backend/run_daemon.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""Run the daemon worker."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
from daemon.worker import run_loop
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
interval = int(os.environ.get("DAEMON_INTERVAL", "3600"))
|
||||
run_loop(interval)
|
||||
127
backend/sql/schema.sql
Normal file
127
backend/sql/schema.sql
Normal file
@@ -0,0 +1,127 @@
|
||||
-- Tasteby: Oracle ADB (23ai) Schema
-- Run against Oracle ADB before starting the backend.

-- 1. channels
CREATE TABLE channels (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    channel_id VARCHAR2(50) NOT NULL,
    channel_name VARCHAR2(200) NOT NULL,
    channel_url VARCHAR2(500),
    is_active NUMBER(1) DEFAULT 1 NOT NULL,
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    CONSTRAINT pk_channels PRIMARY KEY (id),
    CONSTRAINT uq_channels_cid UNIQUE (channel_id)
);

-- 2. videos
CREATE TABLE videos (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    channel_id VARCHAR2(36) NOT NULL,
    video_id VARCHAR2(20) NOT NULL,
    title VARCHAR2(500) NOT NULL,
    url VARCHAR2(500) NOT NULL,
    published_at TIMESTAMP,
    transcript_text CLOB,
    -- Raw LLM output: written by core/youtube.py update_video_status
    -- (llm_raw_response bind); column was missing from the original schema
    -- and the UPDATE would fail with ORA-00904.
    llm_raw_response CLOB,
    status VARCHAR2(20) DEFAULT 'pending' NOT NULL,
    processed_at TIMESTAMP,
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    CONSTRAINT pk_videos PRIMARY KEY (id),
    CONSTRAINT uq_videos_vid UNIQUE (video_id),
    CONSTRAINT fk_videos_channel FOREIGN KEY (channel_id) REFERENCES channels(id),
    CONSTRAINT ck_videos_status CHECK (status IN ('pending','processing','done','error'))
);

-- 3. restaurants
-- NOTE(review): name has no unique constraint, but the backend's upsert
-- keys on exact name; concurrent upserts of the same name can duplicate.
CREATE TABLE restaurants (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    name VARCHAR2(200) NOT NULL,
    address VARCHAR2(500),
    region VARCHAR2(100),
    latitude NUMBER(10,7),
    longitude NUMBER(10,7),
    cuisine_type VARCHAR2(100),
    price_range VARCHAR2(50),
    phone VARCHAR2(50),
    website VARCHAR2(500),
    google_place_id VARCHAR2(200),
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    updated_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    CONSTRAINT pk_restaurants PRIMARY KEY (id)
);

-- 4. video_restaurants (N:M)
CREATE TABLE video_restaurants (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    video_id VARCHAR2(36) NOT NULL,
    restaurant_id VARCHAR2(36) NOT NULL,
    foods_mentioned CLOB,
    evaluation CLOB,
    guests CLOB,
    air_date DATE,
    citation_text CLOB,
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    CONSTRAINT pk_video_restaurants PRIMARY KEY (id),
    CONSTRAINT fk_vr_video FOREIGN KEY (video_id) REFERENCES videos(id),
    CONSTRAINT fk_vr_restaurant FOREIGN KEY (restaurant_id) REFERENCES restaurants(id),
    -- The backend relies on this constraint name to detect duplicates.
    CONSTRAINT uq_vr_video_rest UNIQUE (video_id, restaurant_id)
);

-- JSON check constraints for CLOB JSON columns
-- (Oracle 23ai supports IS JSON on CLOB)
ALTER TABLE video_restaurants ADD CONSTRAINT ck_vr_foods CHECK (foods_mentioned IS JSON);
ALTER TABLE video_restaurants ADD CONSTRAINT ck_vr_eval CHECK (evaluation IS JSON);
ALTER TABLE video_restaurants ADD CONSTRAINT ck_vr_guests CHECK (guests IS JSON);

-- 5. tasteby_users (NOT "users" — that table belongs to another project)
CREATE TABLE tasteby_users (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    provider VARCHAR2(20) NOT NULL,
    provider_id VARCHAR2(200) NOT NULL,
    email VARCHAR2(200),
    nickname VARCHAR2(100),
    avatar_url VARCHAR2(500),
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    last_login_at TIMESTAMP,
    CONSTRAINT pk_tasteby_users PRIMARY KEY (id),
    CONSTRAINT uq_tasteby_users_provider UNIQUE (provider, provider_id),
    CONSTRAINT ck_tasteby_users_provider CHECK (provider IN ('google','apple'))
);

-- 6. user_reviews
CREATE TABLE user_reviews (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    user_id VARCHAR2(36) NOT NULL,
    restaurant_id VARCHAR2(36) NOT NULL,
    -- Half-star granularity, 0.5 to 5.0 (enforced below).
    rating NUMBER(2,1) NOT NULL,
    review_text CLOB,
    visited_at DATE,
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    updated_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    CONSTRAINT pk_user_reviews PRIMARY KEY (id),
    CONSTRAINT fk_ur_user FOREIGN KEY (user_id) REFERENCES tasteby_users(id),
    CONSTRAINT fk_ur_restaurant FOREIGN KEY (restaurant_id) REFERENCES restaurants(id),
    CONSTRAINT ck_ur_rating CHECK (rating BETWEEN 0.5 AND 5.0)
);

-- 7. restaurant_vectors (semantic search)
CREATE TABLE restaurant_vectors (
    id VARCHAR2(36) DEFAULT SYS_GUID() NOT NULL,
    restaurant_id VARCHAR2(36) NOT NULL,
    chunk_text CLOB NOT NULL,
    -- Dimensionality must match the embedding model's output
    -- (OCI_EMBED_MODEL_ID, default cohere.embed-v4.0) — verify on model change.
    embedding VECTOR(1536, FLOAT32),
    created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL,
    CONSTRAINT pk_restaurant_vectors PRIMARY KEY (id),
    CONSTRAINT fk_rv_restaurant FOREIGN KEY (restaurant_id) REFERENCES restaurants(id)
);

-- COSINE here matches the VECTOR_DISTANCE(..., COSINE) used by search.
CREATE VECTOR INDEX idx_rv_embedding
    ON restaurant_vectors (embedding)
    ORGANIZATION NEIGHBOR PARTITIONS
    WITH DISTANCE COSINE;

-- Useful indexes
CREATE INDEX idx_videos_status ON videos(status);
CREATE INDEX idx_videos_channel ON videos(channel_id);
CREATE INDEX idx_vr_restaurant ON video_restaurants(restaurant_id);
CREATE INDEX idx_ur_restaurant ON user_reviews(restaurant_id);
CREATE INDEX idx_ur_user ON user_reviews(user_id);
||||
Reference in New Issue
Block a user