Initial commit: Tasteby - YouTube restaurant map service
Backend (FastAPI + Oracle ADB), Frontend (Next.js), daemon worker. Features: channel/video/restaurant management, semantic search, Google OAuth, user reviews. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
0
backend/core/__init__.py
Normal file
0
backend/core/__init__.py
Normal file
122
backend/core/auth.py
Normal file
122
backend/core/auth.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""Authentication helpers — Google OAuth2 + JWT."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import jwt
|
||||
import oracledb
|
||||
from google.oauth2 import id_token as google_id_token
|
||||
from google.auth.transport import requests as google_requests
|
||||
|
||||
from core.db import conn
|
||||
|
||||
JWT_SECRET = os.environ.get("JWT_SECRET", "tasteby-dev-secret-change-me")
|
||||
JWT_ALGORITHM = "HS256"
|
||||
JWT_EXPIRE_DAYS = 7
|
||||
|
||||
|
||||
def verify_google_token(token: str) -> dict:
    """Verify a Google ID token and return user info.

    Returns dict with keys: sub, email, name, picture.
    Raises ValueError on invalid token.
    """
    # SECURITY FIX: pass our OAuth client ID as `audience` so ID tokens
    # minted for *other* applications are rejected (verify_oauth2_token
    # skips the `aud` check when audience is None). Falls back to the
    # old unchecked behavior when GOOGLE_CLIENT_ID is unset (dev).
    audience = os.environ.get("GOOGLE_CLIENT_ID")
    info = google_id_token.verify_oauth2_token(
        token,
        google_requests.Request(),
        audience=audience,
    )
    return {
        "sub": info["sub"],
        "email": info.get("email"),
        "name": info.get("name"),
        "picture": info.get("picture"),
    }
|
||||
|
||||
|
||||
def find_or_create_user(
    provider: str,
    provider_id: str,
    email: str | None = None,
    nickname: str | None = None,
    avatar_url: str | None = None,
) -> dict:
    """Find existing user or create new one. Returns user dict.

    Looks up by (provider, provider_id). When found, refreshes
    last_login_at and backfills email/nickname/avatar_url — COALESCE
    keeps the stored value when the incoming one is NULL. Otherwise a
    new row is inserted. Everything runs on one pooled connection; the
    conn() context manager commits on success and rolls back on error.

    NOTE(review): find-then-insert is not atomic — two concurrent first
    logins for the same identity could both miss the SELECT; relies on a
    DB unique constraint (not visible here) to prevent duplicates.
    """
    # Try to find existing user
    sql_find = """
        SELECT id, provider, provider_id, email, nickname, avatar_url, created_at, last_login_at
        FROM tasteby_users
        WHERE provider = :provider AND provider_id = :provider_id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql_find, {"provider": provider, "provider_id": provider_id})
        row = cur.fetchone()

        if row:
            # Update last_login and optional fields
            sql_update = """
                UPDATE tasteby_users
                SET last_login_at = SYSTIMESTAMP,
                    email = COALESCE(:email, email),
                    nickname = COALESCE(:nickname, nickname),
                    avatar_url = COALESCE(:avatar_url, avatar_url)
                WHERE id = :id
            """
            cur.execute(sql_update, {
                "email": email, "nickname": nickname,
                "avatar_url": avatar_url, "id": row[0],
            })
            # Return the refreshed view: incoming values win when present,
            # otherwise the previously stored columns.
            return {
                "id": row[0],
                "provider": row[1],
                "provider_id": row[2],
                "email": email or row[3],
                "nickname": nickname or row[4],
                "avatar_url": avatar_url or row[5],
            }

        # Create new user
        sql_insert = """
            INSERT INTO tasteby_users (provider, provider_id, email, nickname, avatar_url, last_login_at)
            VALUES (:provider, :provider_id, :email, :nickname, :avatar_url, SYSTIMESTAMP)
            RETURNING id INTO :out_id
        """
        # Output bind captures the DB-generated primary key.
        out_id = cur.var(oracledb.STRING)
        cur.execute(sql_insert, {
            "provider": provider,
            "provider_id": provider_id,
            "email": email,
            "nickname": nickname,
            "avatar_url": avatar_url,
            "out_id": out_id,
        })
        # RETURNING ... INTO yields a list; single-row insert -> first element.
        new_id = out_id.getvalue()[0]
        return {
            "id": new_id,
            "provider": provider,
            "provider_id": provider_id,
            "email": email,
            "nickname": nickname,
            "avatar_url": avatar_url,
        }
|
||||
|
||||
|
||||
def create_jwt(user: dict) -> str:
    """Build a signed JWT access token for *user*.

    Carries the user id as ``sub`` plus email/nickname claims, and
    expires JWT_EXPIRE_DAYS after issuance.
    """
    issued_at = datetime.now(timezone.utc)
    claims = {
        "sub": user["id"],
        "email": user.get("email"),
        "nickname": user.get("nickname"),
        "exp": issued_at + timedelta(days=JWT_EXPIRE_DAYS),
        "iat": issued_at,
    }
    return jwt.encode(claims, JWT_SECRET, algorithm=JWT_ALGORITHM)
|
||||
|
||||
|
||||
def verify_jwt(token: str) -> dict:
    """Decode *token* and return its claim payload.

    Raises jwt.InvalidTokenError on failure (bad signature, expired, etc.).
    """
    claims = jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
    return claims
|
||||
44
backend/core/db.py
Normal file
44
backend/core/db.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""Oracle ADB connection pool — shared across all modules."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from contextlib import contextmanager
|
||||
from typing import Generator, Optional
|
||||
|
||||
import oracledb
|
||||
|
||||
_pool: Optional[oracledb.ConnectionPool] = None
|
||||
|
||||
|
||||
def _get_pool() -> oracledb.ConnectionPool:
    """Return the process-wide connection pool, creating it on first use.

    Pool parameters come from ORACLE_USER / ORACLE_PASSWORD / ORACLE_DSN
    (all required); ORACLE_WALLET, when set, points at the wallet
    directory used as config_dir.
    """
    global _pool
    if _pool is None:
        pool_args: dict = {
            "user": os.environ["ORACLE_USER"],
            "password": os.environ["ORACLE_PASSWORD"],
            "dsn": os.environ["ORACLE_DSN"],
            "min": 1,
            "max": 5,
            "increment": 1,
        }
        wallet_dir = os.environ.get("ORACLE_WALLET")
        if wallet_dir:
            pool_args["config_dir"] = wallet_dir
        _pool = oracledb.create_pool(**pool_args)
    return _pool
|
||||
|
||||
|
||||
@contextmanager
def conn() -> Generator[oracledb.Connection, None, None]:
    """Yield a pooled connection; commit on success, roll back on error.

    A failed commit is also rolled back and re-raised; the connection is
    always returned to the pool.
    """
    pool = _get_pool()
    connection = pool.acquire()
    try:
        yield connection
        connection.commit()
    except Exception:
        connection.rollback()
        raise
    finally:
        pool.release(connection)
|
||||
128
backend/core/extractor.py
Normal file
128
backend/core/extractor.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""LLM-based restaurant info extraction from video transcripts.
|
||||
|
||||
Uses OCI GenAI (Gemini Flash) to extract structured restaurant data.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
||||
import oci
|
||||
from oci.generative_ai_inference import GenerativeAiInferenceClient
|
||||
from oci.generative_ai_inference.models import (
|
||||
ChatDetails,
|
||||
GenericChatRequest,
|
||||
OnDemandServingMode,
|
||||
TextContent,
|
||||
UserMessage,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_client() -> GenerativeAiInferenceClient:
    """Build an OCI GenAI inference client from the default OCI config file.

    Endpoint: OCI_CHAT_ENDPOINT when set, else OCI_GENAI_ENDPOINT
    (required — KeyError if neither is set).
    """
    config = oci.config.from_file()
    endpoint = os.environ.get("OCI_CHAT_ENDPOINT") or os.environ["OCI_GENAI_ENDPOINT"]
    return GenerativeAiInferenceClient(config, service_endpoint=endpoint)
|
||||
|
||||
|
||||
def _llm(prompt: str, max_tokens: int = 4096) -> str:
    """Send *prompt* as a single user message; return the reply text, stripped.

    temperature=0 for deterministic extraction. A fresh client is built
    per call — simple, but adds per-call setup cost.
    """
    client = _get_client()
    req = GenericChatRequest(
        messages=[UserMessage(content=[TextContent(text=prompt)])],
        max_tokens=max_tokens,
        temperature=0,
    )
    det = ChatDetails(
        compartment_id=os.environ["OCI_COMPARTMENT_ID"],
        serving_mode=OnDemandServingMode(model_id=os.environ["OCI_CHAT_MODEL_ID"]),
        chat_request=req,
    )
    resp = client.chat(det)
    # First choice, first content part of the chat response.
    return resp.data.chat_response.choices[0].message.content[0].text.strip()
|
||||
|
||||
|
||||
def _parse_json(raw: str) -> dict | list:
|
||||
raw = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw, flags=re.MULTILINE).strip()
|
||||
raw = re.sub(r",\s*([}\]])", r"\1", raw)
|
||||
try:
|
||||
return json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
try:
|
||||
return json.JSONDecoder(strict=False).decode(raw)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
# recover truncated array
|
||||
if raw.lstrip().startswith("["):
|
||||
decoder = json.JSONDecoder(strict=False)
|
||||
items: list = []
|
||||
idx = raw.index("[") + 1
|
||||
while idx < len(raw):
|
||||
while idx < len(raw) and raw[idx] in " \t\n\r,":
|
||||
idx += 1
|
||||
if idx >= len(raw) or raw[idx] == "]":
|
||||
break
|
||||
try:
|
||||
obj, end = decoder.raw_decode(raw, idx)
|
||||
items.append(obj)
|
||||
idx = end
|
||||
except json.JSONDecodeError:
|
||||
break
|
||||
if items:
|
||||
return items
|
||||
raise ValueError(f"JSON parse failed: {raw[:80]!r}")
|
||||
|
||||
|
||||
_EXTRACT_PROMPT = """\
|
||||
다음은 유튜브 먹방/맛집 영상의 자막입니다.
|
||||
이 영상에서 언급된 모든 식당 정보를 추출하세요.
|
||||
|
||||
규칙:
|
||||
- 식당이 없으면 빈 배열 [] 반환
|
||||
- 각 식당에 대해 아래 필드를 JSON 배열로 반환
|
||||
- 확실하지 않은 정보는 null
|
||||
- 추가 설명 없이 JSON만 반환
|
||||
|
||||
필드:
|
||||
- name: 식당 이름 (string, 필수)
|
||||
- address: 주소 또는 위치 힌트 (string | null)
|
||||
- region: 지역 (예: 서울 강남, 부산 해운대) (string | null)
|
||||
- cuisine_type: 음식 종류 (예: 한식, 일식, 중식, 양식, 카페) (string | null)
|
||||
- price_range: 가격대 (예: 1만원대, 2-3만원) (string | null)
|
||||
- foods_mentioned: 언급된 메뉴들 (string[])
|
||||
- evaluation: 평가 내용 (string | null)
|
||||
- guests: 함께한 게스트 (string[])
|
||||
|
||||
영상 제목: {title}
|
||||
자막:
|
||||
{transcript}
|
||||
|
||||
JSON 배열:"""
|
||||
|
||||
|
||||
def extract_restaurants(title: str, transcript: str) -> tuple[list[dict], str]:
    """Extract restaurant info from a video transcript via the LLM.

    Returns (restaurant dicts, raw LLM response text). Any failure is
    logged and ([], "") returned so the pipeline keeps going.
    """
    # Keep the prompt bounded: for very long transcripts keep the head
    # and tail, dropping the middle.
    if len(transcript) > 8000:
        transcript = transcript[:7000] + "\n...(중략)...\n" + transcript[-1000:]

    prompt = _EXTRACT_PROMPT.format(title=title, transcript=transcript)

    try:
        raw = _llm(prompt, max_tokens=4096)
        parsed = _parse_json(raw)
    except Exception as e:
        logger.error("Restaurant extraction failed: %s", e)
        return [], ""

    if isinstance(parsed, list):
        return parsed, raw
    if isinstance(parsed, dict):
        # Single object -> wrap so callers always get a list.
        return [parsed], raw
    return [], raw
|
||||
97
backend/core/geocoding.py
Normal file
97
backend/core/geocoding.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Google Maps Geocoding + Place Search for restaurant location lookup."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _api_key() -> str:
|
||||
return os.environ["GOOGLE_MAPS_API_KEY"]
|
||||
|
||||
|
||||
def geocode_restaurant(name: str, address: str | None = None, region: str | None = None) -> dict | None:
    """Look up restaurant coordinates using Google Maps.

    Places Text Search is tried first (better for business names), with
    the Geocoding API as a fallback.

    Returns dict with: latitude, longitude, formatted_address,
    google_place_id — or None when nothing matched.
    """
    # Query = name plus the most specific location hint available
    # (full address preferred over region).
    parts = [name]
    if address:
        parts.append(address)
    elif region:
        parts.append(region)
    query = " ".join(parts)

    hit = _places_text_search(query)
    if hit is not None:
        return hit
    return _geocode(query)
|
||||
|
||||
|
||||
def _places_text_search(query: str) -> dict | None:
    """Search for a place via the Google Places Text Search API.

    Returns a location dict for the top hit, or None on miss or error
    (errors are logged as warnings, never raised).
    """
    url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
    params = {
        "query": query,
        "key": _api_key(),
        "language": "ko",
        "type": "restaurant",
    }
    try:
        resp = httpx.get(url, params=params, timeout=10)
        resp.raise_for_status()
        payload = resp.json()

        if payload.get("status") == "OK" and payload.get("results"):
            top = payload["results"][0]
            point = top["geometry"]["location"]
            return {
                "latitude": point["lat"],
                "longitude": point["lng"],
                "formatted_address": top.get("formatted_address", ""),
                "google_place_id": top.get("place_id", ""),
            }
    except Exception as e:
        logger.warning("Places text search failed for '%s': %s", query, e)
    return None
|
||||
|
||||
|
||||
def _geocode(query: str) -> dict | None:
    """Geocode an address string via the Google Geocoding API.

    Returns dict with: latitude, longitude, formatted_address,
    google_place_id — or None on miss or error (errors are logged).
    """
    try:
        r = httpx.get(
            "https://maps.googleapis.com/maps/api/geocode/json",
            params={
                "address": query,
                "key": _api_key(),
                "language": "ko",
            },
            timeout=10,
        )
        r.raise_for_status()
        data = r.json()

        if data.get("status") == "OK" and data.get("results"):
            result = data["results"][0]
            loc = result["geometry"]["location"]
            return {
                "latitude": loc["lat"],
                "longitude": loc["lng"],
                "formatted_address": result.get("formatted_address", ""),
                # FIX: Geocoding API results also carry a place_id; this
                # was previously hard-coded to "", losing the ID on the
                # fallback path.
                "google_place_id": result.get("place_id", ""),
            }
    except Exception as e:
        logger.warning("Geocoding failed for '%s': %s", query, e)
    return None
|
||||
134
backend/core/pipeline.py
Normal file
134
backend/core/pipeline.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""Data pipeline: process pending videos end-to-end.
|
||||
|
||||
For each pending video:
|
||||
1. Fetch transcript
|
||||
2. Extract restaurant info via LLM
|
||||
3. Geocode each restaurant
|
||||
4. Save to DB + generate vector embeddings
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from core import youtube, extractor, geocoding, restaurant, vector
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def process_video(video: dict) -> int:
    """Process a single pending video end-to-end. Returns the number of
    restaurants saved.

    Steps: fetch transcript -> LLM extraction -> geocode -> upsert, link,
    and embed each restaurant. Status transitions on the video row:
    processing -> done (normal paths) or error (unhandled failure).

    Expects *video* to carry: id (DB row id), video_id (YouTube id), title.
    """
    video_db_id = video["id"]
    video_id = video["video_id"]
    title = video["title"]

    logger.info("Processing video: %s (%s)", title, video_id)
    youtube.update_video_status(video_db_id, "processing")

    try:
        # 1. Transcript — a video without one is marked done (nothing to do).
        transcript = youtube.get_transcript(video_id)
        if not transcript:
            logger.warning("No transcript for %s, marking done", video_id)
            youtube.update_video_status(video_db_id, "done")
            return 0

        # Persist the transcript while the video is still "processing".
        youtube.update_video_status(video_db_id, "processing", transcript)

        # 2. LLM extraction
        restaurants, llm_raw = extractor.extract_restaurants(title, transcript)
        if not restaurants:
            logger.info("No restaurants found in %s", video_id)
            youtube.update_video_status(video_db_id, "done", llm_raw=llm_raw)
            return 0

        # 3-4. Geocode + save each restaurant
        count = 0
        for rest_data in restaurants:
            name = rest_data.get("name")
            if not name:
                # name is the only required field — skip nameless entries.
                continue

            # Geocode (may return None; the restaurant is still saved,
            # just without coordinates).
            geo = geocoding.geocode_restaurant(
                name,
                address=rest_data.get("address"),
                region=rest_data.get("region"),
            )

            lat = geo["latitude"] if geo else None
            lng = geo["longitude"] if geo else None
            # Prefer the geocoder's formatted address, falling back to
            # whatever the LLM extracted.
            addr = geo["formatted_address"] if geo else rest_data.get("address")
            place_id = geo["google_place_id"] if geo else None

            # Upsert restaurant
            rest_id = restaurant.upsert(
                name=name,
                address=addr,
                region=rest_data.get("region"),
                latitude=lat,
                longitude=lng,
                cuisine_type=rest_data.get("cuisine_type"),
                price_range=rest_data.get("price_range"),
                google_place_id=place_id,
            )

            # Link video <-> restaurant
            restaurant.link_video_restaurant(
                video_db_id=video_db_id,
                restaurant_id=rest_id,
                foods=rest_data.get("foods_mentioned"),
                evaluation=rest_data.get("evaluation"),
                guests=rest_data.get("guests"),
            )

            # Vector embeddings for semantic search
            chunks = _build_chunks(name, rest_data, title)
            if chunks:
                vector.save_restaurant_vectors(rest_id, chunks)

            count += 1
            logger.info("Saved restaurant: %s (geocoded=%s)", name, bool(geo))

        youtube.update_video_status(video_db_id, "done", llm_raw=llm_raw)
        logger.info("Video %s done: %d restaurants", video_id, count)
        return count

    except Exception as e:
        # Broad catch by design: one bad video must not kill the worker loop.
        logger.error("Pipeline error for %s: %s", video_id, e, exc_info=True)
        youtube.update_video_status(video_db_id, "error")
        return 0
|
||||
|
||||
|
||||
def _build_chunks(name: str, data: dict, video_title: str) -> list[str]:
|
||||
"""Build text chunks for vector embedding."""
|
||||
parts = [f"식당: {name}"]
|
||||
if data.get("region"):
|
||||
parts.append(f"지역: {data['region']}")
|
||||
if data.get("cuisine_type"):
|
||||
parts.append(f"음식 종류: {data['cuisine_type']}")
|
||||
if data.get("foods_mentioned"):
|
||||
foods = data["foods_mentioned"]
|
||||
if isinstance(foods, list):
|
||||
parts.append(f"메뉴: {', '.join(foods)}")
|
||||
if data.get("evaluation"):
|
||||
parts.append(f"평가: {data['evaluation']}")
|
||||
if data.get("price_range"):
|
||||
parts.append(f"가격대: {data['price_range']}")
|
||||
parts.append(f"영상: {video_title}")
|
||||
|
||||
return ["\n".join(parts)]
|
||||
|
||||
|
||||
def process_pending(limit: int = 5) -> int:
    """Process up to *limit* pending videos; return total restaurants found."""
    pending = youtube.get_pending_videos(limit)
    if not pending:
        logger.info("No pending videos")
        return 0
    return sum(process_video(v) for v in pending)
|
||||
205
backend/core/restaurant.py
Normal file
205
backend/core/restaurant.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""Restaurant DB operations — save extracted data, link to videos."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import oracledb
|
||||
|
||||
from core.db import conn
|
||||
|
||||
|
||||
def find_by_name(name: str) -> dict | None:
    """Find a restaurant by exact name match; None when absent."""
    sql = "SELECT id, name, address, region, latitude, longitude FROM restaurants WHERE name = :n"
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"n": name})
        row = cur.fetchone()
        if row is None:
            return None
        keys = ("id", "name", "address", "region", "latitude", "longitude")
        return dict(zip(keys, row))
|
||||
|
||||
|
||||
def upsert(
    name: str,
    address: str | None = None,
    region: str | None = None,
    latitude: float | None = None,
    longitude: float | None = None,
    cuisine_type: str | None = None,
    price_range: str | None = None,
    google_place_id: str | None = None,
) -> str:
    """Insert or update a restaurant (matched by exact name). Returns row id.

    Updates only overwrite a column when the incoming value is non-NULL
    (COALESCE), so repeated sightings accumulate detail without erasing it.

    NOTE(review): find-then-insert is not atomic — two workers saving the
    same new restaurant concurrently could insert duplicates unless the
    schema has a unique constraint on name; verify.
    """
    existing = find_by_name(name)
    if existing:
        sql = """
            UPDATE restaurants
            SET address = COALESCE(:addr, address),
                region = COALESCE(:reg, region),
                latitude = COALESCE(:lat, latitude),
                longitude = COALESCE(:lng, longitude),
                cuisine_type = COALESCE(:cuisine, cuisine_type),
                price_range = COALESCE(:price, price_range),
                google_place_id = COALESCE(:gid, google_place_id),
                updated_at = SYSTIMESTAMP
            WHERE id = :id
        """
        with conn() as c:
            c.cursor().execute(sql, {
                "addr": address, "reg": region,
                "lat": latitude, "lng": longitude,
                "cuisine": cuisine_type, "price": price_range,
                "gid": google_place_id, "id": existing["id"],
            })
        return existing["id"]

    sql = """
        INSERT INTO restaurants (name, address, region, latitude, longitude,
                                 cuisine_type, price_range, google_place_id)
        VALUES (:name, :addr, :reg, :lat, :lng, :cuisine, :price, :gid)
        RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        # Output bind captures the DB-generated primary key.
        out_id = cur.var(oracledb.STRING)
        cur.execute(sql, {
            "name": name, "addr": address, "reg": region,
            "lat": latitude, "lng": longitude,
            "cuisine": cuisine_type, "price": price_range,
            "gid": google_place_id, "out_id": out_id,
        })
        # RETURNING ... INTO yields a list; single-row insert -> first element.
        return out_id.getvalue()[0]
|
||||
|
||||
|
||||
def link_video_restaurant(
    video_db_id: str,
    restaurant_id: str,
    foods: list[str] | None = None,
    evaluation: str | None = None,
    guests: list[str] | None = None,
    citation: str | None = None,
) -> str | None:
    """Create a video-restaurant mapping row.

    foods/guests are stored as JSON arrays, evaluation as {"text": ...}
    (empty object when absent). Returns the new row id, or None when the
    (video, restaurant) pair already exists.
    """
    sql = """
        INSERT INTO video_restaurants
            (video_id, restaurant_id, foods_mentioned, evaluation, guests, citation_text)
        VALUES (:vid, :rid, :foods, :eval, :guests, :cite)
        RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        out_id = cur.var(oracledb.STRING)
        try:
            cur.execute(sql, {
                "vid": video_db_id,
                "rid": restaurant_id,
                "foods": json.dumps(foods or [], ensure_ascii=False),
                "eval": json.dumps({"text": evaluation} if evaluation else {}, ensure_ascii=False),
                "guests": json.dumps(guests or [], ensure_ascii=False),
                "cite": citation,
                "out_id": out_id,
            })
            return out_id.getvalue()[0]
        except Exception as e:
            # Duplicate link: detected via the unique-constraint name in
            # the Oracle error message and treated as a no-op.
            if "UQ_VR_VIDEO_REST" in str(e).upper():
                return None
            raise
|
||||
|
||||
|
||||
def get_all(
    limit: int = 100,
    offset: int = 0,
    cuisine: str | None = None,
    region: str | None = None,
) -> list[dict]:
    """List geocoded restaurants, most recently updated first.

    Optional filters: exact cuisine_type match and substring region
    match. Only restaurants with coordinates are returned.
    """
    # WHERE fragments are fixed strings; user values go through binds only.
    where_parts = ["latitude IS NOT NULL"]
    binds: dict = {"lim": limit, "off": offset}

    if cuisine:
        where_parts.append("cuisine_type = :cuisine")
        binds["cuisine"] = cuisine
    if region:
        where_parts.append("region LIKE :region")
        binds["region"] = f"%{region}%"

    sql = f"""
        SELECT id, name, address, region, latitude, longitude,
               cuisine_type, price_range, google_place_id
        FROM restaurants
        WHERE {" AND ".join(where_parts)}
        ORDER BY updated_at DESC
        OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, binds)
        cols = [d[0].lower() for d in cur.description]
        return [dict(zip(cols, row)) for row in cur.fetchall()]
|
||||
|
||||
|
||||
def get_by_id(restaurant_id: str) -> dict | None:
    """Fetch a single restaurant row as a dict, or None when absent."""
    sql = """
        SELECT r.id, r.name, r.address, r.region, r.latitude, r.longitude,
               r.cuisine_type, r.price_range, r.phone, r.website, r.google_place_id
        FROM restaurants r
        WHERE r.id = :id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"id": restaurant_id})
        row = cur.fetchone()
        if row is None:
            return None
        keys = [d[0].lower() for d in cur.description]
        return dict(zip(keys, row))
|
||||
|
||||
|
||||
def get_video_links(restaurant_id: str) -> list[dict]:
    """Get all video appearances for a restaurant, newest first.

    JSON columns (foods/evaluation/guests) may come back as Oracle LOB
    objects; they are read into strings and parsed, falling back to
    []/{}/[] on bad data via _parse_json_field.
    """
    sql = """
        SELECT v.video_id, v.title, v.url, v.published_at,
               vr.foods_mentioned, vr.evaluation, vr.guests
        FROM video_restaurants vr
        JOIN videos v ON v.id = vr.video_id
        WHERE vr.restaurant_id = :rid
        ORDER BY v.published_at DESC
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"rid": restaurant_id})
        results = []
        for r in cur.fetchall():
            # LOB values expose .read(); plain strings pass through.
            foods_raw = r[4].read() if hasattr(r[4], "read") else r[4]
            eval_raw = r[5].read() if hasattr(r[5], "read") else r[5]
            guests_raw = r[6].read() if hasattr(r[6], "read") else r[6]
            results.append({
                "video_id": r[0],
                "title": r[1],
                "url": r[2],
                "published_at": r[3].isoformat() if r[3] else None,
                "foods_mentioned": _parse_json_field(foods_raw, []),
                "evaluation": _parse_json_field(eval_raw, {}),
                "guests": _parse_json_field(guests_raw, []),
            })
        return results
|
||||
|
||||
|
||||
def _parse_json_field(val, default):
|
||||
"""Parse a JSON field that may be a string, already-parsed object, or None."""
|
||||
if val is None:
|
||||
return default
|
||||
if isinstance(val, (list, dict)):
|
||||
return val
|
||||
if isinstance(val, str):
|
||||
try:
|
||||
return json.loads(val)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
return default
|
||||
return default
|
||||
189
backend/core/review.py
Normal file
189
backend/core/review.py
Normal file
@@ -0,0 +1,189 @@
|
||||
"""User review DB operations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
|
||||
import oracledb
|
||||
|
||||
from core.db import conn
|
||||
|
||||
|
||||
def create_review(
    user_id: str,
    restaurant_id: str,
    rating: float,
    review_text: str | None = None,
    visited_at: date | None = None,
) -> dict:
    """Create a new review. Returns the created review dict.

    The insert is committed when the conn() block exits, so the
    follow-up get_review_by_id (which joins the user profile) sees it.
    """
    sql = """
        INSERT INTO user_reviews (user_id, restaurant_id, rating, review_text, visited_at)
        VALUES (:user_id, :restaurant_id, :rating, :review_text, :visited_at)
        RETURNING id INTO :out_id
    """
    with conn() as c:
        cur = c.cursor()
        # Output bind captures the DB-generated primary key.
        out_id = cur.var(oracledb.STRING)
        cur.execute(sql, {
            "user_id": user_id,
            "restaurant_id": restaurant_id,
            "rating": rating,
            "review_text": review_text,
            "visited_at": visited_at,
            "out_id": out_id,
        })
        new_id = out_id.getvalue()[0]

    return get_review_by_id(new_id)
|
||||
|
||||
|
||||
def update_review(
    review_id: str,
    user_id: str,
    rating: float | None = None,
    review_text: str | None = None,
    visited_at: date | None = None,
) -> dict | None:
    """Update an existing review. Only the owner can update.

    None-valued fields are left unchanged (COALESCE). Returns the
    updated review dict, or None if not found / not owner.
    """
    sql = """
        UPDATE user_reviews
        SET rating = COALESCE(:rating, rating),
            review_text = COALESCE(:review_text, review_text),
            visited_at = COALESCE(:visited_at, visited_at),
            updated_at = SYSTIMESTAMP
        WHERE id = :id AND user_id = :user_id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {
            "rating": rating,
            "review_text": review_text,
            "visited_at": visited_at,
            "id": review_id,
            "user_id": user_id,
        })
        # rowcount == 0 -> no such review, or it belongs to another user.
        if cur.rowcount == 0:
            return None

    return get_review_by_id(review_id)
|
||||
|
||||
|
||||
def delete_review(review_id: str, user_id: str) -> bool:
    """Delete a review (owner only). Returns True when a row was removed."""
    sql = "DELETE FROM user_reviews WHERE id = :id AND user_id = :user_id"
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"id": review_id, "user_id": user_id})
        deleted = cur.rowcount > 0
        return deleted
|
||||
|
||||
|
||||
def get_review_by_id(review_id: str) -> dict | None:
    """Get a single review by ID, joined with the reviewer's
    nickname/avatar. Returns None when no such review exists.
    """
    sql = """
        SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
               r.visited_at, r.created_at, r.updated_at,
               u.nickname, u.avatar_url
        FROM user_reviews r
        JOIN tasteby_users u ON u.id = r.user_id
        WHERE r.id = :id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"id": review_id})
        row = cur.fetchone()
        if not row:
            return None
        return _row_to_dict(row)
|
||||
|
||||
|
||||
def get_reviews_for_restaurant(
    restaurant_id: str,
    limit: int = 20,
    offset: int = 0,
) -> list[dict]:
    """List a restaurant's reviews, newest first, with reviewer info."""
    sql = """
        SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
               r.visited_at, r.created_at, r.updated_at,
               u.nickname, u.avatar_url
        FROM user_reviews r
        JOIN tasteby_users u ON u.id = r.user_id
        WHERE r.restaurant_id = :restaurant_id
        ORDER BY r.created_at DESC
        OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    binds = {"restaurant_id": restaurant_id, "off": offset, "lim": limit}
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, binds)
        rows = cur.fetchall()
    return [_row_to_dict(r) for r in rows]
|
||||
|
||||
|
||||
def get_user_reviews(
    user_id: str,
    limit: int = 20,
    offset: int = 0,
) -> list[dict]:
    """List a user's reviews, newest first, with reviewer info."""
    sql = """
        SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
               r.visited_at, r.created_at, r.updated_at,
               u.nickname, u.avatar_url
        FROM user_reviews r
        JOIN tasteby_users u ON u.id = r.user_id
        WHERE r.user_id = :user_id
        ORDER BY r.created_at DESC
        OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    binds = {"user_id": user_id, "off": offset, "lim": limit}
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, binds)
        rows = cur.fetchall()
    return [_row_to_dict(r) for r in rows]
|
||||
|
||||
|
||||
def get_restaurant_avg_rating(restaurant_id: str) -> dict:
    """Get average rating and review count for a restaurant.

    Returns {"avg_rating": float | None, "review_count": int};
    avg_rating is None only when the restaurant has no reviews
    (AVG over zero rows is NULL).
    """
    sql = """
        SELECT ROUND(AVG(rating), 1) AS avg_rating, COUNT(*) AS review_count
        FROM user_reviews
        WHERE restaurant_id = :restaurant_id
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"restaurant_id": restaurant_id})
        row = cur.fetchone()
        return {
            # FIX: compare against None, not truthiness — a legitimate
            # 0.0 average was previously reported as None.
            "avg_rating": float(row[0]) if row[0] is not None else None,
            "review_count": int(row[1]),
        }
|
||||
|
||||
|
||||
def _row_to_dict(row) -> dict:
|
||||
"""Convert a review query row to a dict."""
|
||||
review_text = row[4]
|
||||
if hasattr(review_text, "read"):
|
||||
review_text = review_text.read()
|
||||
|
||||
return {
|
||||
"id": row[0],
|
||||
"user_id": row[1],
|
||||
"restaurant_id": row[2],
|
||||
"rating": float(row[3]),
|
||||
"review_text": review_text,
|
||||
"visited_at": row[5].isoformat() if row[5] else None,
|
||||
"created_at": row[6].isoformat() if row[6] else None,
|
||||
"updated_at": row[7].isoformat() if row[7] else None,
|
||||
"user_nickname": row[8],
|
||||
"user_avatar_url": row[9],
|
||||
}
|
||||
97
backend/core/vector.py
Normal file
97
backend/core/vector.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Vector embedding generation and storage for restaurant semantic search."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import array
|
||||
import os
|
||||
|
||||
import oci
|
||||
from oci.generative_ai_inference import GenerativeAiInferenceClient
|
||||
from oci.generative_ai_inference.models import (
|
||||
EmbedTextDetails,
|
||||
OnDemandServingMode,
|
||||
)
|
||||
|
||||
from core.db import conn
|
||||
|
||||
|
||||
def _embed_texts(texts: list[str]) -> list[list[float]]:
    """Embed *texts* via OCI GenAI; one embedding vector per input text.

    Model comes from OCI_EMBED_MODEL_ID (default cohere.embed-v4.0).
    A fresh client is built per call.

    NOTE(review): input_type is always SEARCH_DOCUMENT, yet
    search_similar also embeds *queries* through this function — Cohere
    distinguishes SEARCH_QUERY for the query side; confirm intended.
    """
    config = oci.config.from_file()
    client = GenerativeAiInferenceClient(
        config,
        service_endpoint=os.environ["OCI_GENAI_ENDPOINT"],
    )
    model_id = os.environ.get("OCI_EMBED_MODEL_ID", "cohere.embed-v4.0")
    compartment_id = os.environ["OCI_COMPARTMENT_ID"]

    details = EmbedTextDetails(
        inputs=texts,
        serving_mode=OnDemandServingMode(model_id=model_id),
        compartment_id=compartment_id,
        input_type="SEARCH_DOCUMENT",
    )
    response = client.embed_text(details)
    return response.data.embeddings
|
||||
|
||||
|
||||
def _to_vec(embedding: list[float]) -> array.array:
|
||||
return array.array("f", embedding)
|
||||
|
||||
|
||||
def save_restaurant_vectors(restaurant_id: str, chunks: list[str]) -> list[str]:
    """Embed and store text chunks for a restaurant.

    Returns list of inserted row IDs ([] when chunks is empty). All
    chunks are embedded in a single API call, then inserted one row each
    inside one transaction (conn() commits on success).
    """
    if not chunks:
        return []

    embeddings = _embed_texts(chunks)
    inserted: list[str] = []

    sql = """
        INSERT INTO restaurant_vectors (restaurant_id, chunk_text, embedding)
        VALUES (:rid, :chunk, :emb)
        RETURNING id INTO :out_id
    """
    # Local import kept as in the original; oracledb is only needed here
    # for the output bind type.
    import oracledb
    with conn() as c:
        cur = c.cursor()
        for chunk, emb in zip(chunks, embeddings):
            # Fresh output bind per row to capture each generated id.
            out_id = cur.var(oracledb.STRING)
            cur.execute(sql, {
                "rid": restaurant_id,
                "chunk": chunk,
                "emb": _to_vec(emb),
                "out_id": out_id,
            })
            inserted.append(out_id.getvalue()[0])
    return inserted
|
||||
|
||||
|
||||
def search_similar(query: str, top_k: int = 10) -> list[dict]:
    """Semantic search: find restaurants similar to query text.

    Returns list of dicts: restaurant_id, chunk_text, distance.
    """
    query_vec = _to_vec(_embed_texts([query])[0])

    sql = """
        SELECT rv.restaurant_id, rv.chunk_text,
               VECTOR_DISTANCE(rv.embedding, :qvec, COSINE) AS dist
        FROM restaurant_vectors rv
        ORDER BY dist
        FETCH FIRST :k ROWS ONLY
    """
    results: list[dict] = []
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"qvec": query_vec, "k": top_k})
        for rid, chunk, dist in cur.fetchall():
            # chunk_text may arrive as a LOB handle; materialize it to str.
            text = chunk.read() if hasattr(chunk, "read") else chunk
            results.append({
                "restaurant_id": rid,
                "chunk_text": text,
                "distance": dist,
            })
    return results
|
||||
221
backend/core/youtube.py
Normal file
221
backend/core/youtube.py
Normal file
@@ -0,0 +1,221 @@
|
||||
"""YouTube channel scanner + transcript extraction.
|
||||
|
||||
Uses YouTube Data API v3 for channel video listing,
|
||||
youtube-transcript-api for transcript extraction.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
import httpx
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
|
||||
from core.db import conn
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _api_key() -> str:
|
||||
return os.environ["YOUTUBE_DATA_API_KEY"]
|
||||
|
||||
|
||||
def extract_video_id(url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Supports classic watch URLs (``?v=``), ``youtu.be`` short links, and
    ``/shorts/``, ``/embed/``, ``/live/`` path forms. The ID is taken as
    everything up to the next ``&``, ``?``, ``/`` or whitespace.

    Raises:
        ValueError: if no video ID can be found in *url*.
    """
    match = re.search(
        r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([^&?/\s]+)", url
    )
    if not match:
        raise ValueError(f"Cannot extract video ID from URL: {url}")
    return match.group(1)
|
||||
|
||||
|
||||
# -- Channel operations -------------------------------------------------------
|
||||
|
||||
def add_channel(channel_id: str, channel_name: str) -> str:
    """Register a YouTube channel. Returns DB row id."""
    import oracledb

    sql = """
        INSERT INTO channels (channel_id, channel_name, channel_url)
        VALUES (:cid, :cname, :curl)
        RETURNING id INTO :out_id
    """
    channel_url = f"https://www.youtube.com/channel/{channel_id}"
    with conn() as c:
        cur = c.cursor()
        id_var = cur.var(oracledb.STRING)
        binds = {
            "cid": channel_id,
            "cname": channel_name,
            "curl": channel_url,
            "out_id": id_var,
        }
        cur.execute(sql, binds)
        return id_var.getvalue()[0]
|
||||
|
||||
|
||||
def get_active_channels() -> list[dict]:
    """Return every channel with is_active = 1 as id/channel_id/channel_name dicts."""
    sql = "SELECT id, channel_id, channel_name FROM channels WHERE is_active = 1"
    keys = ("id", "channel_id", "channel_name")
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql)
        rows = cur.fetchall()
    return [dict(zip(keys, row)) for row in rows]
|
||||
|
||||
|
||||
# -- Video listing via YouTube Data API v3 ------------------------------------
|
||||
|
||||
def fetch_channel_videos(
    channel_id: str,
    max_results: int = 50,
    published_after: str | None = None,
) -> list[dict]:
    """Fetch video list from a YouTube channel via Data API v3.

    Returns list of dicts: video_id, title, published_at, url.
    """
    search_url = "https://www.googleapis.com/youtube/v3/search"
    params: dict = {
        "key": _api_key(),
        "channelId": channel_id,
        "part": "snippet",
        "order": "date",
        # The API caps page size at 50; page through for larger requests.
        "maxResults": min(max_results, 50),
        "type": "video",
    }
    if published_after:
        params["publishedAfter"] = published_after

    collected: list[dict] = []
    page_token = None

    while True:
        if page_token:
            params["pageToken"] = page_token

        resp = httpx.get(search_url, params=params, timeout=15)
        resp.raise_for_status()
        payload = resp.json()

        for item in payload.get("items", []):
            video_id = item["id"]["videoId"]
            snippet = item["snippet"]
            collected.append({
                "video_id": video_id,
                "title": snippet["title"],
                "published_at": snippet["publishedAt"],
                "url": f"https://www.youtube.com/watch?v={video_id}",
            })

        page_token = payload.get("nextPageToken")
        if not page_token or len(collected) >= max_results:
            break

    # The last page may overshoot the requested count.
    return collected[:max_results]
|
||||
|
||||
|
||||
# -- Transcript extraction ----------------------------------------------------
|
||||
|
||||
def get_transcript(video_id: str) -> str | None:
    """Fetch transcript text for a video. Returns None if unavailable."""
    try:
        segments = YouTubeTranscriptApi().fetch(video_id, languages=["ko", "en"])
        parts = [segment.text for segment in segments]
        return " ".join(parts)
    except Exception as e:
        # Best-effort: missing/disabled captions are expected, so log and skip.
        logger.warning("Transcript unavailable for %s: %s", video_id, e)
        return None
|
||||
|
||||
|
||||
# -- DB operations for videos -------------------------------------------------
|
||||
|
||||
def save_video(channel_db_id: str, video: dict) -> str | None:
    """Insert a video row if not exists. Returns row id or None if duplicate.

    Duplicates are detected via the UQ_VIDEOS_VID unique constraint on the
    video_id column; any other database error propagates to the caller.

    Raises:
        ValueError: if ``video["published_at"]`` is not ISO-8601.
    """
    import oracledb

    sql = """
        INSERT INTO videos (channel_id, video_id, title, url, published_at, status)
        VALUES (:ch_id, :vid, :title, :url, :pub_at, 'pending')
        RETURNING id INTO :out_id
    """
    # Parse the timestamp before touching the DB so a malformed value raises
    # ValueError directly instead of being inspected as a database error.
    pub_at = None
    if video.get("published_at"):
        pub_at = datetime.fromisoformat(
            video["published_at"].replace("Z", "+00:00")
        )

    with conn() as c:
        cur = c.cursor()
        out_id = cur.var(oracledb.STRING)
        try:
            cur.execute(sql, {
                "ch_id": channel_db_id,
                "vid": video["video_id"],
                "title": video["title"],
                "url": video["url"],
                "pub_at": pub_at,
                "out_id": out_id,
            })
        except oracledb.IntegrityError as e:
            # Only swallow the specific unique-key violation for video_id;
            # other integrity errors (FK, NOT NULL, ...) still propagate.
            if "UQ_VIDEOS_VID" in str(e).upper():
                return None  # duplicate
            raise
        return out_id.getvalue()[0]
|
||||
|
||||
|
||||
def get_pending_videos(limit: int = 10) -> list[dict]:
    """Return the oldest videos still in 'pending' status, at most *limit* rows."""
    sql = """
        SELECT id, video_id, title, url
        FROM videos
        WHERE status = 'pending'
        ORDER BY created_at
        FETCH FIRST :n ROWS ONLY
    """
    keys = ("id", "video_id", "title", "url")
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, {"n": limit})
        return [dict(zip(keys, row)) for row in cur.fetchall()]
|
||||
|
||||
|
||||
def update_video_status(
    video_db_id: str,
    status: str,
    transcript: str | None = None,
    llm_raw: str | None = None,
) -> None:
    """Set a video's status and processed_at; optionally store transcript
    text and the raw LLM response (only when non-empty)."""
    assignments = ["status = :st", "processed_at = SYSTIMESTAMP"]
    binds: dict = {"st": status, "vid": video_db_id}
    if transcript:
        assignments.append("transcript_text = :txt")
        binds["txt"] = transcript
    if llm_raw:
        assignments.append("llm_raw_response = :llm_resp")
        binds["llm_resp"] = llm_raw
    # SET clause is assembled from fixed fragments only; values go via binds.
    sql = f"UPDATE videos SET {', '.join(assignments)} WHERE id = :vid"
    with conn() as c:
        c.cursor().execute(sql, binds)
|
||||
|
||||
|
||||
# -- Scan: fetch new videos for all active channels ---------------------------
|
||||
|
||||
def scan_all_channels(max_per_channel: int = 50) -> int:
    """Scan all active channels for new videos. Returns count of new videos.

    A failure on one channel is logged and does not stop the scan.
    """
    total_new = 0
    for ch in get_active_channels():
        try:
            videos = fetch_channel_videos(ch["channel_id"], max_per_channel)
            new_for_channel = 0
            for v in videos:
                if save_video(ch["id"], v):
                    new_for_channel += 1
            total_new += new_for_channel
            # Log the per-channel count; previously this logged the running
            # total, misreporting every channel after the first.
            logger.info(
                "Channel %s: fetched %d videos, %d new",
                ch["channel_name"], len(videos), new_for_channel,
            )
        except Exception as e:
            logger.error("Failed to scan channel %s: %s", ch["channel_name"], e)
    return total_new
|
||||
Reference in New Issue
Block a user