Add admin features, responsive UI, user reviews, visit stats, and channel-colored markers

- Admin: video management with Google Maps match status, manual restaurant mapping, restaurant remap on name change
- Admin: user management tab with favorites/reviews detail
- Admin: channel deletion fix for IDs with slashes
- Frontend: responsive mobile layout (map top, list bottom, 2-row header)
- Frontend: channel-colored map markers with legend
- Frontend: my reviews list, favorites toggle, visit counter overlay
- Frontend: force light mode for dark theme devices
- Backend: visit tracking (site_visits table), user reviews endpoint
- Backend: bulk transcript/extract streaming, geocode key fixes
- Nginx config for production deployment

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 14:52:20 +09:00
parent 36bec10bd0
commit 3694730501
27 changed files with 4346 additions and 189 deletions
--- a/backend/core/extractor.py
+++ b/backend/core/extractor.py
@@ -56,7 +56,7 @@ def _parse_json(raw: str) -> dict | list:
        return json.JSONDecoder(strict=False).decode(raw)
    except json.JSONDecodeError:
        pass
-    # recover truncated array
+    # recover truncated array — extract complete objects one by one
    if raw.lstrip().startswith("["):
        decoder = json.JSONDecoder(strict=False)
        items: list = []
@@ -71,8 +71,19 @@ def _parse_json(raw: str) -> dict | list:
                items.append(obj)
                idx = end
            except json.JSONDecodeError:
+                # Try to recover truncated last object by closing braces
+                remainder = raw[idx:]
+                for fix in ["}", "}]", '"}', '"}'  , '"}]', "null}",  "null}]"]:
+                    try:
+                        patched = remainder.rstrip().rstrip(",") + fix
+                        obj = json.loads(patched)
+                        if isinstance(obj, dict) and obj.get("name"):
+                            items.append(obj)
+                    except (json.JSONDecodeError, ValueError):
+                        continue
                break
        if items:
+            logger.info("Recovered %d restaurants from truncated JSON", len(items))
            return items
    raise ValueError(f"JSON parse failed: {raw[:80]!r}")

@@ -104,7 +115,7 @@ _EXTRACT_PROMPT = """\
 JSON 배열:"""


-def extract_restaurants(title: str, transcript: str) -> tuple[list[dict], str]:
+def extract_restaurants(title: str, transcript: str, custom_prompt: str | None = None) -> tuple[list[dict], str]:
    """Extract restaurant info from a video transcript using LLM.

    Returns (list of restaurant dicts, raw LLM response text).
@@ -113,10 +124,11 @@ def extract_restaurants(title: str, transcript: str) -> tuple[list[dict], str]:
    if len(transcript) > 8000:
        transcript = transcript[:7000] + "\n...(중략)...\n" + transcript[-1000:]

-    prompt = _EXTRACT_PROMPT.format(title=title, transcript=transcript)
+    template = custom_prompt if custom_prompt else _EXTRACT_PROMPT
+    prompt = template.format(title=title, transcript=transcript)

    try:
-        raw = _llm(prompt, max_tokens=4096)
+        raw = _llm(prompt, max_tokens=8192)
        result = _parse_json(raw)
        if isinstance(result, list):
            return result, raw
--- a/backend/core/geocoding.py
+++ b/backend/core/geocoding.py
@@ -57,17 +57,53 @@ def _places_text_search(query: str) -> dict | None:
        if data.get("status") == "OK" and data.get("results"):
            place = data["results"][0]
            loc = place["geometry"]["location"]
-            return {
+            result = {
                "latitude": loc["lat"],
                "longitude": loc["lng"],
                "formatted_address": place.get("formatted_address", ""),
                "google_place_id": place.get("place_id", ""),
+                "business_status": place.get("business_status"),
+                "rating": place.get("rating"),
+                "rating_count": place.get("user_ratings_total"),
            }
+            # Fetch phone/website from Place Details
+            place_id = place.get("place_id")
+            if place_id:
+                details = _place_details(place_id)
+                if details:
+                    result.update(details)
+            return result
    except Exception as e:
        logger.warning("Places text search failed for '%s': %s", query, e)
    return None


+def _place_details(place_id: str) -> dict | None:
+    """Fetch phone and website from Google Place Details API."""
+    try:
+        r = httpx.get(
+            "https://maps.googleapis.com/maps/api/place/details/json",
+            params={
+                "place_id": place_id,
+                "key": _api_key(),
+                "language": "ko",
+                "fields": "formatted_phone_number,website",
+            },
+            timeout=10,
+        )
+        r.raise_for_status()
+        data = r.json()
+        if data.get("status") == "OK" and data.get("result"):
+            res = data["result"]
+            return {
+                "phone": res.get("formatted_phone_number"),
+                "website": res.get("website"),
+            }
+    except Exception as e:
+        logger.warning("Place details failed for '%s': %s", place_id, e)
+    return None
+
+
 def _geocode(query: str) -> dict | None:
    """Geocode an address string."""
    try:
--- a/backend/core/pipeline.py
+++ b/backend/core/pipeline.py
@@ -28,7 +28,7 @@ def process_video(video: dict) -> int:

    try:
        # 1. Transcript
-        transcript = youtube.get_transcript(video_id)
+        transcript, _src = youtube.get_transcript(video_id)
        if not transcript:
            logger.warning("No transcript for %s, marking done", video_id)
            youtube.update_video_status(video_db_id, "done")
@@ -72,6 +72,11 @@ def process_video(video: dict) -> int:
                cuisine_type=rest_data.get("cuisine_type"),
                price_range=rest_data.get("price_range"),
                google_place_id=place_id,
+                phone=geo.get("phone") if geo else None,
+                website=geo.get("website") if geo else None,
+                business_status=geo.get("business_status") if geo else None,
+                rating=geo.get("rating") if geo else None,
+                rating_count=geo.get("rating_count") if geo else None,
            )

            # Link video <-> restaurant
@@ -101,6 +106,76 @@ def process_video(video: dict) -> int:
        return 0


+def process_video_extract(video: dict, transcript: str, custom_prompt: str | None = None) -> int:
+    """Run LLM extraction + geocode + save on an existing transcript.
+    Returns number of restaurants found."""
+    video_db_id = video["id"]
+    title = video["title"]
+
+    logger.info("Extracting restaurants from video: %s", title)
+
+    try:
+        restaurants, llm_raw = extractor.extract_restaurants(title, transcript, custom_prompt=custom_prompt)
+        if not restaurants:
+            youtube.update_video_status(video_db_id, "done", llm_raw=llm_raw)
+            return 0
+
+        count = 0
+        for rest_data in restaurants:
+            name = rest_data.get("name")
+            if not name:
+                continue
+
+            geo = geocoding.geocode_restaurant(
+                name,
+                address=rest_data.get("address"),
+                region=rest_data.get("region"),
+            )
+
+            lat = geo["latitude"] if geo else None
+            lng = geo["longitude"] if geo else None
+            addr = geo["formatted_address"] if geo else rest_data.get("address")
+            place_id = geo["google_place_id"] if geo else None
+
+            rest_id = restaurant.upsert(
+                name=name,
+                address=addr,
+                region=rest_data.get("region"),
+                latitude=lat,
+                longitude=lng,
+                cuisine_type=rest_data.get("cuisine_type"),
+                price_range=rest_data.get("price_range"),
+                google_place_id=place_id,
+                phone=geo.get("phone") if geo else None,
+                website=geo.get("website") if geo else None,
+                business_status=geo.get("business_status") if geo else None,
+                rating=geo.get("rating") if geo else None,
+                rating_count=geo.get("rating_count") if geo else None,
+            )
+
+            restaurant.link_video_restaurant(
+                video_db_id=video_db_id,
+                restaurant_id=rest_id,
+                foods=rest_data.get("foods_mentioned"),
+                evaluation=rest_data.get("evaluation"),
+                guests=rest_data.get("guests"),
+            )
+
+            chunks = _build_chunks(name, rest_data, title)
+            if chunks:
+                vector.save_restaurant_vectors(rest_id, chunks)
+
+            count += 1
+            logger.info("Saved restaurant: %s (geocoded=%s)", name, bool(geo))
+
+        youtube.update_video_status(video_db_id, "done", llm_raw=llm_raw)
+        return count
+
+    except Exception as e:
+        logger.error("Extract error for %s: %s", video["video_id"], e, exc_info=True)
+        return 0
+
+
 def _build_chunks(name: str, data: dict, video_title: str) -> list[str]:
    """Build text chunks for vector embedding."""
    parts = [f"식당: {name}"]
--- a/backend/core/restaurant.py
+++ b/backend/core/restaurant.py
@@ -9,6 +9,16 @@ import oracledb
 from core.db import conn


+def _truncate_bytes(val: str | None, max_bytes: int) -> str | None:
+    """Truncate a string to fit within max_bytes when encoded as UTF-8."""
+    if not val:
+        return val
+    encoded = val.encode("utf-8")
+    if len(encoded) <= max_bytes:
+        return val
+    return encoded[:max_bytes].decode("utf-8", errors="ignore").rstrip()
+
+
 def find_by_name(name: str) -> dict | None:
    """Find a restaurant by exact name match."""
    sql = "SELECT id, name, address, region, latitude, longitude FROM restaurants WHERE name = :n"
@@ -33,8 +43,19 @@ def upsert(
    cuisine_type: str | None = None,
    price_range: str | None = None,
    google_place_id: str | None = None,
+    phone: str | None = None,
+    website: str | None = None,
+    business_status: str | None = None,
+    rating: float | None = None,
+    rating_count: int | None = None,
 ) -> str:
    """Insert or update a restaurant. Returns row id."""
+    # Truncate fields to fit DB column byte limits (VARCHAR2 is byte-based)
+    price_range = _truncate_bytes(price_range, 50)
+    cuisine_type = _truncate_bytes(cuisine_type, 100)
+    region = _truncate_bytes(region, 100)
+    website = _truncate_bytes(website, 500)
+
    existing = find_by_name(name)
    if existing:
        sql = """
@@ -46,6 +67,11 @@ def upsert(
                cuisine_type = COALESCE(:cuisine, cuisine_type),
                price_range = COALESCE(:price, price_range),
                google_place_id = COALESCE(:gid, google_place_id),
+                phone = COALESCE(:phone, phone),
+                website = COALESCE(:web, website),
+                business_status = COALESCE(:bstatus, business_status),
+                rating = COALESCE(:rating, rating),
+                rating_count = COALESCE(:rcnt, rating_count),
                updated_at = SYSTIMESTAMP
            WHERE id = :id
        """
@@ -54,14 +80,18 @@ def upsert(
                "addr": address, "reg": region,
                "lat": latitude, "lng": longitude,
                "cuisine": cuisine_type, "price": price_range,
-                "gid": google_place_id, "id": existing["id"],
+                "gid": google_place_id, "phone": phone, "web": website,
+                "bstatus": business_status, "rating": rating, "rcnt": rating_count,
+                "id": existing["id"],
            })
        return existing["id"]

    sql = """
        INSERT INTO restaurants (name, address, region, latitude, longitude,
-                                 cuisine_type, price_range, google_place_id)
-        VALUES (:name, :addr, :reg, :lat, :lng, :cuisine, :price, :gid)
+                                 cuisine_type, price_range, google_place_id,
+                                 phone, website, business_status, rating, rating_count)
+        VALUES (:name, :addr, :reg, :lat, :lng, :cuisine, :price, :gid,
+                :phone, :web, :bstatus, :rating, :rcnt)
        RETURNING id INTO :out_id
    """
    with conn() as c:
@@ -71,7 +101,9 @@ def upsert(
            "name": name, "addr": address, "reg": region,
            "lat": latitude, "lng": longitude,
            "cuisine": cuisine_type, "price": price_range,
-            "gid": google_place_id, "out_id": out_id,
+            "gid": google_place_id, "phone": phone, "web": website,
+            "bstatus": business_status, "rating": rating, "rcnt": rating_count,
+            "out_id": out_id,
        })
        return out_id.getvalue()[0]

@@ -116,38 +148,83 @@ def get_all(
    offset: int = 0,
    cuisine: str | None = None,
    region: str | None = None,
+    channel: str | None = None,
 ) -> list[dict]:
    """List restaurants with optional filters."""
-    conditions = ["latitude IS NOT NULL"]
+    conditions = [
+        "r.latitude IS NOT NULL",
+        "EXISTS (SELECT 1 FROM video_restaurants vr0 WHERE vr0.restaurant_id = r.id)",
+    ]
    params: dict = {"lim": limit, "off": offset}

    if cuisine:
-        conditions.append("cuisine_type = :cuisine")
+        conditions.append("r.cuisine_type = :cuisine")
        params["cuisine"] = cuisine
    if region:
-        conditions.append("region LIKE :region")
+        conditions.append("r.region LIKE :region")
        params["region"] = f"%{region}%"

+    join_clause = ""
+    if channel:
+        join_clause = """
+            JOIN video_restaurants vr_f ON vr_f.restaurant_id = r.id
+            JOIN videos v_f ON v_f.id = vr_f.video_id
+            JOIN channels c_f ON c_f.id = v_f.channel_id
+        """
+        conditions.append("c_f.channel_name = :channel")
+        params["channel"] = channel
+
    where = " AND ".join(conditions)
    sql = f"""
-        SELECT id, name, address, region, latitude, longitude,
-               cuisine_type, price_range, google_place_id
-        FROM restaurants
+        SELECT DISTINCT r.id, r.name, r.address, r.region, r.latitude, r.longitude,
+               r.cuisine_type, r.price_range, r.google_place_id,
+               r.business_status, r.rating, r.rating_count, r.updated_at
+        FROM restaurants r
+        {join_clause}
        WHERE {where}
-        ORDER BY updated_at DESC
+        ORDER BY r.updated_at DESC
        OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
    """
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql, params)
        cols = [d[0].lower() for d in cur.description]
-        return [dict(zip(cols, row)) for row in cur.fetchall()]
+        restaurants = [dict(zip(cols, row)) for row in cur.fetchall()]
+        for r in restaurants:
+            r.pop("updated_at", None)
+
+    if not restaurants:
+        return restaurants
+
+    # Attach channel names for each restaurant
+    ids = [r["id"] for r in restaurants]
+    placeholders = ", ".join(f":id{i}" for i in range(len(ids)))
+    ch_sql = f"""
+        SELECT DISTINCT vr.restaurant_id, c.channel_name
+        FROM video_restaurants vr
+        JOIN videos v ON v.id = vr.video_id
+        JOIN channels c ON c.id = v.channel_id
+        WHERE vr.restaurant_id IN ({placeholders})
+    """
+    ch_params = {f"id{i}": rid for i, rid in enumerate(ids)}
+    ch_map: dict[str, list[str]] = {}
+    with conn() as c:
+        cur = c.cursor()
+        cur.execute(ch_sql, ch_params)
+        for row in cur.fetchall():
+            ch_map.setdefault(row[0], []).append(row[1])
+
+    for r in restaurants:
+        r["channels"] = ch_map.get(r["id"], [])
+
+    return restaurants


 def get_by_id(restaurant_id: str) -> dict | None:
    sql = """
        SELECT r.id, r.name, r.address, r.region, r.latitude, r.longitude,
-               r.cuisine_type, r.price_range, r.phone, r.website, r.google_place_id
+               r.cuisine_type, r.price_range, r.phone, r.website, r.google_place_id,
+               r.business_status, r.rating, r.rating_count
        FROM restaurants r
        WHERE r.id = :id
    """
@@ -165,9 +242,11 @@ def get_video_links(restaurant_id: str) -> list[dict]:
    """Get all video appearances for a restaurant."""
    sql = """
        SELECT v.video_id, v.title, v.url, v.published_at,
-               vr.foods_mentioned, vr.evaluation, vr.guests
+               vr.foods_mentioned, vr.evaluation, vr.guests,
+               c.channel_name, c.channel_id
        FROM video_restaurants vr
        JOIN videos v ON v.id = vr.video_id
+        JOIN channels c ON c.id = v.channel_id
        WHERE vr.restaurant_id = :rid
        ORDER BY v.published_at DESC
    """
@@ -187,6 +266,8 @@ def get_video_links(restaurant_id: str) -> list[dict]:
                "foods_mentioned": _parse_json_field(foods_raw, []),
                "evaluation": _parse_json_field(eval_raw, {}),
                "guests": _parse_json_field(guests_raw, []),
+                "channel_name": r[7],
+                "channel_id": r[8],
            })
        return results

--- a/backend/core/review.py
+++ b/backend/core/review.py
@@ -131,13 +131,15 @@ def get_user_reviews(
    limit: int = 20,
    offset: int = 0,
 ) -> list[dict]:
-    """List reviews by a specific user."""
+    """List reviews by a specific user, including restaurant name."""
    sql = """
        SELECT r.id, r.user_id, r.restaurant_id, r.rating, r.review_text,
               r.visited_at, r.created_at, r.updated_at,
-               u.nickname, u.avatar_url
+               u.nickname, u.avatar_url,
+               rest.name AS restaurant_name
        FROM user_reviews r
        JOIN tasteby_users u ON u.id = r.user_id
+        LEFT JOIN restaurants rest ON rest.id = r.restaurant_id
        WHERE r.user_id = :user_id
        ORDER BY r.created_at DESC
        OFFSET :off ROWS FETCH NEXT :lim ROWS ONLY
@@ -149,7 +151,12 @@ def get_user_reviews(
            "off": offset,
            "lim": limit,
        })
-        return [_row_to_dict(row) for row in cur.fetchall()]
+        rows = []
+        for row in cur.fetchall():
+            d = _row_to_dict(row)
+            d["restaurant_name"] = row[10]
+            rows.append(d)
+        return rows


 def get_restaurant_avg_rating(restaurant_id: str) -> dict:
--- a/backend/core/youtube.py
+++ b/backend/core/youtube.py
@@ -32,11 +32,11 @@ def extract_video_id(url: str) -> str:

 # -- Channel operations -------------------------------------------------------

-def add_channel(channel_id: str, channel_name: str) -> str:
+def add_channel(channel_id: str, channel_name: str, title_filter: str | None = None) -> str:
    """Register a YouTube channel. Returns DB row id."""
    sql = """
-        INSERT INTO channels (channel_id, channel_name, channel_url)
-        VALUES (:cid, :cname, :curl)
+        INSERT INTO channels (channel_id, channel_name, channel_url, title_filter)
+        VALUES (:cid, :cname, :curl, :tf)
        RETURNING id INTO :out_id
    """
    with conn() as c:
@@ -47,45 +47,77 @@ def add_channel(channel_id: str, channel_name: str) -> str:
            "cid": channel_id,
            "cname": channel_name,
            "curl": f"https://www.youtube.com/channel/{channel_id}",
+            "tf": title_filter,
            "out_id": out_id,
        })
        return out_id.getvalue()[0]


+def deactivate_channel(channel_id: str) -> bool:
+    """Deactivate a channel by channel_id. Returns True if found."""
+    sql = "UPDATE channels SET is_active = 0 WHERE channel_id = :cid AND is_active = 1"
+    with conn() as c:
+        cur = c.cursor()
+        cur.execute(sql, {"cid": channel_id})
+        return cur.rowcount > 0
+
+
+def deactivate_channel_by_db_id(db_id: str) -> bool:
+    """Deactivate a channel by DB id. Returns True if found."""
+    sql = "UPDATE channels SET is_active = 0 WHERE id = :did AND is_active = 1"
+    with conn() as c:
+        cur = c.cursor()
+        cur.execute(sql, {"did": db_id})
+        return cur.rowcount > 0
+
+
 def get_active_channels() -> list[dict]:
-    sql = "SELECT id, channel_id, channel_name FROM channels WHERE is_active = 1"
+    sql = "SELECT id, channel_id, channel_name, title_filter FROM channels WHERE is_active = 1"
    with conn() as c:
        cur = c.cursor()
        cur.execute(sql)
        return [
-            {"id": r[0], "channel_id": r[1], "channel_name": r[2]}
+            {"id": r[0], "channel_id": r[1], "channel_name": r[2], "title_filter": r[3]}
            for r in cur.fetchall()
        ]


 # -- Video listing via YouTube Data API v3 ------------------------------------

-def fetch_channel_videos(
-    channel_id: str,
-    max_results: int = 50,
-    published_after: str | None = None,
-) -> list[dict]:
-    """Fetch video list from a YouTube channel via Data API v3.
+def get_latest_video_date(channel_db_id: str) -> str | None:
+    """Get the latest published_at for a channel's videos in ISO 8601 format."""
+    sql = """
+        SELECT MAX(published_at) FROM videos
+        WHERE channel_id = :ch_id AND published_at IS NOT NULL
+    """
+    with conn() as c:
+        cur = c.cursor()
+        cur.execute(sql, {"ch_id": channel_db_id})
+        row = cur.fetchone()
+        if row and row[0]:
+            return row[0].strftime("%Y-%m-%dT%H:%M:%SZ")
+    return None

-    Returns list of dicts: video_id, title, published_at, url.
+
+def fetch_channel_videos_iter(
+    channel_id: str,
+    published_after: str | None = None,
+):
+    """Yield pages of videos from a YouTube channel via Data API v3.
+
+    Each yield is a list of dicts for one API page (up to 50).
    """
    params: dict = {
        "key": _api_key(),
        "channelId": channel_id,
        "part": "snippet",
        "order": "date",
-        "maxResults": min(max_results, 50),
+        "maxResults": 50,
        "type": "video",
    }
    if published_after:
        params["publishedAfter"] = published_after

-    videos: list[dict] = []
    next_page = None

    while True:
@@ -100,33 +132,337 @@ def fetch_channel_videos(
        r.raise_for_status()
        data = r.json()

+        page_videos = []
        for item in data.get("items", []):
            snippet = item["snippet"]
            vid = item["id"]["videoId"]
-            videos.append({
+            page_videos.append({
                "video_id": vid,
                "title": snippet["title"],
                "published_at": snippet["publishedAt"],
                "url": f"https://www.youtube.com/watch?v={vid}",
            })

+        if page_videos:
+            yield page_videos
+
        next_page = data.get("nextPageToken")
-        if not next_page or len(videos) >= max_results:
+        if not next_page:
            break

-    return videos[:max_results]
+
+def fetch_channel_videos(
+    channel_id: str,
+    max_results: int = 0,
+    published_after: str | None = None,
+) -> list[dict]:
+    """Fetch video list from a YouTube channel via Data API v3.
+
+    Args:
+        max_results: 0 means fetch all available videos.
+    """
+    videos: list[dict] = []
+    for page in fetch_channel_videos_iter(channel_id, published_after=published_after):
+        videos.extend(page)
+        if max_results > 0 and len(videos) >= max_results:
+            break
+    return videos[:max_results] if max_results > 0 else videos


 # -- Transcript extraction ----------------------------------------------------

-def get_transcript(video_id: str) -> str | None:
-    """Fetch transcript text for a video. Returns None if unavailable."""
+def get_transcript(video_id: str, mode: str = "auto") -> tuple[str | None, str | None]:
+    """Fetch transcript using Playwright (headless browser).
+
+    Args:
+        mode: "manual" = manual only, "generated" = auto-generated only,
+              "auto" = try API first, fallback to browser transcript panel.
+
+    Returns:
+        (transcript_text, source) where source describes origin, or (None, None).
+    """
+    # Try youtube-transcript-api first (fast path)
+    text, source = _get_transcript_api(video_id, mode)
+    if text:
+        return text, source
+
+    # Fallback: Playwright browser
+    logger.warning("API failed for %s, trying Playwright browser", video_id)
+    print(f"[TRANSCRIPT] API failed for {video_id}, trying Playwright browser", flush=True)
+    return _get_transcript_browser(video_id)
+
+
+def _make_ytt() -> YouTubeTranscriptApi:
+    """Create YouTubeTranscriptApi with cookies if available."""
+    cookie_file = os.path.join(os.path.dirname(__file__), "..", "cookies.txt")
+    if os.path.exists(cookie_file):
+        import http.cookiejar
+        import requests
+        jar = http.cookiejar.MozillaCookieJar(cookie_file)
+        jar.load(ignore_discard=True, ignore_expires=True)
+        session = requests.Session()
+        session.cookies = jar
+        return YouTubeTranscriptApi(http_client=session)
+    return YouTubeTranscriptApi()
+
+
+def _get_transcript_api(video_id: str, mode: str = "auto") -> tuple[str | None, str | None]:
+    """Try youtube-transcript-api (fast but may be IP-blocked)."""
+    ytt = _make_ytt()
+    prefer = ["ko", "en"]
+
    try:
-        fetched = YouTubeTranscriptApi().fetch(video_id, languages=["ko", "en"])
-        return " ".join(seg.text for seg in fetched)
+        transcript_list = ytt.list(video_id)
    except Exception as e:
-        logger.warning("Transcript unavailable for %s: %s", video_id, e)
-        return None
+        logger.warning("Cannot list transcripts for %s: %s", video_id, e)
+        return None, None
+
+    all_transcripts = list(transcript_list)
+    manual = [t for t in all_transcripts if not t.is_generated]
+    generated = [t for t in all_transcripts if t.is_generated]
+
+    def _pick(candidates):
+        for lang in prefer:
+            for t in candidates:
+                if t.language_code == lang:
+                    return t
+        return candidates[0] if candidates else None
+
+    def _fetch(t):
+        try:
+            return " ".join(seg.text for seg in t.fetch()), t.language_code
+        except Exception:
+            return None, None
+
+    if mode == "manual":
+        t = _pick(manual)
+        if t:
+            text, lang = _fetch(t)
+            return (text, f"manual ({lang})") if text else (None, None)
+        return None, None
+    elif mode == "generated":
+        t = _pick(generated)
+        if t:
+            text, lang = _fetch(t)
+            return (text, f"generated ({lang})") if text else (None, None)
+        return None, None
+    else:
+        t = _pick(manual)
+        if t:
+            text, lang = _fetch(t)
+            if text:
+                return text, f"manual ({lang})"
+        t = _pick(generated)
+        if t:
+            text, lang = _fetch(t)
+            if text:
+                return text, f"generated ({lang})"
+        return None, None
+
+
+def _get_transcript_browser(video_id: str) -> tuple[str | None, str | None]:
+    """Fetch transcript via Playwright browser (bypasses IP blocks)."""
+    try:
+        from playwright.sync_api import sync_playwright
+    except ImportError:
+        logger.error("playwright not installed")
+        return None, None
+
+    try:
+        with sync_playwright() as p:
+            browser = p.chromium.launch(
+                headless=False,
+                args=["--disable-blink-features=AutomationControlled"],
+            )
+            ctx = browser.new_context(locale="ko-KR", viewport={"width": 1280, "height": 900})
+
+            # Load YouTube cookies if available
+            cookie_file = os.path.join(os.path.dirname(__file__), "..", "cookies.txt")
+            if os.path.exists(cookie_file):
+                import http.cookiejar
+                jar = http.cookiejar.MozillaCookieJar(cookie_file)
+                jar.load(ignore_discard=True, ignore_expires=True)
+                pw_cookies = []
+                for c in jar:
+                    if "youtube" in c.domain or "google" in c.domain:
+                        pw_cookies.append({
+                            "name": c.name, "value": c.value,
+                            "domain": c.domain, "path": c.path,
+                            "secure": c.secure, "httpOnly": False,
+                        })
+                if pw_cookies:
+                    ctx.add_cookies(pw_cookies)
+                    print(f"[TRANSCRIPT] Loaded {len(pw_cookies)} cookies", flush=True)
+
+            page = ctx.new_page()
+            page.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
+
+            print(f"[TRANSCRIPT] Opening YouTube page for {video_id}", flush=True)
+            page.goto(
+                f"https://www.youtube.com/watch?v={video_id}",
+                wait_until="domcontentloaded",
+                timeout=30000,
+            )
+            page.wait_for_timeout(5000)
+
+            # Skip ads if present
+            for ad_wait in range(12):  # up to ~60s for ads
+                ad_status = page.evaluate("""() => {
+                    const skipBtn = document.querySelector('.ytp-skip-ad-button, .ytp-ad-skip-button, .ytp-ad-skip-button-modern, button.ytp-ad-skip-button-modern');
+                    if (skipBtn) { skipBtn.click(); return 'skipped'; }
+                    const adOverlay = document.querySelector('.ytp-ad-player-overlay, .ad-showing');
+                    if (adOverlay) return 'playing';
+                    const adBadge = document.querySelector('.ytp-ad-text');
+                    if (adBadge && adBadge.textContent) return 'badge';
+                    return 'none';
+                }""")
+                if ad_status == "none":
+                    break
+                print(f"[TRANSCRIPT] Ad detected: {ad_status}, waiting...", flush=True)
+                if ad_status == "skipped":
+                    page.wait_for_timeout(2000)
+                    break
+                page.wait_for_timeout(5000)
+
+            page.wait_for_timeout(2000)
+            print(f"[TRANSCRIPT] Page loaded, looking for transcript button", flush=True)
+
+            # Click "더보기" (more actions) button first to reveal transcript option
+            page.evaluate("""
+                () => {
+                    // Try clicking the "...더보기" button in description area
+                    const moreBtn = document.querySelector('tp-yt-paper-button#expand');
+                    if (moreBtn) moreBtn.click();
+                }
+            """)
+            page.wait_for_timeout(2000)
+
+            # Click "스크립트 표시" button via JS
+            clicked = page.evaluate("""
+                () => {
+                    // Method 1: aria-label
+                    for (const label of ['스크립트 표시', 'Show transcript']) {
+                        const btns = document.querySelectorAll(`button[aria-label="${label}"]`);
+                        for (const b of btns) { b.click(); return 'aria-label: ' + label; }
+                    }
+                    // Method 2: search all buttons by text content
+                    const allBtns = document.querySelectorAll('button');
+                    for (const b of allBtns) {
+                        const text = b.textContent.trim();
+                        if (text === '스크립트 표시' || text === 'Show transcript') {
+                            b.click();
+                            return 'text: ' + text;
+                        }
+                    }
+                    // Method 3: look for transcript button in engagement panel
+                    const engBtns = document.querySelectorAll('ytd-button-renderer button, ytd-button-renderer a');
+                    for (const b of engBtns) {
+                        const text = b.textContent.trim().toLowerCase();
+                        if (text.includes('transcript') || text.includes('스크립트')) {
+                            b.click();
+                            return 'engagement: ' + text;
+                        }
+                    }
+                    return false;
+                }
+            """)
+            print(f"[TRANSCRIPT] Clicked transcript button: {clicked}", flush=True)
+            if not clicked:
+                # Dump available buttons for debugging
+                btn_labels = page.evaluate("""
+                    () => {
+                        const btns = document.querySelectorAll('button[aria-label]');
+                        return Array.from(btns).map(b => b.getAttribute('aria-label')).slice(0, 30);
+                    }
+                """)
+                print(f"[TRANSCRIPT] Available buttons: {btn_labels}", flush=True)
+                browser.close()
+                return None, None
+
+            # Wait for transcript panel segments to appear (max ~40s)
+            page.wait_for_timeout(3000)  # initial wait for panel to render
+            for attempt in range(12):
+                page.wait_for_timeout(3000)
+                count = page.evaluate(
+                    "() => document.querySelectorAll('ytd-transcript-segment-renderer').length"
+                )
+                print(f"[TRANSCRIPT] Wait {(attempt+1)*3+3}s: {count} segments", flush=True)
+                if count > 0:
+                    break
+
+            # Select Korean if available (language selector in transcript panel)
+            page.evaluate("""
+                () => {
+                    // Open language dropdown and pick Korean if available
+                    const menu = document.querySelector('ytd-transcript-renderer ytd-menu-renderer yt-dropdown-menu');
+                    if (!menu) return;
+                    const trigger = menu.querySelector('button, tp-yt-paper-button');
+                    if (trigger) trigger.click();
+                }
+            """)
+            page.wait_for_timeout(1000)
+            page.evaluate("""
+                () => {
+                    const items = document.querySelectorAll('tp-yt-paper-listbox a, tp-yt-paper-listbox tp-yt-paper-item');
+                    for (const item of items) {
+                        const text = item.textContent.trim();
+                        if (text.includes('한국어') || text.includes('Korean')) {
+                            item.click();
+                            return;
+                        }
+                    }
+                }
+            """)
+            page.wait_for_timeout(2000)
+
+            # Scroll transcript panel to load all segments
+            segments = page.evaluate("""
+                async () => {
+                    const container = document.querySelector(
+                        'ytd-transcript-segment-list-renderer #segments-container, ' +
+                        'ytd-transcript-renderer #body'
+                    );
+                    if (!container) {
+                        // Fallback: just grab what's there
+                        const segs = document.querySelectorAll('ytd-transcript-segment-renderer');
+                        return Array.from(segs).map(s => {
+                            const txt = s.querySelector('.segment-text, yt-formatted-string.segment-text');
+                            return txt ? txt.textContent.trim() : '';
+                        }).filter(t => t);
+                    }
+
+                    // Scroll to bottom repeatedly to load all virtual segments
+                    let prevCount = 0;
+                    for (let i = 0; i < 50; i++) {
+                        container.scrollTop = container.scrollHeight;
+                        await new Promise(r => setTimeout(r, 300));
+                        const segs = document.querySelectorAll('ytd-transcript-segment-renderer');
+                        if (segs.length === prevCount && i > 3) break;
+                        prevCount = segs.length;
+                    }
+
+                    const segs = document.querySelectorAll('ytd-transcript-segment-renderer');
+                    return Array.from(segs).map(s => {
+                        const txt = s.querySelector('.segment-text, yt-formatted-string.segment-text');
+                        return txt ? txt.textContent.trim() : '';
+                    }).filter(t => t);
+                }
+            """)
+
+            browser.close()
+
+            print(f"[TRANSCRIPT] Got {len(segments) if segments else 0} segments for {video_id}", flush=True)
+            if segments:
+                text = " ".join(segments)
+                print(f"[TRANSCRIPT] Success: {len(text)} chars from {len(segments)} segments", flush=True)
+                return text, "browser"
+            return None, None
+    except Exception as e:
+        logger.error("Playwright transcript failed for %s: %s", video_id, e)
+        print(f"[TRANSCRIPT] Playwright FAILED for {video_id}: {e}", flush=True)
+        import traceback
+        traceback.print_exc()
+        return None, None


 # -- DB operations for videos -------------------------------------------------
@@ -163,6 +499,48 @@ def save_video(channel_db_id: str, video: dict) -> str | None:
            raise


+def get_existing_video_ids(channel_db_id: str) -> set[str]:
+    """Return the video_id values already stored for one channel.
+
+    Runs a single SELECT on ``videos`` filtered by ``channel_id`` and
+    collects the first column of every returned row into a set.
+    """
+    query = "SELECT video_id FROM videos WHERE channel_id = :cid"
+    with conn() as db:
+        cursor = db.cursor()
+        cursor.execute(query, {"cid": channel_db_id})
+        rows = cursor.fetchall()
+        return {row[0] for row in rows}
+
+
+def save_videos_batch(channel_db_id: str, videos: list[dict]) -> int:
+    """Insert several videos for one channel over a single DB connection.
+
+    Every row is inserted with status 'pending'. An insert that fails with
+    an error whose message contains ``UQ_VIDEOS_VID`` (presumably the
+    unique constraint on video_id -- confirm against the schema) is
+    skipped and not counted; any other error propagates to the caller.
+
+    Args:
+        channel_db_id: Value written to ``videos.channel_id`` for each row.
+        videos: Dicts providing "video_id", "title" and "url", plus an
+            optional ISO-8601 "published_at" string.
+
+    Returns:
+        Number of rows that were inserted.
+    """
+    if not videos:
+        return 0
+    sql = """
+        INSERT INTO videos (channel_id, video_id, title, url, published_at, status)
+        VALUES (:ch_id, :vid, :title, :url, :pub_at, 'pending')
+    """
+    new_count = 0
+    with conn() as c:
+        cur = c.cursor()
+        for video in videos:
+            published = video.get("published_at")
+            # Map "Z" to "+00:00" so fromisoformat receives an explicit offset.
+            pub_at = (
+                datetime.fromisoformat(published.replace("Z", "+00:00"))
+                if published
+                else None
+            )
+            params = {
+                "ch_id": channel_db_id,
+                "vid": video["video_id"],
+                "title": video["title"],
+                "url": video["url"],
+                "pub_at": pub_at,
+            }
+            # Only the insert is guarded, so the duplicate-key check below
+            # is applied to database errors alone.
+            try:
+                cur.execute(sql, params)
+            except Exception as e:
+                # Already-stored video: skip it without counting.
+                if "UQ_VIDEOS_VID" in str(e).upper():
+                    continue
+                raise
+            new_count += 1
+    return new_count
+
+
 def get_pending_videos(limit: int = 10) -> list[dict]:
    sql = """
        SELECT id, video_id, title, url
@@ -201,20 +579,28 @@ def update_video_status(

 # -- Scan: fetch new videos for all active channels ---------------------------

-def scan_all_channels(max_per_channel: int = 50) -> int:
+def scan_all_channels() -> int:
    """Scan all active channels for new videos. Returns count of new videos."""
    channels = get_active_channels()
    total_new = 0
    for ch in channels:
        try:
-            videos = fetch_channel_videos(ch["channel_id"], max_per_channel)
-            for v in videos:
-                row_id = save_video(ch["id"], v)
-                if row_id:
-                    total_new += 1
+            after = get_latest_video_date(ch["id"])
+            title_filter = ch.get("title_filter")
+            new_count = 0
+            fetched = 0
+            for page in fetch_channel_videos_iter(ch["channel_id"], published_after=after):
+                fetched += len(page)
+                for v in page:
+                    if title_filter and title_filter not in v["title"]:
+                        continue
+                    row_id = save_video(ch["id"], v)
+                    if row_id:
+                        new_count += 1
+            total_new += new_count
            logger.info(
-                "Channel %s: fetched %d videos, %d new",
-                ch["channel_name"], len(videos), total_new,
+                "Channel %s: fetched %d videos (after=%s), %d new (filter=%s)",
+                ch["channel_name"], fetched, after or "all", new_count, title_filter or "none",
            )
        except Exception as e:
            logger.error("Failed to scan channel %s: %s", ch["channel_name"], e)