// tasteby/backend-java/src/main/java/com/tasteby/service/YouTubeService.java

package com.tasteby.service;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.github.thoroldvix.api.Transcript;
import io.github.thoroldvix.api.TranscriptContent;
import io.github.thoroldvix.api.TranscriptList;
import io.github.thoroldvix.api.TranscriptApiFactory;
import io.github.thoroldvix.api.YoutubeTranscriptApi;
import com.microsoft.playwright.*;
import com.microsoft.playwright.options.Cookie;
import com.microsoft.playwright.options.WaitUntilState;
import com.tasteby.domain.Channel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.client.WebClient;

import java.nio.file.Path;
import java.time.Duration;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

@Service
public class YouTubeService {

    /** SLF4J logger shared by every method of this service. */
    private static final Logger log = LoggerFactory.getLogger(YouTubeService.class);
    /**
     * Matches time-only ISO-8601 durations such as {@code PT1H2M3S}.
     * Groups 1, 2 and 3 capture the optional hour, minute and second counts.
     */
    private static final Pattern DURATION_PATTERN = Pattern.compile("PT(?:(\\d+)H)?(?:(\\d+)M)?(?:(\\d+)S)?");

    /** HTTP client; the constructor points it at the YouTube Data API v3 base URL. */
    private final WebClient webClient;
    /** Jackson mapper used to parse the JSON bodies returned by the API. */
    private final ObjectMapper mapper;
    /** Channel lookup (single channel by YouTube ID, and the list of active channels). */
    private final ChannelService channelService;
    /** Video persistence: latest stored date, already-stored IDs and batch saves. */
    private final VideoService videoService;
    /** Value of the {@code app.google.youtube-api-key} property; sent as the {@code key} query parameter. */
    private final String apiKey;

    /**
     * Creates the service and builds the HTTP client used for every YouTube Data API call.
     *
     * @param mapper         Jackson mapper used to parse API responses
     * @param channelService channel lookup service
     * @param videoService   video persistence service
     * @param apiKey         YouTube Data API key, injected from {@code app.google.youtube-api-key}
     */
    public YouTubeService(ObjectMapper mapper,
                           ChannelService channelService,
                           VideoService videoService,
                           @Value("${app.google.youtube-api-key}") String apiKey) {
        this.webClient = WebClient.builder()
                .baseUrl("https://www.googleapis.com/youtube/v3")
                // WebClient buffers at most 256 KiB per response by default. A 50-item page with
                // "snippet" parts carries every video description and can exceed that, which would
                // surface as a DataBufferLimitException. Allow up to 4 MiB instead.
                .codecs(configurer -> configurer.defaultCodecs().maxInMemorySize(4 * 1024 * 1024))
                .build();
        this.mapper = mapper;
        this.channelService = channelService;
        this.videoService = videoService;
        this.apiKey = apiKey;
    }

    /**
     * Fetch videos from a YouTube channel using the uploads playlist (UC→UU).
     * This returns ALL videos unlike the Search API which caps results.
     * Falls back to Search API if playlist approach fails.
     *
     * @param channelId      YouTube channel ID; IDs that do not start with {@code UC} have no
     *                       derivable uploads playlist and are sent straight to the Search API
     * @param publishedAfter optional cut-off; the first item whose {@code publishedAt} string compares
     *                       less than or equal to it ends the scan. {@code null} disables the cut-off
     * @param excludeShorts  when {@code true}, every page is passed through the Shorts filter
     * @return one map per video with the keys {@code video_id}, {@code title},
     *         {@code published_at} and {@code url}
     */
    public List<Map<String, Object>> fetchChannelVideos(String channelId, String publishedAfter, boolean excludeShorts) {
        if (!channelId.startsWith("UC")) {
            log.warn("Channel id {} has no UC prefix, using Search API directly", channelId);
            return fetchChannelVideosViaSearch(channelId, publishedAfter, excludeShorts);
        }

        // Convert channel ID UC... → uploads playlist UU...
        String uploadsPlaylistId = "UU" + channelId.substring(2);
        List<Map<String, Object>> allVideos = new ArrayList<>();
        String nextPage = null;
        // Set once an item at or before publishedAfter is seen; pagination must stop there.
        boolean reachedCutOff = false;

        try {
            do {
                String pageToken = nextPage;
                String response = webClient.get()
                        .uri(uriBuilder -> {
                            var b = uriBuilder.path("/playlistItems")
                                    .queryParam("key", apiKey)
                                    .queryParam("playlistId", uploadsPlaylistId)
                                    .queryParam("part", "snippet")
                                    .queryParam("maxResults", 50);
                            if (pageToken != null) b.queryParam("pageToken", pageToken);
                            return b.build();
                        })
                        .retrieve()
                        .bodyToMono(String.class)
                        .block(Duration.ofSeconds(30));

                JsonNode data = mapper.readTree(response);
                List<Map<String, Object>> pageVideos = new ArrayList<>();

                for (JsonNode item : data.path("items")) {
                    JsonNode snippet = item.path("snippet");
                    String vid = snippet.path("resourceId").path("videoId").asText();
                    String publishedAt = snippet.path("publishedAt").asText();

                    // publishedAfter filter: keep only videos newer than what was already scanned.
                    if (publishedAfter != null && publishedAt.compareTo(publishedAfter) <= 0) {
                        // The uploads playlist is newest-first, so an older item means we are done.
                        reachedCutOff = true;
                        break;
                    }

                    pageVideos.add(Map.of(
                            "video_id", vid,
                            "title", snippet.path("title").asText(),
                            "published_at", publishedAt,
                            "url", "https://www.youtube.com/watch?v=" + vid
                    ));
                }

                if (excludeShorts && !pageVideos.isEmpty()) {
                    pageVideos = filterShorts(pageVideos);
                }
                allVideos.addAll(pageVideos);

                // Only follow the next-page token while the cut-off has not been reached; otherwise
                // every remaining page would still be requested just to be discarded.
                nextPage = (!reachedCutOff && data.has("nextPageToken"))
                        ? data.path("nextPageToken").asText()
                        : null;
            } while (nextPage != null);
        } catch (Exception e) {
            // Pages collected so far are dropped; the Search API result replaces them entirely.
            log.warn("PlaylistItems API failed for {}, falling back to Search API", channelId, e);
            return fetchChannelVideosViaSearch(channelId, publishedAfter, excludeShorts);
        }

        return allVideos;
    }

    /**
     * Fallback path: pages through the Search API for a channel's videos, newest first.
     * The Search API may not return every video of the channel.
     *
     * @param channelId      YouTube channel ID sent as the {@code channelId} query parameter
     * @param publishedAfter optional lower bound forwarded to the API; omitted when {@code null}
     * @param excludeShorts  when {@code true}, every page is passed through the Shorts filter
     * @return one map per video with the keys {@code video_id}, {@code title},
     *         {@code published_at} and {@code url}; pages read before a parse failure are kept
     */
    private List<Map<String, Object>> fetchChannelVideosViaSearch(String channelId, String publishedAfter, boolean excludeShorts) {
        List<Map<String, Object>> collected = new ArrayList<>();
        String cursor = null;

        while (true) {
            final String token = cursor;
            // HTTP failures are not caught here and propagate to the caller.
            String body = webClient.get()
                    .uri(builder -> {
                        var query = builder.path("/search")
                                .queryParam("key", apiKey)
                                .queryParam("channelId", channelId)
                                .queryParam("part", "snippet")
                                .queryParam("order", "date")
                                .queryParam("maxResults", 50)
                                .queryParam("type", "video");
                        if (publishedAfter != null) {
                            query.queryParam("publishedAfter", publishedAfter);
                        }
                        if (token != null) {
                            query.queryParam("pageToken", token);
                        }
                        return query.build();
                    })
                    .retrieve()
                    .bodyToMono(String.class)
                    .block(Duration.ofSeconds(30));

            try {
                JsonNode root = mapper.readTree(body);
                List<Map<String, Object>> batch = new ArrayList<>();

                for (JsonNode entry : root.path("items")) {
                    String videoId = entry.path("id").path("videoId").asText();
                    JsonNode details = entry.path("snippet");
                    batch.add(Map.of(
                            "video_id", videoId,
                            "title", details.path("title").asText(),
                            "published_at", details.path("publishedAt").asText(),
                            "url", "https://www.youtube.com/watch?v=" + videoId
                    ));
                }

                if (excludeShorts && !batch.isEmpty()) {
                    batch = filterShorts(batch);
                }
                collected.addAll(batch);

                if (root.has("nextPageToken")) {
                    cursor = root.path("nextPageToken").asText();
                } else {
                    cursor = null;
                }
            } catch (Exception e) {
                // A malformed page ends the scan; what was gathered so far is still returned.
                log.error("Failed to parse YouTube Search API response", e);
                break;
            }

            if (cursor == null) {
                break;
            }
        }

        return collected;
    }

    /**
     * Drops YouTube Shorts, i.e. videos whose reported duration is 60 seconds or less.
     * Durations come from the {@code /videos} endpoint, queried in batches of at most 50 IDs.
     * A video whose duration could not be fetched is kept (it defaults to 61 seconds).
     *
     * @param videos video maps carrying a {@code video_id} entry
     * @return unmodifiable list of the videos longer than 60 seconds, in their original order
     */
    private List<Map<String, Object>> filterShorts(List<Map<String, Object>> videos) {
        final int batchSize = 50;

        List<String> videoIds = new ArrayList<>(videos.size());
        for (Map<String, Object> video : videos) {
            videoIds.add((String) video.get("video_id"));
        }

        Map<String, Integer> secondsById = new HashMap<>();
        int offset = 0;
        while (offset < videoIds.size()) {
            int end = Math.min(offset + batchSize, videoIds.size());
            String joinedIds = String.join(",", videoIds.subList(offset, end));
            try {
                String body = webClient.get()
                        .uri(builder -> builder.path("/videos")
                                .queryParam("key", apiKey)
                                .queryParam("id", joinedIds)
                                .queryParam("part", "contentDetails")
                                .build())
                        .retrieve()
                        .bodyToMono(String.class)
                        .block(Duration.ofSeconds(30));

                for (JsonNode entry : mapper.readTree(body).path("items")) {
                    String isoDuration = entry.path("contentDetails").path("duration").asText();
                    secondsById.put(entry.path("id").asText(), parseDuration(isoDuration));
                }
            } catch (Exception e) {
                // Best effort: a failed batch leaves its videos without a duration, so they are kept.
                log.warn("Failed to fetch video durations for batch starting at {}", offset, e);
            }
            offset += batchSize;
        }

        return videos.stream()
                .filter(video -> {
                    int seconds = secondsById.getOrDefault(video.get("video_id"), 61);
                    return seconds > 60;
                })
                .toList();
    }

    /**
     * Converts an ISO-8601 duration string to whole seconds.
     * <p>
     * Unlike a time-only {@code PT..H..M..S} match, this also understands day-bearing values such
     * as {@code P1DT2H3M4S}, so very long videos are no longer reported as zero seconds.
     *
     * @param dur duration text, e.g. {@code PT4M13S}; may be {@code null}
     * @return the duration in seconds, clamped to {@code [0, Integer.MAX_VALUE]}; {@code 0} when
     *         the text is {@code null}, blank or not a parsable duration
     */
    private int parseDuration(String dur) {
        if (dur == null || dur.isBlank()) return 0;
        try {
            long seconds = Duration.parse(dur).getSeconds();
            // Clamp instead of overflowing, and never report a negative length.
            return (int) Math.max(0L, Math.min(seconds, Integer.MAX_VALUE));
        } catch (java.time.format.DateTimeParseException e) {
            // Unparsable text is treated as "no duration", matching the previous contract.
            return 0;
        }
    }

    /**
     * Scans one channel for videos that are not stored yet and saves them.
     *
     * @param channelId YouTube channel ID used to look the channel up
     * @param full      {@code true} to fetch without a date cut-off; {@code false} to fetch only
     *                  videos newer than the latest stored one
     * @return a map with {@code total_fetched}, {@code new_videos} and {@code filtered}, or
     *         {@code null} when the channel is unknown
     */
    public Map<String, Object> scanChannel(String channelId, boolean full) {
        Channel channel = channelService.findByChannelId(channelId);
        if (channel == null) {
            return null;
        }

        String channelDbId = channel.getId();
        String requiredTitlePart = channel.getTitleFilter();
        String publishedAfter = full ? null : videoService.getLatestVideoDate(channelDbId);
        Set<String> knownIds = videoService.getExistingVideoIds(channelDbId);

        // Shorts are always excluded when scanning.
        List<Map<String, Object>> fetched = fetchChannelVideos(channelId, publishedAfter, true);
        int fetchedCount = fetched.size();

        List<Map<String, Object>> fresh = fetched.stream()
                .filter(video -> requiredTitlePart == null
                        || ((String) video.get("title")).contains(requiredTitlePart))
                .filter(video -> !knownIds.contains(video.get("video_id")))
                .collect(Collectors.toCollection(ArrayList::new));

        int savedCount = videoService.saveVideosBatch(channelDbId, fresh);

        // Reported only when a title filter is configured; counts every fetched video not kept.
        int filteredCount = 0;
        if (requiredTitlePart != null) {
            filteredCount = fetchedCount - fresh.size();
        }

        return Map.of(
                "total_fetched", fetchedCount,
                "new_videos", savedCount,
                "filtered", filteredCount
        );
    }

    /**
     * Scans every active channel incrementally and sums up the newly saved videos.
     * A failure on one channel is logged (with its stack trace) and does not stop the others.
     *
     * @return total number of new videos saved across all channels
     */
    public int scanAllChannels() {
        List<Channel> channels = channelService.findAllActive();
        int totalNew = 0;
        for (Channel ch : channels) {
            try {
                Map<String, Object> result = scanChannel(ch.getChannelId(), false);
                // scanChannel returns null when the channel is not found.
                if (result != null) {
                    totalNew += ((Number) result.get("new_videos")).intValue();
                }
            } catch (Exception e) {
                // Trailing throwable keeps the stack trace; the message alone hides the failure site.
                log.error("Failed to scan channel {}: {}", ch.getChannelName(), e.getMessage(), e);
            }
        }
        return totalNew;
    }

    /**
     * A fetched transcript.
     *
     * @param text   the transcript text
     * @param source label of where it came from, e.g. {@code "browser"} or {@code "manual (ko)"}
     */
    public record TranscriptResult(String text, String source) {}

    /** Language codes handed to the transcript lookup, in this order. */
    private static final List<String> PREFERRED_LANGS = List.of("ko", "en");
    /** Transcript API client obtained from the library's default factory. */
    private final YoutubeTranscriptApi transcriptApi = TranscriptApiFactory.createDefault();

    /**
     * Fetch transcript for a YouTube video.
     * Tries the Playwright browser extraction first and only falls back to the
     * youtube-transcript-api client when the browser produced nothing.
     *
     * @param videoId YouTube video ID
     * @param mode    applies to the API fallback only: "auto" = manual first then generated,
     *                "manual" = manual only, "generated" = generated only; {@code null} means "auto"
     * @return the transcript, or {@code null} when neither source produced one
     */
    public TranscriptResult getTranscript(String videoId, String mode) {
        // 1) Playwright headed browser (avoids being classified as a bot)
        TranscriptResult browserResult = getTranscriptBrowser(videoId);
        if (browserResult != null) return browserResult;

        // 2) Fallback: youtube-transcript-api
        log.warn("Browser failed for {}, trying API", videoId);
        return getTranscriptApi(videoId, mode == null ? "auto" : mode);
    }

    /**
     * Fetches a transcript through the youtube-transcript-api client.
     *
     * @param videoId YouTube video ID
     * @param mode    "manual" = manual only, "generated" = generated only, anything else
     *                (including {@code null}) = manual first, then generated
     * @return the transcript, or {@code null} when the transcript list cannot be read or no
     *         matching transcript yields text
     */
    public TranscriptResult getTranscriptApi(String videoId, String mode) {
        // Switching on a null String throws NullPointerException, so default it up front.
        String effectiveMode = mode == null ? "auto" : mode;

        TranscriptList transcriptList;
        try {
            transcriptList = transcriptApi.listTranscripts(videoId);
        } catch (Exception e) {
            log.warn("Cannot list transcripts for {}: {}", videoId, e.getMessage());
            return null;
        }

        String[] langs = PREFERRED_LANGS.toArray(String[]::new);

        return switch (effectiveMode) {
            case "manual" -> fetchTranscript(transcriptList, langs, true);
            case "generated" -> fetchTranscript(transcriptList, langs, false);
            default -> {
                // auto: try manual first, then generated
                var result = fetchTranscript(transcriptList, langs, true);
                if (result != null) yield result;
                yield fetchTranscript(transcriptList, langs, false);
            }
        };
    }

    /**
     * Picks a manual or generated transcript for the given languages and downloads its text.
     *
     * @param list   transcripts available for the video
     * @param langs  language codes handed to the lookup
     * @param manual {@code true} to look for a manual transcript, {@code false} for a generated one
     * @return the fragments joined with single spaces, labelled {@code "manual (<lang>)"} or
     *         {@code "generated (<lang>)"}; {@code null} when no transcript is found, the download
     *         fails, or the text is blank
     */
    private TranscriptResult fetchTranscript(TranscriptList list, String[] langs, boolean manual) {
        Transcript transcript;
        try {
            if (manual) {
                transcript = list.findManualTranscript(langs);
            } else {
                transcript = list.findGeneratedTranscript(langs);
            }
        } catch (Exception e) {
            // A failed lookup is treated as "no such transcript"; nothing is logged.
            return null;
        }

        try {
            StringJoiner joined = new StringJoiner(" ");
            for (TranscriptContent.Fragment fragment : transcript.fetch().getContent()) {
                joined.add(fragment.getText());
            }
            String text = joined.toString();
            if (text.isBlank()) {
                return null;
            }
            String kind = manual ? "manual" : "generated";
            String source = kind + " (" + transcript.getLanguageCode() + ")";
            return new TranscriptResult(text, source);
        } catch (Exception e) {
            log.warn("Failed to fetch transcript for language {}: {}", transcript.getLanguageCode(), e.getMessage());
            return null;
        }
    }

    // ─── Playwright browser ───────────────────────────────────────────────────

    /**
     * Fetch transcript using an existing Playwright Page (for bulk reuse).
     * The page is navigated to the video's watch page; the caller keeps ownership of it and
     * of the browser behind it.
     *
     * @param page    open Playwright page to drive
     * @param videoId YouTube video ID
     * @return the transcript labelled {@code "browser"}, or {@code null} when none could be read
     */
    public TranscriptResult getTranscriptWithPage(Page page, String videoId) {
        return fetchTranscriptFromPage(page, videoId);
    }

    /**
     * Bundle of the Playwright resources used for transcript fetching: the Playwright instance,
     * the browser and a ready-to-use page.
     * Caller must close the session; {@link #close()} shuts down the browser and then Playwright.
     *
     * @param playwright Playwright instance that owns the browser
     * @param browser    launched browser
     * @param page       page opened in that browser
     */
    public record BrowserSession(Playwright playwright, Browser browser, Page page) implements AutoCloseable {
        /**
         * Closes the browser first, then Playwright. Each step is best effort: an exception from
         * one close is ignored so the other still runs.
         */
        @Override
        public void close() {
            try { browser.close(); } catch (Exception ignored) {}
            try { playwright.close(); } catch (Exception ignored) {}
        }
    }

    /**
     * Create a Playwright browser + context + page for transcript fetching.
     * The browser is launched headed with a Korean locale, cookies from {@code cookies.txt} are
     * loaded when present, and {@code navigator.webdriver} is masked on the page.
     * Caller must close the returned session.
     * <p>
     * If any step after {@code Playwright.create()} fails, the resources opened so far are closed
     * before the exception is rethrown, so a failed start does not leak a browser process.
     *
     * @return an open session holding the Playwright instance, browser and page
     */
    public BrowserSession createBrowserSession() {
        Playwright pw = Playwright.create();
        Browser browser = null;
        try {
            browser = pw.chromium().launch(new BrowserType.LaunchOptions()
                    .setHeadless(false)
                    .setArgs(List.of("--disable-blink-features=AutomationControlled")));
            BrowserContext ctx = browser.newContext(new Browser.NewContextOptions()
                    .setLocale("ko-KR")
                    .setViewportSize(1280, 900));
            loadCookies(ctx);
            Page page = ctx.newPage();
            page.addInitScript("Object.defineProperty(navigator, 'webdriver', {get: () => false})");
            return new BrowserSession(pw, browser, page);
        } catch (RuntimeException e) {
            // Nobody else holds these handles yet, so they must be released here.
            if (browser != null) {
                try {
                    browser.close();
                } catch (Exception closeFailure) {
                    e.addSuppressed(closeFailure);
                }
            }
            try {
                pw.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Fetches a transcript through a throw-away browser session that is closed before returning.
     *
     * @param videoId YouTube video ID
     * @return the transcript labelled {@code "browser"}, or {@code null} when the session could
     *         not be created or no transcript was read
     */
    private TranscriptResult getTranscriptBrowser(String videoId) {
        try (BrowserSession session = createBrowserSession()) {
            return fetchTranscriptFromPage(session.page(), videoId);
        } catch (Exception e) {
            // Trailing throwable keeps the stack trace, which a browser start-up failure needs.
            log.error("[TRANSCRIPT] Playwright failed for {}: {}", videoId, e.getMessage(), e);
            return null;
        }
    }

    /**
     * Navigates the given page to the video's watch page and scrapes the transcript panel.
     * <p>
     * Steps, in order: open the watch page, run {@code skipAds}, expand the description, click the
     * "Show transcript" button, poll until transcript segments are rendered, run
     * {@code selectKorean}, then scroll the panel and collect the text of every segment.
     *
     * @param page    Playwright page to drive; it is navigated to the video's watch URL
     * @param videoId YouTube video ID appended to the watch URL
     * @return the segment texts joined with single spaces and labelled {@code "browser"}, or
     *         {@code null} when the transcript button or the segments cannot be found, or when
     *         any exception is thrown along the way
     */
    @SuppressWarnings("unchecked")
    private TranscriptResult fetchTranscriptFromPage(Page page, String videoId) {
        try {
            log.info("[TRANSCRIPT] Opening YouTube page for {}", videoId);
            page.navigate("https://www.youtube.com/watch?v=" + videoId,
                    new Page.NavigateOptions().setWaitUntil(WaitUntilState.DOMCONTENTLOADED).setTimeout(30000));
            // Fixed pause after DOMContentLoaded — presumably to let the player render; TODO confirm.
            page.waitForTimeout(3000);

            skipAds(page);

            page.waitForTimeout(1000);
            log.info("[TRANSCRIPT] Page loaded, looking for transcript button");

            // Click "Show more" (expand description)
            page.evaluate("""
                () => {
                    const moreBtn = document.querySelector('tp-yt-paper-button#expand');
                    if (moreBtn) moreBtn.click();
                }
            """);
            page.waitForTimeout(2000);

            // Click transcript button. The script tries three lookups in turn and returns a string
            // naming the one that matched, or false when none did.
            Object clicked = page.evaluate("""
                () => {
                    // Method 1: aria-label
                    for (const label of ['스크립트 표시', 'Show transcript']) {
                        const btns = document.querySelectorAll(`button[aria-label="${label}"]`);
                        for (const b of btns) { b.click(); return 'aria-label: ' + label; }
                    }
                    // Method 2: text content
                    const allBtns = document.querySelectorAll('button');
                    for (const b of allBtns) {
                        const text = b.textContent.trim();
                        if (text === '스크립트 표시' || text === 'Show transcript') {
                            b.click();
                            return 'text: ' + text;
                        }
                    }
                    // Method 3: engagement panel buttons
                    const engBtns = document.querySelectorAll('ytd-button-renderer button, ytd-button-renderer a');
                    for (const b of engBtns) {
                        const text = b.textContent.trim().toLowerCase();
                        if (text.includes('transcript') || text.includes('스크립트')) {
                            b.click();
                            return 'engagement: ' + text;
                        }
                    }
                    return false;
                }
            """);
            log.info("[TRANSCRIPT] Clicked transcript button: {}", clicked);

            // No button matched: log up to 30 aria-labels that are present, to help diagnose, and give up.
            if (Boolean.FALSE.equals(clicked)) {
                Object btnLabels = page.evaluate("""
                    () => {
                        const btns = document.querySelectorAll('button[aria-label]');
                        return Array.from(btns).map(b => b.getAttribute('aria-label')).slice(0, 30);
                    }
                """);
                log.warn("[TRANSCRIPT] Transcript button not found. Available buttons: {}", btnLabels);
                return null;
            }

            // Wait for transcript segments to appear (max ~15s)
            // 2s initial pause, then up to 10 polls 1.5s apart; stops at the first poll that sees
            // a segment. Execution continues even if no segment ever showed up.
            page.waitForTimeout(2000);
            for (int attempt = 0; attempt < 10; attempt++) {
                page.waitForTimeout(1500);
                Object count = page.evaluate(
                        "() => document.querySelectorAll('ytd-transcript-segment-renderer').length");
                int segCount = count instanceof Number n ? n.intValue() : 0;
                // Logged value is the time waited since the click: 2s + 1.5s per poll so far.
                log.info("[TRANSCRIPT] Wait {}s: {} segments", (attempt + 1) * 1.5 + 2, segCount);
                if (segCount > 0) break;
            }

            selectKorean(page);

            // Scroll transcript panel and collect segments
            // Without a scroll container the segments currently in the DOM are read as-is.
            // Otherwise the container is scrolled to the bottom up to 50 times, 300ms apart,
            // stopping once the segment count no longer grows (checked from the fifth pass on).
            Object segmentsObj = page.evaluate("""
                async () => {
                    const container = document.querySelector(
                        'ytd-transcript-segment-list-renderer #segments-container, ' +
                        'ytd-transcript-renderer #body'
                    );
                    if (!container) {
                        const segs = document.querySelectorAll('ytd-transcript-segment-renderer');
                        return Array.from(segs).map(s => {
                            const txt = s.querySelector('.segment-text, yt-formatted-string.segment-text');
                            return txt ? txt.textContent.trim() : '';
                        }).filter(t => t);
                    }

                    let prevCount = 0;
                    for (let i = 0; i < 50; i++) {
                        container.scrollTop = container.scrollHeight;
                        await new Promise(r => setTimeout(r, 300));
                        const segs = document.querySelectorAll('ytd-transcript-segment-renderer');
                        if (segs.length === prevCount && i > 3) break;
                        prevCount = segs.length;
                    }

                    const segs = document.querySelectorAll('ytd-transcript-segment-renderer');
                    return Array.from(segs).map(s => {
                        const txt = s.querySelector('.segment-text, yt-formatted-string.segment-text');
                        return txt ? txt.textContent.trim() : '';
                    }).filter(t => t);
                }
            """);

            // Only a non-empty List result counts as success.
            if (segmentsObj instanceof List<?> segments && !segments.isEmpty()) {
                String text = segments.stream()
                        .map(Object::toString)
                        .collect(Collectors.joining(" "));
                log.info("[TRANSCRIPT] Browser success: {} chars from {} segments", text.length(), segments.size());
                return new TranscriptResult(text, "browser");
            }

            log.warn("[TRANSCRIPT] No segments found via browser for {}", videoId);
            return null;
        } catch (Exception e) {
            // Any failure (navigation, evaluation, timeout) is reported as "no transcript".
            log.error("[TRANSCRIPT] Page fetch failed for {}: {}", videoId, e.getMessage());
            return null;
        }
    }

    /**
     * Polls the player for ads, at most 30 times and one second apart.
     * Each poll clicks a skip button if there is one; otherwise, while an ad overlay is showing,
     * it mutes the video and seeks to its end. Polling ends as soon as no ad is detected or a
     * skip button was clicked.
     *
     * @param page page currently showing the watch page
     */
    private void skipAds(Page page) {
        for (int poll = 0; poll < 30; poll++) {
            String status = String.valueOf(page.evaluate("""
                () => {
                    const skipBtn = document.querySelector('.ytp-skip-ad-button, .ytp-ad-skip-button, .ytp-ad-skip-button-modern, button.ytp-ad-skip-button-modern');
                    if (skipBtn) { skipBtn.click(); return 'skipped'; }
                    const adOverlay = document.querySelector('.ytp-ad-player-overlay, .ad-showing');
                    if (adOverlay) {
                        // 광고 중: 뮤트 + 끝으로 이동 시도
                        const video = document.querySelector('video');
                        if (video) {
                            video.muted = true;
                            if (video.duration && isFinite(video.duration)) {
                                video.currentTime = video.duration;
                            }
                        }
                        return 'playing';
                    }
                    const adBadge = document.querySelector('.ytp-ad-text');
                    if (adBadge && adBadge.textContent) return 'badge';
                    return 'none';
                }
            """));

            // Nothing ad-related on screen: done, without logging or waiting.
            if (status.equals("none")) {
                return;
            }

            log.info("[TRANSCRIPT] Ad detected: {}, waiting...", status);
            page.waitForTimeout(1000);

            // A clicked skip button ends the polling after the one-second pause.
            if (status.equals("skipped")) {
                return;
            }
        }
    }

    /**
     * Tries to switch the transcript panel to Korean: opens the panel's language dropdown, waits
     * one second, clicks the first entry whose text contains "한국어" or "Korean", then waits two
     * seconds. Both steps do nothing when the element they look for is absent.
     *
     * @param page page with the transcript panel open
     */
    private void selectKorean(Page page) {
        String openLanguageMenu = """
            () => {
                const menu = document.querySelector('ytd-transcript-renderer ytd-menu-renderer yt-dropdown-menu');
                if (!menu) return;
                const trigger = menu.querySelector('button, tp-yt-paper-button');
                if (trigger) trigger.click();
            }
        """;
        String pickKoreanEntry = """
            () => {
                const items = document.querySelectorAll('tp-yt-paper-listbox a, tp-yt-paper-listbox tp-yt-paper-item');
                for (const item of items) {
                    const text = item.textContent.trim();
                    if (text.includes('한국어') || text.includes('Korean')) {
                        item.click();
                        return;
                    }
                }
            }
        """;

        page.evaluate(openLanguageMenu);
        page.waitForTimeout(1000);

        page.evaluate(pickKoreanEntry);
        page.waitForTimeout(2000);
    }

    /**
     * Loads YouTube/Google cookies from a Netscape-format {@code cookies.txt} in the working
     * directory into the browser context. Does nothing when the file is absent; any failure is
     * logged at debug level and otherwise ignored.
     * <p>
     * In the Netscape format, HttpOnly cookies are written on lines prefixed with
     * {@code #HttpOnly_}. Those lines are real cookies, not comments, so the prefix is stripped
     * and the cookie is added with its HttpOnly flag set.
     *
     * @param ctx browser context that receives the cookies
     */
    private void loadCookies(BrowserContext ctx) {
        final String httpOnlyPrefix = "#HttpOnly_";
        try {
            Path cookieFile = Path.of(System.getProperty("user.dir"), "cookies.txt");
            if (!java.nio.file.Files.isRegularFile(cookieFile)) return;

            List<String> lines = java.nio.file.Files.readAllLines(cookieFile);
            List<Cookie> cookies = new ArrayList<>();
            for (String rawLine : lines) {
                String line = rawLine;
                boolean httpOnly = false;
                if (line.startsWith(httpOnlyPrefix)) {
                    httpOnly = true;
                    line = line.substring(httpOnlyPrefix.length());
                }
                if (line.startsWith("#") || line.isBlank()) continue;

                // Columns: domain, subdomain flag, path, secure, expiry, name, value.
                String[] parts = line.split("\t");
                if (parts.length < 7) continue;
                String domain = parts[0];
                if (!domain.contains("youtube") && !domain.contains("google")) continue;
                cookies.add(new Cookie(parts[5], parts[6])
                        .setDomain(domain)
                        .setPath(parts[2])
                        .setSecure("TRUE".equalsIgnoreCase(parts[3]))
                        .setHttpOnly(httpOnly));
            }
            if (!cookies.isEmpty()) {
                ctx.addCookies(cookies);
                log.info("[TRANSCRIPT] Loaded {} cookies", cookies.size());
            }
        } catch (Exception e) {
            // Cookies are optional: the session still works without them.
            log.debug("Failed to load cookies: {}", e.getMessage());
        }
    }
}