- 벌크 자막: 브라우저 우선 + API fallback, 광고 즉시 skip, 대기 시간 단축 - 벌크 자막/추출: 선택한 영상만 처리 가능 (체크박스 선택 후 실행) - 자막 실패 시 no_transcript 상태 마킹하여 재시도 방지 - 검색 시 필터 조건 무시 (채널/장르/가격/지역/영역 초기화) - 리셋 버튼 클릭 시 검색어 입력란 초기화 - RestaurantMapper updateFields에 google_place_id, rating 등 geocoding 필드 추가 - SearchMapper에 tabling_url, catchtable_url, phone, website 필드 추가 - 식당 상세에 네이버 지도 링크 추가 - YouTubeService.getTranscriptApi public 전환 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
596 lines
26 KiB
Java
596 lines
26 KiB
Java
package com.tasteby.service;
|
|
|
|
import com.fasterxml.jackson.databind.JsonNode;
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
import io.github.thoroldvix.api.Transcript;
|
|
import io.github.thoroldvix.api.TranscriptContent;
|
|
import io.github.thoroldvix.api.TranscriptList;
|
|
import io.github.thoroldvix.api.TranscriptApiFactory;
|
|
import io.github.thoroldvix.api.YoutubeTranscriptApi;
|
|
import com.microsoft.playwright.*;
|
|
import com.microsoft.playwright.options.Cookie;
|
|
import com.microsoft.playwright.options.WaitUntilState;
|
|
import com.tasteby.domain.Channel;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
import org.springframework.stereotype.Service;
|
|
import org.springframework.web.reactive.function.client.WebClient;
|
|
|
|
import java.nio.file.Path;
|
|
import java.time.Duration;
|
|
import java.util.*;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
import java.util.stream.Collectors;
|
|
|
|
@Service
|
|
public class YouTubeService {
|
|
|
|
/** Class-wide SLF4J logger. */
private static final Logger log = LoggerFactory.getLogger(YouTubeService.class);

/** Matches time-only ISO-8601 durations ({@code PT#H#M#S}); each of the three components is optional. */
private static final Pattern DURATION_PATTERN = Pattern.compile("PT(?:(\\d+)H)?(?:(\\d+)M)?(?:(\\d+)S)?");

/** HTTP client whose base URL is the YouTube Data API v3 root. */
private final WebClient webClient;

/** JSON parser used to read API response bodies. */
private final ObjectMapper mapper;

/** Looks up channels (single channel by ID, all active channels). */
private final ChannelService channelService;

/** Reads known videos and stores newly discovered ones. */
private final VideoService videoService;

/** API key sent as the {@code key} query parameter on every Data API request. */
private final String apiKey;

/**
 * Wires the collaborators and builds the Data API HTTP client.
 *
 * @param mapper         JSON mapper used for response parsing
 * @param channelService channel lookup service
 * @param videoService   video persistence service
 * @param apiKey         value of the {@code app.google.youtube-api-key} property
 */
public YouTubeService(ObjectMapper mapper,
                      ChannelService channelService,
                      VideoService videoService,
                      @Value("${app.google.youtube-api-key}") String apiKey) {
    this.mapper = mapper;
    this.channelService = channelService;
    this.videoService = videoService;
    this.apiKey = apiKey;

    WebClient.Builder clientBuilder = WebClient.builder()
            .baseUrl("https://www.googleapis.com/youtube/v3");
    this.webClient = clientBuilder.build();
}
|
|
|
|
/**
|
|
* Fetch videos from a YouTube channel using the uploads playlist (UC→UU).
|
|
* This returns ALL videos unlike the Search API which caps results.
|
|
* Falls back to Search API if playlist approach fails.
|
|
*/
|
|
public List<Map<String, Object>> fetchChannelVideos(String channelId, String publishedAfter, boolean excludeShorts) {
|
|
// Convert channel ID UC... → uploads playlist UU...
|
|
String uploadsPlaylistId = "UU" + channelId.substring(2);
|
|
List<Map<String, Object>> allVideos = new ArrayList<>();
|
|
String nextPage = null;
|
|
|
|
try {
|
|
do {
|
|
String pageToken = nextPage;
|
|
String response = webClient.get()
|
|
.uri(uriBuilder -> {
|
|
var b = uriBuilder.path("/playlistItems")
|
|
.queryParam("key", apiKey)
|
|
.queryParam("playlistId", uploadsPlaylistId)
|
|
.queryParam("part", "snippet")
|
|
.queryParam("maxResults", 50);
|
|
if (pageToken != null) b.queryParam("pageToken", pageToken);
|
|
return b.build();
|
|
})
|
|
.retrieve()
|
|
.bodyToMono(String.class)
|
|
.block(Duration.ofSeconds(30));
|
|
|
|
JsonNode data = mapper.readTree(response);
|
|
List<Map<String, Object>> pageVideos = new ArrayList<>();
|
|
|
|
for (JsonNode item : data.path("items")) {
|
|
JsonNode snippet = item.path("snippet");
|
|
String vid = snippet.path("resourceId").path("videoId").asText();
|
|
String publishedAt = snippet.path("publishedAt").asText();
|
|
|
|
// publishedAfter 필터: 이미 스캔한 영상 이후만
|
|
if (publishedAfter != null && publishedAt.compareTo(publishedAfter) <= 0) {
|
|
// 업로드 재생목록은 최신순이므로 이전 날짜 만나면 중단
|
|
nextPage = null;
|
|
break;
|
|
}
|
|
|
|
pageVideos.add(Map.of(
|
|
"video_id", vid,
|
|
"title", snippet.path("title").asText(),
|
|
"published_at", publishedAt,
|
|
"url", "https://www.youtube.com/watch?v=" + vid
|
|
));
|
|
}
|
|
|
|
if (excludeShorts && !pageVideos.isEmpty()) {
|
|
pageVideos = filterShorts(pageVideos);
|
|
}
|
|
allVideos.addAll(pageVideos);
|
|
|
|
if (nextPage != null || data.has("nextPageToken")) {
|
|
nextPage = data.has("nextPageToken") ? data.path("nextPageToken").asText() : null;
|
|
}
|
|
} while (nextPage != null);
|
|
} catch (Exception e) {
|
|
log.warn("PlaylistItems API failed for {}, falling back to Search API", channelId, e);
|
|
return fetchChannelVideosViaSearch(channelId, publishedAfter, excludeShorts);
|
|
}
|
|
|
|
return allVideos;
|
|
}
|
|
|
|
/**
|
|
* Fallback: fetch via Search API (may not return all videos).
|
|
*/
|
|
private List<Map<String, Object>> fetchChannelVideosViaSearch(String channelId, String publishedAfter, boolean excludeShorts) {
|
|
List<Map<String, Object>> allVideos = new ArrayList<>();
|
|
String nextPage = null;
|
|
|
|
do {
|
|
String pageToken = nextPage;
|
|
String response = webClient.get()
|
|
.uri(uriBuilder -> {
|
|
var b = uriBuilder.path("/search")
|
|
.queryParam("key", apiKey)
|
|
.queryParam("channelId", channelId)
|
|
.queryParam("part", "snippet")
|
|
.queryParam("order", "date")
|
|
.queryParam("maxResults", 50)
|
|
.queryParam("type", "video");
|
|
if (publishedAfter != null) b.queryParam("publishedAfter", publishedAfter);
|
|
if (pageToken != null) b.queryParam("pageToken", pageToken);
|
|
return b.build();
|
|
})
|
|
.retrieve()
|
|
.bodyToMono(String.class)
|
|
.block(Duration.ofSeconds(30));
|
|
|
|
try {
|
|
JsonNode data = mapper.readTree(response);
|
|
List<Map<String, Object>> pageVideos = new ArrayList<>();
|
|
|
|
for (JsonNode item : data.path("items")) {
|
|
String vid = item.path("id").path("videoId").asText();
|
|
JsonNode snippet = item.path("snippet");
|
|
pageVideos.add(Map.of(
|
|
"video_id", vid,
|
|
"title", snippet.path("title").asText(),
|
|
"published_at", snippet.path("publishedAt").asText(),
|
|
"url", "https://www.youtube.com/watch?v=" + vid
|
|
));
|
|
}
|
|
|
|
if (excludeShorts && !pageVideos.isEmpty()) {
|
|
pageVideos = filterShorts(pageVideos);
|
|
}
|
|
allVideos.addAll(pageVideos);
|
|
|
|
nextPage = data.has("nextPageToken") ? data.path("nextPageToken").asText() : null;
|
|
} catch (Exception e) {
|
|
log.error("Failed to parse YouTube Search API response", e);
|
|
break;
|
|
}
|
|
} while (nextPage != null);
|
|
|
|
return allVideos;
|
|
}
|
|
|
|
/**
|
|
* Filter out YouTube Shorts (<=60s duration).
|
|
* YouTube /videos API accepts max 50 IDs per request, so we batch.
|
|
*/
|
|
private List<Map<String, Object>> filterShorts(List<Map<String, Object>> videos) {
|
|
Map<String, Integer> durations = new HashMap<>();
|
|
List<String> allIds = videos.stream().map(v -> (String) v.get("video_id")).toList();
|
|
|
|
for (int i = 0; i < allIds.size(); i += 50) {
|
|
List<String> batch = allIds.subList(i, Math.min(i + 50, allIds.size()));
|
|
String ids = String.join(",", batch);
|
|
try {
|
|
String response = webClient.get()
|
|
.uri(uriBuilder -> uriBuilder.path("/videos")
|
|
.queryParam("key", apiKey)
|
|
.queryParam("id", ids)
|
|
.queryParam("part", "contentDetails")
|
|
.build())
|
|
.retrieve()
|
|
.bodyToMono(String.class)
|
|
.block(Duration.ofSeconds(30));
|
|
|
|
JsonNode data = mapper.readTree(response);
|
|
for (JsonNode item : data.path("items")) {
|
|
String duration = item.path("contentDetails").path("duration").asText();
|
|
durations.put(item.path("id").asText(), parseDuration(duration));
|
|
}
|
|
} catch (Exception e) {
|
|
log.warn("Failed to fetch video durations for batch starting at {}", i, e);
|
|
}
|
|
}
|
|
|
|
return videos.stream()
|
|
.filter(v -> durations.getOrDefault(v.get("video_id"), 61) > 60)
|
|
.toList();
|
|
}
|
|
|
|
private int parseDuration(String dur) {
|
|
Matcher m = DURATION_PATTERN.matcher(dur != null ? dur : "");
|
|
if (!m.matches()) return 0;
|
|
int h = m.group(1) != null ? Integer.parseInt(m.group(1)) : 0;
|
|
int min = m.group(2) != null ? Integer.parseInt(m.group(2)) : 0;
|
|
int s = m.group(3) != null ? Integer.parseInt(m.group(3)) : 0;
|
|
return h * 3600 + min * 60 + s;
|
|
}
|
|
|
|
/**
|
|
* Scan a single channel for new videos. Returns scan result map.
|
|
*/
|
|
public Map<String, Object> scanChannel(String channelId, boolean full) {
|
|
Channel ch = channelService.findByChannelId(channelId);
|
|
if (ch == null) return null;
|
|
|
|
String dbId = ch.getId();
|
|
String titleFilter = ch.getTitleFilter();
|
|
String after = full ? null : videoService.getLatestVideoDate(dbId);
|
|
Set<String> existing = videoService.getExistingVideoIds(dbId);
|
|
|
|
List<Map<String, Object>> allFetched = fetchChannelVideos(channelId, after, true);
|
|
int totalFetched = allFetched.size();
|
|
|
|
List<Map<String, Object>> candidates = new ArrayList<>();
|
|
for (var v : allFetched) {
|
|
if (titleFilter != null && !((String) v.get("title")).contains(titleFilter)) continue;
|
|
if (existing.contains(v.get("video_id"))) continue;
|
|
candidates.add(v);
|
|
}
|
|
|
|
int newCount = videoService.saveVideosBatch(dbId, candidates);
|
|
return Map.of(
|
|
"total_fetched", totalFetched,
|
|
"new_videos", newCount,
|
|
"filtered", titleFilter != null ? totalFetched - candidates.size() : 0
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Scan all active channels. Returns total new video count.
|
|
*/
|
|
public int scanAllChannels() {
|
|
List<Channel> channels = channelService.findAllActive();
|
|
int totalNew = 0;
|
|
for (var ch : channels) {
|
|
try {
|
|
var result = scanChannel(ch.getChannelId(), false);
|
|
if (result != null) {
|
|
totalNew += ((Number) result.get("new_videos")).intValue();
|
|
}
|
|
} catch (Exception e) {
|
|
log.error("Failed to scan channel {}: {}", ch.getChannelName(), e.getMessage());
|
|
}
|
|
}
|
|
return totalNew;
|
|
}
|
|
|
|
public record TranscriptResult(String text, String source) {}
|
|
|
|
private static final List<String> PREFERRED_LANGS = List.of("ko", "en");
|
|
private final YoutubeTranscriptApi transcriptApi = TranscriptApiFactory.createDefault();
|
|
|
|
/**
|
|
* Fetch transcript for a YouTube video.
|
|
* Tries API first (fast), then falls back to Playwright browser extraction.
|
|
* @param mode "auto" = manual first then generated, "manual" = manual only, "generated" = generated only
|
|
*/
|
|
public TranscriptResult getTranscript(String videoId, String mode) {
|
|
if (mode == null) mode = "auto";
|
|
|
|
// 1) Playwright headed browser (봇 판정 회피)
|
|
TranscriptResult browserResult = getTranscriptBrowser(videoId);
|
|
if (browserResult != null) return browserResult;
|
|
|
|
// 2) Fallback: youtube-transcript-api
|
|
log.warn("Browser failed for {}, trying API", videoId);
|
|
return getTranscriptApi(videoId, mode);
|
|
}
|
|
|
|
public TranscriptResult getTranscriptApi(String videoId, String mode) {
|
|
TranscriptList transcriptList;
|
|
try {
|
|
transcriptList = transcriptApi.listTranscripts(videoId);
|
|
} catch (Exception e) {
|
|
log.warn("Cannot list transcripts for {}: {}", videoId, e.getMessage());
|
|
return null;
|
|
}
|
|
|
|
String[] langs = PREFERRED_LANGS.toArray(String[]::new);
|
|
|
|
return switch (mode) {
|
|
case "manual" -> fetchTranscript(transcriptList, langs, true);
|
|
case "generated" -> fetchTranscript(transcriptList, langs, false);
|
|
default -> {
|
|
// auto: try manual first, then generated
|
|
var result = fetchTranscript(transcriptList, langs, true);
|
|
if (result != null) yield result;
|
|
yield fetchTranscript(transcriptList, langs, false);
|
|
}
|
|
};
|
|
}
|
|
|
|
private TranscriptResult fetchTranscript(TranscriptList list, String[] langs, boolean manual) {
|
|
Transcript picked;
|
|
try {
|
|
picked = manual ? list.findManualTranscript(langs) : list.findGeneratedTranscript(langs);
|
|
} catch (Exception e) {
|
|
return null;
|
|
}
|
|
|
|
try {
|
|
TranscriptContent content = picked.fetch();
|
|
String text = content.getContent().stream()
|
|
.map(TranscriptContent.Fragment::getText)
|
|
.collect(Collectors.joining(" "));
|
|
if (text.isBlank()) return null;
|
|
String label = manual ? "manual" : "generated";
|
|
return new TranscriptResult(text, label + " (" + picked.getLanguageCode() + ")");
|
|
} catch (Exception e) {
|
|
log.warn("Failed to fetch transcript for language {}: {}", picked.getLanguageCode(), e.getMessage());
|
|
return null;
|
|
}
|
|
}
|
|
|
|
// ─── Playwright browser ───────────────────────────────────────────────────
|
|
|
|
/**
|
|
* Fetch transcript using an existing Playwright Page (for bulk reuse).
|
|
*/
|
|
@SuppressWarnings("unchecked")
|
|
public TranscriptResult getTranscriptWithPage(Page page, String videoId) {
|
|
return fetchTranscriptFromPage(page, videoId);
|
|
}
|
|
|
|
/**
|
|
* Create a Playwright browser + context + page for transcript fetching.
|
|
* Caller must close the returned resources (Playwright, Browser).
|
|
*/
|
|
public record BrowserSession(Playwright playwright, Browser browser, Page page) implements AutoCloseable {
|
|
@Override
|
|
public void close() {
|
|
try { browser.close(); } catch (Exception ignored) {}
|
|
try { playwright.close(); } catch (Exception ignored) {}
|
|
}
|
|
}
|
|
|
|
public BrowserSession createBrowserSession() {
|
|
Playwright pw = Playwright.create();
|
|
Browser browser = pw.chromium().launch(new BrowserType.LaunchOptions()
|
|
.setHeadless(false)
|
|
.setArgs(List.of("--disable-blink-features=AutomationControlled")));
|
|
BrowserContext ctx = browser.newContext(new Browser.NewContextOptions()
|
|
.setLocale("ko-KR")
|
|
.setViewportSize(1280, 900));
|
|
loadCookies(ctx);
|
|
Page page = ctx.newPage();
|
|
page.addInitScript("Object.defineProperty(navigator, 'webdriver', {get: () => false})");
|
|
return new BrowserSession(pw, browser, page);
|
|
}
|
|
|
|
@SuppressWarnings("unchecked")
|
|
private TranscriptResult getTranscriptBrowser(String videoId) {
|
|
try (BrowserSession session = createBrowserSession()) {
|
|
return fetchTranscriptFromPage(session.page(), videoId);
|
|
} catch (Exception e) {
|
|
log.error("[TRANSCRIPT] Playwright failed for {}: {}", videoId, e.getMessage());
|
|
return null;
|
|
}
|
|
}
|
|
|
|
@SuppressWarnings("unchecked")
|
|
private TranscriptResult fetchTranscriptFromPage(Page page, String videoId) {
|
|
try {
|
|
log.info("[TRANSCRIPT] Opening YouTube page for {}", videoId);
|
|
page.navigate("https://www.youtube.com/watch?v=" + videoId,
|
|
new Page.NavigateOptions().setWaitUntil(WaitUntilState.DOMCONTENTLOADED).setTimeout(30000));
|
|
page.waitForTimeout(3000);
|
|
|
|
skipAds(page);
|
|
|
|
page.waitForTimeout(1000);
|
|
log.info("[TRANSCRIPT] Page loaded, looking for transcript button");
|
|
|
|
// Click "더보기" (expand description)
|
|
page.evaluate("""
|
|
() => {
|
|
const moreBtn = document.querySelector('tp-yt-paper-button#expand');
|
|
if (moreBtn) moreBtn.click();
|
|
}
|
|
""");
|
|
page.waitForTimeout(2000);
|
|
|
|
// Click transcript button
|
|
Object clicked = page.evaluate("""
|
|
() => {
|
|
// Method 1: aria-label
|
|
for (const label of ['스크립트 표시', 'Show transcript']) {
|
|
const btns = document.querySelectorAll(`button[aria-label="${label}"]`);
|
|
for (const b of btns) { b.click(); return 'aria-label: ' + label; }
|
|
}
|
|
// Method 2: text content
|
|
const allBtns = document.querySelectorAll('button');
|
|
for (const b of allBtns) {
|
|
const text = b.textContent.trim();
|
|
if (text === '스크립트 표시' || text === 'Show transcript') {
|
|
b.click();
|
|
return 'text: ' + text;
|
|
}
|
|
}
|
|
// Method 3: engagement panel buttons
|
|
const engBtns = document.querySelectorAll('ytd-button-renderer button, ytd-button-renderer a');
|
|
for (const b of engBtns) {
|
|
const text = b.textContent.trim().toLowerCase();
|
|
if (text.includes('transcript') || text.includes('스크립트')) {
|
|
b.click();
|
|
return 'engagement: ' + text;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
""");
|
|
log.info("[TRANSCRIPT] Clicked transcript button: {}", clicked);
|
|
|
|
if (Boolean.FALSE.equals(clicked)) {
|
|
Object btnLabels = page.evaluate("""
|
|
() => {
|
|
const btns = document.querySelectorAll('button[aria-label]');
|
|
return Array.from(btns).map(b => b.getAttribute('aria-label')).slice(0, 30);
|
|
}
|
|
""");
|
|
log.warn("[TRANSCRIPT] Transcript button not found. Available buttons: {}", btnLabels);
|
|
return null;
|
|
}
|
|
|
|
// Wait for transcript segments to appear (max ~15s)
|
|
page.waitForTimeout(2000);
|
|
for (int attempt = 0; attempt < 10; attempt++) {
|
|
page.waitForTimeout(1500);
|
|
Object count = page.evaluate(
|
|
"() => document.querySelectorAll('ytd-transcript-segment-renderer').length");
|
|
int segCount = count instanceof Number n ? n.intValue() : 0;
|
|
log.info("[TRANSCRIPT] Wait {}s: {} segments", (attempt + 1) * 1.5 + 2, segCount);
|
|
if (segCount > 0) break;
|
|
}
|
|
|
|
selectKorean(page);
|
|
|
|
// Scroll transcript panel and collect segments
|
|
Object segmentsObj = page.evaluate("""
|
|
async () => {
|
|
const container = document.querySelector(
|
|
'ytd-transcript-segment-list-renderer #segments-container, ' +
|
|
'ytd-transcript-renderer #body'
|
|
);
|
|
if (!container) {
|
|
const segs = document.querySelectorAll('ytd-transcript-segment-renderer');
|
|
return Array.from(segs).map(s => {
|
|
const txt = s.querySelector('.segment-text, yt-formatted-string.segment-text');
|
|
return txt ? txt.textContent.trim() : '';
|
|
}).filter(t => t);
|
|
}
|
|
|
|
let prevCount = 0;
|
|
for (let i = 0; i < 50; i++) {
|
|
container.scrollTop = container.scrollHeight;
|
|
await new Promise(r => setTimeout(r, 300));
|
|
const segs = document.querySelectorAll('ytd-transcript-segment-renderer');
|
|
if (segs.length === prevCount && i > 3) break;
|
|
prevCount = segs.length;
|
|
}
|
|
|
|
const segs = document.querySelectorAll('ytd-transcript-segment-renderer');
|
|
return Array.from(segs).map(s => {
|
|
const txt = s.querySelector('.segment-text, yt-formatted-string.segment-text');
|
|
return txt ? txt.textContent.trim() : '';
|
|
}).filter(t => t);
|
|
}
|
|
""");
|
|
|
|
if (segmentsObj instanceof List<?> segments && !segments.isEmpty()) {
|
|
String text = segments.stream()
|
|
.map(Object::toString)
|
|
.collect(Collectors.joining(" "));
|
|
log.info("[TRANSCRIPT] Browser success: {} chars from {} segments", text.length(), segments.size());
|
|
return new TranscriptResult(text, "browser");
|
|
}
|
|
|
|
log.warn("[TRANSCRIPT] No segments found via browser for {}", videoId);
|
|
return null;
|
|
} catch (Exception e) {
|
|
log.error("[TRANSCRIPT] Page fetch failed for {}: {}", videoId, e.getMessage());
|
|
return null;
|
|
}
|
|
}
|
|
|
|
private void skipAds(Page page) {
|
|
for (int i = 0; i < 30; i++) {
|
|
Object adStatus = page.evaluate("""
|
|
() => {
|
|
const skipBtn = document.querySelector('.ytp-skip-ad-button, .ytp-ad-skip-button, .ytp-ad-skip-button-modern, button.ytp-ad-skip-button-modern');
|
|
if (skipBtn) { skipBtn.click(); return 'skipped'; }
|
|
const adOverlay = document.querySelector('.ytp-ad-player-overlay, .ad-showing');
|
|
if (adOverlay) {
|
|
// 광고 중: 뮤트 + 끝으로 이동 시도
|
|
const video = document.querySelector('video');
|
|
if (video) {
|
|
video.muted = true;
|
|
if (video.duration && isFinite(video.duration)) {
|
|
video.currentTime = video.duration;
|
|
}
|
|
}
|
|
return 'playing';
|
|
}
|
|
const adBadge = document.querySelector('.ytp-ad-text');
|
|
if (adBadge && adBadge.textContent) return 'badge';
|
|
return 'none';
|
|
}
|
|
""");
|
|
String status = String.valueOf(adStatus);
|
|
if ("none".equals(status)) break;
|
|
log.info("[TRANSCRIPT] Ad detected: {}, waiting...", status);
|
|
if ("skipped".equals(status)) {
|
|
page.waitForTimeout(1000);
|
|
break;
|
|
}
|
|
page.waitForTimeout(1000);
|
|
}
|
|
}
|
|
|
|
private void selectKorean(Page page) {
|
|
page.evaluate("""
|
|
() => {
|
|
const menu = document.querySelector('ytd-transcript-renderer ytd-menu-renderer yt-dropdown-menu');
|
|
if (!menu) return;
|
|
const trigger = menu.querySelector('button, tp-yt-paper-button');
|
|
if (trigger) trigger.click();
|
|
}
|
|
""");
|
|
page.waitForTimeout(1000);
|
|
page.evaluate("""
|
|
() => {
|
|
const items = document.querySelectorAll('tp-yt-paper-listbox a, tp-yt-paper-listbox tp-yt-paper-item');
|
|
for (const item of items) {
|
|
const text = item.textContent.trim();
|
|
if (text.includes('한국어') || text.includes('Korean')) {
|
|
item.click();
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
""");
|
|
page.waitForTimeout(2000);
|
|
}
|
|
|
|
private void loadCookies(BrowserContext ctx) {
|
|
try {
|
|
Path cookieFile = Path.of(System.getProperty("user.dir"), "cookies.txt");
|
|
if (!cookieFile.toFile().exists()) return;
|
|
|
|
List<String> lines = java.nio.file.Files.readAllLines(cookieFile);
|
|
List<Cookie> cookies = new ArrayList<>();
|
|
for (String line : lines) {
|
|
if (line.startsWith("#") || line.isBlank()) continue;
|
|
String[] parts = line.split("\t");
|
|
if (parts.length < 7) continue;
|
|
String domain = parts[0];
|
|
if (!domain.contains("youtube") && !domain.contains("google")) continue;
|
|
cookies.add(new Cookie(parts[5], parts[6])
|
|
.setDomain(domain)
|
|
.setPath(parts[2])
|
|
.setSecure("TRUE".equalsIgnoreCase(parts[3]))
|
|
.setHttpOnly(false));
|
|
}
|
|
if (!cookies.isEmpty()) {
|
|
ctx.addCookies(cookies);
|
|
log.info("[TRANSCRIPT] Loaded {} cookies", cookies.size());
|
|
}
|
|
} catch (Exception e) {
|
|
log.debug("Failed to load cookies: {}", e.getMessage());
|
|
}
|
|
}
|
|
}
|