Add YouTube cookie support to Playwright fallback for bot bypass
Load cookies.txt (Netscape format) into the Playwright browser context before navigating to YouTube, enabling authenticated access that bypasses the bot detection blocking transcript retrieval.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,9 @@ import org.springframework.stereotype.Service;
|
||||
import java.io.IOException;
|
||||
import java.net.URLDecoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
@@ -118,6 +121,10 @@ public class YouTubeTranscriptService {
|
||||
BrowserContext context = browser.newContext(new Browser.NewContextOptions()
|
||||
.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36")
|
||||
.setLocale("ko-KR"));
|
||||
|
||||
// YouTube 쿠키 로딩 (봇 차단 우회)
|
||||
loadCookies(context);
|
||||
|
||||
Page page = context.newPage();
|
||||
|
||||
page.navigate(watchUrl, new Page.NavigateOptions()
|
||||
@@ -218,6 +225,37 @@ public class YouTubeTranscriptService {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private void loadCookies(BrowserContext context) {
|
||||
Path cookieFile = Path.of(System.getProperty("user.dir"), "cookies.txt");
|
||||
if (!Files.exists(cookieFile)) {
|
||||
log.warn("cookies.txt not found at: {}", cookieFile);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
List<String> lines = Files.readAllLines(cookieFile);
|
||||
List<com.microsoft.playwright.options.Cookie> cookies = new ArrayList<>();
|
||||
for (String line : lines) {
|
||||
if (line.startsWith("#") || line.isBlank()) continue;
|
||||
String[] parts = line.split("\t");
|
||||
if (parts.length < 7) continue;
|
||||
String domain = parts[0];
|
||||
if (!domain.contains("youtube") && !domain.contains("google")) continue;
|
||||
cookies.add(new com.microsoft.playwright.options.Cookie(parts[5], parts[6])
|
||||
.setDomain(domain)
|
||||
.setPath(parts[2])
|
||||
.setSecure("TRUE".equalsIgnoreCase(parts[3]))
|
||||
.setHttpOnly(false));
|
||||
}
|
||||
if (!cookies.isEmpty()) {
|
||||
context.addCookies(cookies);
|
||||
log.info("Loaded {} YouTube cookies", cookies.size());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to load cookies: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private String extractVideoId(String url) {
|
||||
if (url == null || url.isBlank()) return null;
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user