Switch to user Chrome CDP for YouTube transcript, fix auth and ads
- Replace Playwright standalone browser with CDP connection to user Chrome
  (bypasses YouTube bot detection by using logged-in Chrome session)
- Add video playback, ad detection/skip, and play confirmation before
  transcript extraction
- Extract transcript JS to separate resource files (fix SyntaxError in evaluate)
- Add ytInitialPlayerResponse-based transcript extraction as primary method
- Fix token refresh: retry on network error during backend restart
- Fix null userId logout, CLOB type hint for structured_content
- Disable XFCE screen lock/screensaver
- Add troubleshooting entries (#10-12) and YouTube transcript guide

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,33 @@
|
||||
() => {
    /*
     * Scrapes transcript text from the DOM of the current page.
     *
     * Each candidate selector is tried in order; the first one that yields
     * at least one non-empty text node wins, and its texts are returned
     * joined by single spaces. If no selector produces text, the whole
     * inner text of the transcript engagement panel is returned, provided
     * it is longer than 100 characters. Otherwise returns ''.
     *
     * Returns: string (possibly empty).
     */
    const candidateSelectors = [
        'ytd-transcript-segment-renderer .segment-text',
        'yt-formatted-string.segment-text',
        '#segments-container yt-formatted-string',
        'ytd-transcript-segment-list-renderer .segment-text',
        'ytd-transcript-segment-renderer yt-formatted-string',
        'ytd-engagement-panel-section-list-renderer yt-formatted-string.ytd-transcript-segment-renderer'
    ];

    for (const selector of candidateSelectors) {
        try {
            const lines = Array.from(
                document.querySelectorAll(selector),
                (node) => node.textContent.trim()
            ).filter((line) => line.length > 0);

            if (lines.length > 0) {
                return lines.join(' ');
            }
        } catch (ignored) {
            // Best effort: a failure on one selector must not prevent
            // the remaining selectors from being tried.
        }
    }

    // Last resort: take all text inside the transcript engagement panel.
    const panel = document.querySelector('ytd-engagement-panel-section-list-renderer[target-id="engagement-panel-searchable-transcript"]');
    const panelBody = panel ? panel.querySelector('#body, #content') : null;
    if (!panelBody) {
        return '';
    }

    const panelText = panelBody.innerText.trim();
    // Short panel text is treated as "no transcript" and discarded.
    return panelText.length > 100 ? panelText : '';
}
|
||||
@@ -0,0 +1,72 @@
|
||||
() => {
    /*
     * Fetches raw caption data for the current page using the caption track
     * list found in ytInitialPlayerResponse.
     *
     * Returns a JSON string, or a Promise resolving to a JSON string, of one
     * of these shapes:
     *   {type: 'json3', data: <response body>, lang: <track languageCode>}
     *   {type: 'xml',   data: <response body>, lang: <track languageCode>}
     *   {error: <reason or exception message>}
     *
     * Every failure, including a rejected fetch, is reported through the
     * {error: ...} shape rather than by throwing or rejecting.
     */

    // Upper bound on JSON.parse attempts per script, so a script that is not
    // actually JSON cannot cause unbounded re-parsing.
    const MAX_PARSE_ATTEMPTS = 100;

    // Turns any thrown/rejected value into the {error: ...} JSON result.
    const fail = (reason) => JSON.stringify({ error: reason });
    const describeError = (e) => (e && e.message ? e.message : String(e));

    // Extracts the object assigned to ytInitialPlayerResponse in an inline
    // script. A "};" sequence may also occur inside a JSON string value, so
    // each "};" after the opening brace is tried as the end of the object
    // until one candidate parses. Returns null when nothing parses.
    const parseEmbeddedPlayerResponse = (scriptText) => {
        const assignment = /ytInitialPlayerResponse\s*=\s*\{/.exec(scriptText);
        if (!assignment) {
            return null;
        }
        const start = assignment.index + assignment[0].length - 1;
        let end = scriptText.indexOf('};', start);
        for (let attempt = 0; end !== -1 && attempt < MAX_PARSE_ATTEMPTS; attempt++) {
            try {
                return JSON.parse(scriptText.slice(start, end + 1));
            } catch (parseError) {
                // Candidate was truncated; move on to the next "};".
                end = scriptText.indexOf('};', end + 1);
            }
        }
        return null;
    };

    try {
        // Method 1: read the caption data directly from ytInitialPlayerResponse.
        let playerResponse =
            typeof ytInitialPlayerResponse !== 'undefined' ? ytInitialPlayerResponse : null;

        if (!playerResponse) {
            // Fall back to extracting it from the inline <script> sources.
            for (const script of document.querySelectorAll('script')) {
                const scriptText = script.textContent;
                if (!scriptText || !scriptText.includes('ytInitialPlayerResponse')) {
                    continue;
                }
                playerResponse = parseEmbeddedPlayerResponse(scriptText);
                if (playerResponse) {
                    break; // First successfully parsed response wins.
                }
            }
        }

        if (!playerResponse) return fail('no_player_response');

        // Walk down to the caption track list.
        const captions = playerResponse.captions;
        if (!captions) return fail('no_captions');

        const renderer = captions.playerCaptionsTracklistRenderer;
        if (!renderer) return fail('no_renderer');

        const tracks = renderer.captionTracks;
        if (!tracks || tracks.length === 0) return fail('no_tracks');

        // Language priority: ko > en > first available track.
        const findByLanguage = (code) => tracks.find((track) => track.languageCode === code);
        const selectedTrack = findByLanguage('ko') || findByLanguage('en') || tracks[0];

        let baseUrl = selectedTrack.baseUrl;
        if (!baseUrl) return fail('no_base_url');

        // Request the json3 format unless the URL already specifies a format.
        if (baseUrl.indexOf('fmt=') === -1) {
            baseUrl += '&fmt=json3';
        }

        const lang = selectedTrack.languageCode;

        // Fetch inside the browser so the page's cookies are sent.
        // NOTE(review): the HTTP status is not checked; a non-2xx body is
        // returned to the caller as data. Confirm the caller tolerates that.
        const fetchText = (url) =>
            fetch(url, { credentials: 'include' }).then((res) => res.text());

        return fetchText(baseUrl)
            .then((text) => {
                if (text && text.length > 0) {
                    return JSON.stringify({ type: 'json3', data: text, lang: lang });
                }
                // Empty json3 response: retry without the fmt parameter (XML).
                return fetchText(baseUrl.replace('&fmt=json3', ''))
                    .then((xmlText) => JSON.stringify({ type: 'xml', data: xmlText, lang: lang }));
            })
            // The surrounding try/catch cannot see asynchronous failures, so
            // rejections are converted to the same {error: ...} result here.
            .catch((e) => fail(describeError(e)));
    } catch (e) {
        return fail(describeError(e));
    }
}
|
||||
Reference in New Issue
Block a user