벌크 자막/추출 개선, 검색 필터 무시, geocoding 필드 수정, 네이버맵 링크

- 벌크 자막: 브라우저 우선 + API fallback, 광고 즉시 skip, 대기 시간 단축
- 벌크 자막/추출: 선택한 영상만 처리 가능 (체크박스 선택 후 실행)
- 자막 실패 시 no_transcript 상태 마킹하여 재시도 방지
- 검색 시 필터 조건 무시 (채널/장르/가격/지역/영역 초기화)
- 리셋 버튼 클릭 시 검색어 입력란 초기화
- RestaurantMapper updateFields에 google_place_id, rating 등 geocoding 필드 추가
- SearchMapper에 tabling_url, catchtable_url, phone, website 필드 추가
- 식당 상세에 네이버 지도 링크 추가
- YouTubeService.getTranscriptApi public 전환

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
joungmin
2026-03-11 09:00:40 +09:00
parent cdee37e341
commit 0f985d52a9
13 changed files with 405 additions and 76 deletions

View File

@@ -6,6 +6,7 @@ import com.microsoft.playwright.*;
import com.tasteby.domain.Restaurant;
import com.tasteby.security.AuthUtil;
import com.tasteby.service.CacheService;
import com.tasteby.service.GeocodingService;
import com.tasteby.service.RestaurantService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -31,12 +32,14 @@ public class RestaurantController {
private static final Logger log = LoggerFactory.getLogger(RestaurantController.class);
private final RestaurantService restaurantService;
private final GeocodingService geocodingService;
private final CacheService cache;
private final ObjectMapper objectMapper;
private final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor();
public RestaurantController(RestaurantService restaurantService, CacheService cache, ObjectMapper objectMapper) {
public RestaurantController(RestaurantService restaurantService, GeocodingService geocodingService, CacheService cache, ObjectMapper objectMapper) {
this.restaurantService = restaurantService;
this.geocodingService = geocodingService;
this.cache = cache;
this.objectMapper = objectMapper;
}
@@ -82,11 +85,43 @@ public class RestaurantController {
AuthUtil.requireAdmin();
var r = restaurantService.findById(id);
if (r == null) throw new ResponseStatusException(HttpStatus.NOT_FOUND, "Restaurant not found");
// Re-geocode if name or address changed
String newName = (String) body.get("name");
String newAddress = (String) body.get("address");
boolean nameChanged = newName != null && !newName.equals(r.getName());
boolean addressChanged = newAddress != null && !newAddress.equals(r.getAddress());
if (nameChanged || addressChanged) {
String geoName = newName != null ? newName : r.getName();
String geoAddr = newAddress != null ? newAddress : r.getAddress();
var geo = geocodingService.geocodeRestaurant(geoName, geoAddr);
if (geo != null) {
body.put("latitude", geo.get("latitude"));
body.put("longitude", geo.get("longitude"));
body.put("google_place_id", geo.get("google_place_id"));
if (geo.containsKey("formatted_address")) {
body.put("address", geo.get("formatted_address"));
}
if (geo.containsKey("rating")) body.put("rating", geo.get("rating"));
if (geo.containsKey("rating_count")) body.put("rating_count", geo.get("rating_count"));
if (geo.containsKey("phone")) body.put("phone", geo.get("phone"));
if (geo.containsKey("business_status")) body.put("business_status", geo.get("business_status"));
// formatted_address에서 region 파싱 (예: "대한민국 서울특별시 강남구 ..." → "한국|서울|강남구")
String addr = (String) geo.get("formatted_address");
if (addr != null) {
body.put("region", GeocodingService.parseRegionFromAddress(addr));
}
}
}
restaurantService.update(id, body);
cache.flush();
return Map.of("ok", true);
var updated = restaurantService.findById(id);
return Map.of("ok", true, "restaurant", updated);
}
@DeleteMapping("/{id}")
public Map<String, Object> delete(@PathVariable String id) {
AuthUtil.requireAdmin();

View File

@@ -252,6 +252,34 @@ public class VideoController {
if (body.containsKey(key)) restFields.put(key, body.get(key));
}
if (!restFields.isEmpty()) {
// Re-geocode if name or address changed
var existing = restaurantService.findById(restaurantId);
String newName = (String) restFields.get("name");
String newAddr = (String) restFields.get("address");
boolean nameChanged = newName != null && existing != null && !newName.equals(existing.getName());
boolean addrChanged = newAddr != null && existing != null && !newAddr.equals(existing.getAddress());
if (nameChanged || addrChanged) {
String geoName = newName != null ? newName : existing.getName();
String geoAddr = newAddr != null ? newAddr : existing.getAddress();
var geo = geocodingService.geocodeRestaurant(geoName, geoAddr);
if (geo != null) {
restFields.put("latitude", geo.get("latitude"));
restFields.put("longitude", geo.get("longitude"));
restFields.put("google_place_id", geo.get("google_place_id"));
if (geo.containsKey("formatted_address")) {
restFields.put("address", geo.get("formatted_address"));
}
if (geo.containsKey("rating")) restFields.put("rating", geo.get("rating"));
if (geo.containsKey("rating_count")) restFields.put("rating_count", geo.get("rating_count"));
if (geo.containsKey("phone")) restFields.put("phone", geo.get("phone"));
if (geo.containsKey("business_status")) restFields.put("business_status", geo.get("business_status"));
// Parse region from address
String addr = (String) geo.get("formatted_address");
if (addr != null) {
restFields.put("region", GeocodingService.parseRegionFromAddress(addr));
}
}
}
restaurantService.update(restaurantId, restFields);
}

View File

@@ -50,13 +50,20 @@ public class VideoSseController {
}
@PostMapping("/bulk-transcript")
public SseEmitter bulkTranscript() {
public SseEmitter bulkTranscript(@RequestBody(required = false) Map<String, Object> body) {
AuthUtil.requireAdmin();
SseEmitter emitter = new SseEmitter(1_800_000L); // 30 min timeout
@SuppressWarnings("unchecked")
List<String> selectedIds = body != null && body.containsKey("ids")
? ((List<?>) body.get("ids")).stream().map(Object::toString).toList()
: null;
executor.execute(() -> {
try {
var videos = videoService.findVideosWithoutTranscript();
var videos = selectedIds != null && !selectedIds.isEmpty()
? videoService.findVideosByIds(selectedIds)
: videoService.findVideosWithoutTranscript();
int total = videos.size();
emit(emitter, Map.of("type", "start", "total", total));
@@ -69,6 +76,8 @@ public class VideoSseController {
int success = 0;
int failed = 0;
// Pass 1: 브라우저 우선 (봇 탐지 회피)
var apiNeeded = new ArrayList<Integer>();
try (var session = youTubeService.createBrowserSession()) {
for (int i = 0; i < total; i++) {
var v = videos.get(i);
@@ -76,18 +85,48 @@ public class VideoSseController {
String title = (String) v.get("title");
String id = (String) v.get("id");
emit(emitter, Map.of("type", "processing", "index", i, "title", title));
emit(emitter, Map.of("type", "processing", "index", i, "title", title, "method", "browser"));
try {
// Playwright browser first (reuse page)
var result = youTubeService.getTranscriptWithPage(session.page(), videoId);
// Fallback: thoroldvix API
if (result == null) {
log.warn("[BULK-TRANSCRIPT] Browser failed for {}, trying API", videoId);
result = youTubeService.getTranscript(videoId, "auto");
if (result != null) {
videoService.updateTranscript(id, result.text());
success++;
emit(emitter, Map.of("type", "done", "index", i,
"title", title, "source", result.source(),
"length", result.text().length()));
} else {
apiNeeded.add(i);
emit(emitter, Map.of("type", "skip", "index", i,
"title", title, "message", "브라우저 실패, API로 재시도 예정"));
}
} catch (Exception e) {
apiNeeded.add(i);
log.warn("[BULK-TRANSCRIPT] Browser failed for {}: {}", videoId, e.getMessage());
}
// 봇 판정 방지 랜덤 딜레이 (3~8초)
if (i < total - 1) {
int delay = ThreadLocalRandom.current().nextInt(3000, 8001);
log.info("[BULK-TRANSCRIPT] Waiting {}ms before next...", delay);
session.page().waitForTimeout(delay);
}
}
}
// Pass 2: 브라우저 실패분만 API로 재시도
if (!apiNeeded.isEmpty()) {
emit(emitter, Map.of("type", "api_pass", "count", apiNeeded.size()));
for (int i : apiNeeded) {
var v = videos.get(i);
String videoId = (String) v.get("video_id");
String title = (String) v.get("title");
String id = (String) v.get("id");
emit(emitter, Map.of("type", "processing", "index", i, "title", title, "method", "api"));
try {
var result = youTubeService.getTranscriptApi(videoId, "auto");
if (result != null) {
videoService.updateTranscript(id, result.text());
success++;
@@ -96,22 +135,17 @@ public class VideoSseController {
"length", result.text().length()));
} else {
failed++;
videoService.updateStatus(id, "no_transcript");
emit(emitter, Map.of("type", "error", "index", i,
"title", title, "message", "자막을 찾을 수 없음"));
}
} catch (Exception e) {
failed++;
log.error("[BULK-TRANSCRIPT] Error for {}: {}", videoId, e.getMessage());
videoService.updateStatus(id, "no_transcript");
log.error("[BULK-TRANSCRIPT] API error for {}: {}", videoId, e.getMessage());
emit(emitter, Map.of("type", "error", "index", i,
"title", title, "message", e.getMessage()));
}
// 봇 판정 방지 랜덤 딜레이 (5~15초)
if (i < total - 1) {
int delay = ThreadLocalRandom.current().nextInt(5000, 15001);
log.info("[BULK-TRANSCRIPT] Waiting {}ms before next...", delay);
session.page().waitForTimeout(delay);
}
}
}
@@ -126,13 +160,20 @@ public class VideoSseController {
}
@PostMapping("/bulk-extract")
public SseEmitter bulkExtract() {
public SseEmitter bulkExtract(@RequestBody(required = false) Map<String, Object> body) {
AuthUtil.requireAdmin();
SseEmitter emitter = new SseEmitter(600_000L);
@SuppressWarnings("unchecked")
List<String> selectedIds = body != null && body.containsKey("ids")
? ((List<?>) body.get("ids")).stream().map(Object::toString).toList()
: null;
executor.execute(() -> {
try {
var rows = videoService.findVideosForBulkExtract();
var rows = selectedIds != null && !selectedIds.isEmpty()
? videoService.findVideosForExtractByIds(selectedIds)
: videoService.findVideosForBulkExtract();
int total = rows.size();
int totalRestaurants = 0;

View File

@@ -68,6 +68,10 @@ public interface VideoMapper {
List<Map<String, Object>> findVideosWithoutTranscript();
/**
 * Returns one row (id, video_id, title, url) per video whose id is contained in {@code ids},
 * as selected by the {@code findVideosByIds} mapper statement.
 * NOTE(review): an empty {@code ids} list leaves the IN clause without values — callers should not pass one.
 */
List<Map<String, Object>> findVideosByIds(@Param("ids") List<String> ids);
/**
 * Returns one row (id, video_id, title, url, transcript_text) per video whose id is contained
 * in {@code ids}, as selected by the {@code findVideosForExtractByIds} mapper statement.
 * NOTE(review): an empty {@code ids} list leaves the IN clause without values — callers should not pass one.
 */
List<Map<String, Object>> findVideosForExtractByIds(@Param("ids") List<String> ids);
void updateVideoRestaurantFields(@Param("videoId") String videoId,
@Param("restaurantId") String restaurantId,
@Param("foodsJson") String foodsJson,

View File

@@ -131,6 +131,34 @@ public class GeocodingService {
}
}
/**
 * Parses a Korean address into the region format "country|city-or-province|district"
 * ("나라|시/도|구/군").
 * Example: "대한민국 서울특별시 강남구 역삼동 123" → "한국|서울|강남구"
 *
 * <p>The address is split on whitespace and each token is classified by its suffix:
 * metropolitan cities (특별시/광역시/특별자치시) lose the suffix, provinces (…도) are kept
 * verbatim, and the first 구/군 token (or a 시 token that follows a city/province) becomes
 * the district.
 *
 * @param address formatted address; may be {@code null}
 * @return the region string (city and district parts may be empty), or {@code null} when the
 *         address is null/blank or neither a country nor a city/province could be recognised
 */
public static String parseRegionFromAddress(String address) {
    if (address == null || address.isBlank()) return null;
    // "South Korea" contains a space, so it can never equal a single whitespace-split token;
    // it has to be detected on the whole string.
    String country = address.contains("South Korea") ? "한국" : "";
    String city = "";
    String district = "";
    for (String p : address.strip().split("\\s+")) {
        if (p.equals("대한민국")) {
            country = "한국";
        } else if (p.endsWith("특별시") || p.endsWith("광역시") || p.endsWith("특별자치시")) {
            city = p.replace("특별시", "").replace("광역시", "").replace("특별자치시", "");
        } else if (city.isEmpty() && p.endsWith("도") && p.length() <= 5) {
            // Province such as 경기도 / 충청북도. Only the first one counts, so a later token
            // that merely ends in 도 (e.g. a place name) cannot overwrite the city.
            // NOTE(review): the length limit excludes names like 제주특별자치도 — confirm intended.
            city = p;
        } else if (p.endsWith("구") || p.endsWith("군") || (p.endsWith("시") && !city.isEmpty())) {
            // A plain 시 is only a district when it sits under an already recognised province/city.
            if (district.isEmpty()) district = p;
        }
    }
    // A recognised Korean city/province implies the country even when it is not spelled out.
    if (country.isEmpty() && !city.isEmpty()) country = "한국";
    if (country.isEmpty()) return null;
    return country + "|" + city + "|" + district;
}
private Map<String, Object> geocode(String query) {
try {
String response = webClient.get()

View File

@@ -111,6 +111,22 @@ public class VideoService {
return rows.stream().map(JsonUtil::lowerKeys).toList();
}
/**
 * Loads the videos whose id is in {@code ids} and passes each row through
 * {@link JsonUtil#lowerKeys}.
 *
 * @param ids video ids to load; may be {@code null} or empty
 * @return the matching rows, or an empty list when no ids are given
 */
public List<Map<String, Object>> findVideosByIds(List<String> ids) {
    // With no ids the mapper's <foreach> would emit an IN clause without values (invalid SQL),
    // so answer directly instead of hitting the database.
    if (ids == null || ids.isEmpty()) {
        return List.of();
    }
    return mapper.findVideosByIds(ids).stream().map(JsonUtil::lowerKeys).toList();
}
/**
 * Loads the videos whose id is in {@code ids} for extraction. Each row is passed through
 * {@link JsonUtil#lowerKeys}; its {@code transcript_text} value is converted with
 * {@link JsonUtil#readClob} and exposed under the key {@code transcript} instead.
 *
 * @param ids video ids to load; may be {@code null} or empty
 * @return the matching rows, or an empty list when no ids are given
 */
public List<Map<String, Object>> findVideosForExtractByIds(List<String> ids) {
    // With no ids the mapper's <foreach> would emit an IN clause without values (invalid SQL),
    // so answer directly instead of hitting the database.
    if (ids == null || ids.isEmpty()) {
        return List.of();
    }
    return mapper.findVideosForExtractByIds(ids).stream().map(row -> {
        var r = JsonUtil.lowerKeys(row);
        // Swap the raw transcript_text column for a "transcript" entry holding the converted value.
        Object transcript = r.remove("transcript_text");
        r.put("transcript", JsonUtil.readClob(transcript));
        return r;
    }).toList();
}
public void updateVideoRestaurantFields(String videoId, String restaurantId,
String foodsJson, String evaluation, String guestsJson) {
mapper.updateVideoRestaurantFields(videoId, restaurantId, foodsJson, evaluation, guestsJson);

View File

@@ -50,10 +50,77 @@ public class YouTubeService {
}
/**
* Fetch videos from a YouTube channel, page by page.
* Returns all pages merged into one list.
* Fetch videos from a YouTube channel using the uploads playlist (UC→UU).
* This returns ALL videos unlike the Search API which caps results.
* Falls back to Search API if playlist approach fails.
*/
public List<Map<String, Object>> fetchChannelVideos(String channelId, String publishedAfter, boolean excludeShorts) {
    // Convert channel ID UC... → uploads playlist UU...
    String uploadsPlaylistId = "UU" + channelId.substring(2);

    List<Map<String, Object>> allVideos = new ArrayList<>();
    String nextPage = null;

    try {
        do {
            // The lambda below needs an effectively-final copy of the current token.
            String pageToken = nextPage;
            String response = webClient.get()
                    .uri(uriBuilder -> {
                        var b = uriBuilder.path("/playlistItems")
                                .queryParam("key", apiKey)
                                .queryParam("playlistId", uploadsPlaylistId)
                                .queryParam("part", "snippet")
                                .queryParam("maxResults", 50);
                        if (pageToken != null) b.queryParam("pageToken", pageToken);
                        return b.build();
                    })
                    .retrieve()
                    .bodyToMono(String.class)
                    .block(Duration.ofSeconds(30));

            JsonNode data = mapper.readTree(response);
            List<Map<String, Object>> pageVideos = new ArrayList<>();
            // Set once an item at or before publishedAfter is seen; pagination must stop then.
            boolean reachedAlreadyScanned = false;

            for (JsonNode item : data.path("items")) {
                JsonNode snippet = item.path("snippet");
                String vid = snippet.path("resourceId").path("videoId").asText();
                String publishedAt = snippet.path("publishedAt").asText();

                // publishedAfter filter: only keep videos newer than the last scan.
                if (publishedAfter != null && publishedAt.compareTo(publishedAfter) <= 0) {
                    // The uploads playlist is treated as newest-first, so everything after this
                    // item (and on later pages) is older as well.
                    reachedAlreadyScanned = true;
                    break;
                }

                pageVideos.add(Map.of(
                        "video_id", vid,
                        "title", snippet.path("title").asText(),
                        "published_at", publishedAt,
                        "url", "https://www.youtube.com/watch?v=" + vid
                ));
            }

            if (excludeShorts && !pageVideos.isEmpty()) {
                pageVideos = filterShorts(pageVideos);
            }
            allVideos.addAll(pageVideos);

            // Follow nextPageToken only while no already-scanned video has been reached;
            // otherwise every remaining page would be fetched for nothing.
            nextPage = (!reachedAlreadyScanned && data.has("nextPageToken"))
                    ? data.path("nextPageToken").asText()
                    : null;
        } while (nextPage != null);
    } catch (Exception e) {
        log.warn("PlaylistItems API failed for {}, falling back to Search API", channelId, e);
        return fetchChannelVideosViaSearch(channelId, publishedAfter, excludeShorts);
    }

    return allVideos;
}
/**
* Fallback: fetch via Search API (may not return all videos).
*/
private List<Map<String, Object>> fetchChannelVideosViaSearch(String channelId, String publishedAfter, boolean excludeShorts) {
List<Map<String, Object>> allVideos = new ArrayList<>();
String nextPage = null;
@@ -98,7 +165,7 @@ public class YouTubeService {
nextPage = data.has("nextPageToken") ? data.path("nextPageToken").asText() : null;
} catch (Exception e) {
log.error("Failed to parse YouTube API response", e);
log.error("Failed to parse YouTube Search API response", e);
break;
}
} while (nextPage != null);
@@ -108,33 +175,39 @@ public class YouTubeService {
/**
* Filter out YouTube Shorts (<=60s duration).
* YouTube /videos API accepts max 50 IDs per request, so we batch.
*/
private List<Map<String, Object>> filterShorts(List<Map<String, Object>> videos) {
String ids = String.join(",", videos.stream().map(v -> (String) v.get("video_id")).toList());
String response = webClient.get()
.uri(uriBuilder -> uriBuilder.path("/videos")
.queryParam("key", apiKey)
.queryParam("id", ids)
.queryParam("part", "contentDetails")
.build())
.retrieve()
.bodyToMono(String.class)
.block(Duration.ofSeconds(30));
Map<String, Integer> durations = new HashMap<>();
List<String> allIds = videos.stream().map(v -> (String) v.get("video_id")).toList();
try {
JsonNode data = mapper.readTree(response);
Map<String, Integer> durations = new HashMap<>();
for (JsonNode item : data.path("items")) {
String duration = item.path("contentDetails").path("duration").asText();
durations.put(item.path("id").asText(), parseDuration(duration));
for (int i = 0; i < allIds.size(); i += 50) {
List<String> batch = allIds.subList(i, Math.min(i + 50, allIds.size()));
String ids = String.join(",", batch);
try {
String response = webClient.get()
.uri(uriBuilder -> uriBuilder.path("/videos")
.queryParam("key", apiKey)
.queryParam("id", ids)
.queryParam("part", "contentDetails")
.build())
.retrieve()
.bodyToMono(String.class)
.block(Duration.ofSeconds(30));
JsonNode data = mapper.readTree(response);
for (JsonNode item : data.path("items")) {
String duration = item.path("contentDetails").path("duration").asText();
durations.put(item.path("id").asText(), parseDuration(duration));
}
} catch (Exception e) {
log.warn("Failed to fetch video durations for batch starting at {}", i, e);
}
return videos.stream()
.filter(v -> durations.getOrDefault(v.get("video_id"), 0) > 60)
.toList();
} catch (Exception e) {
log.warn("Failed to filter shorts", e);
return videos;
}
return videos.stream()
.filter(v -> durations.getOrDefault(v.get("video_id"), 61) > 60)
.toList();
}
private int parseDuration(String dur) {
@@ -217,7 +290,7 @@ public class YouTubeService {
return getTranscriptApi(videoId, mode);
}
private TranscriptResult getTranscriptApi(String videoId, String mode) {
public TranscriptResult getTranscriptApi(String videoId, String mode) {
TranscriptList transcriptList;
try {
transcriptList = transcriptApi.listTranscripts(videoId);
@@ -314,11 +387,11 @@ public class YouTubeService {
log.info("[TRANSCRIPT] Opening YouTube page for {}", videoId);
page.navigate("https://www.youtube.com/watch?v=" + videoId,
new Page.NavigateOptions().setWaitUntil(WaitUntilState.DOMCONTENTLOADED).setTimeout(30000));
page.waitForTimeout(5000);
page.waitForTimeout(3000);
skipAds(page);
page.waitForTimeout(2000);
page.waitForTimeout(1000);
log.info("[TRANSCRIPT] Page loaded, looking for transcript button");
// Click "더보기" (expand description)
@@ -372,14 +445,14 @@ public class YouTubeService {
return null;
}
// Wait for transcript segments to appear (max ~40s)
page.waitForTimeout(3000);
for (int attempt = 0; attempt < 12; attempt++) {
page.waitForTimeout(3000);
// Wait for transcript segments to appear (max ~15s)
page.waitForTimeout(2000);
for (int attempt = 0; attempt < 10; attempt++) {
page.waitForTimeout(1500);
Object count = page.evaluate(
"() => document.querySelectorAll('ytd-transcript-segment-renderer').length");
int segCount = count instanceof Number n ? n.intValue() : 0;
log.info("[TRANSCRIPT] Wait {}s: {} segments", (attempt + 1) * 3 + 3, segCount);
log.info("[TRANSCRIPT] Wait {}s: {} segments", (attempt + 1) * 1.5 + 2, segCount);
if (segCount > 0) break;
}
@@ -434,13 +507,23 @@ public class YouTubeService {
}
private void skipAds(Page page) {
for (int i = 0; i < 12; i++) {
for (int i = 0; i < 30; i++) {
Object adStatus = page.evaluate("""
() => {
const skipBtn = document.querySelector('.ytp-skip-ad-button, .ytp-ad-skip-button, .ytp-ad-skip-button-modern, button.ytp-ad-skip-button-modern');
if (skipBtn) { skipBtn.click(); return 'skipped'; }
const adOverlay = document.querySelector('.ytp-ad-player-overlay, .ad-showing');
if (adOverlay) return 'playing';
if (adOverlay) {
// 광고 중: 뮤트 + 끝으로 이동 시도
const video = document.querySelector('video');
if (video) {
video.muted = true;
if (video.duration && isFinite(video.duration)) {
video.currentTime = video.duration;
}
}
return 'playing';
}
const adBadge = document.querySelector('.ytp-ad-text');
if (adBadge && adBadge.textContent) return 'badge';
return 'none';
@@ -450,10 +533,10 @@ public class YouTubeService {
if ("none".equals(status)) break;
log.info("[TRANSCRIPT] Ad detected: {}, waiting...", status);
if ("skipped".equals(status)) {
page.waitForTimeout(2000);
page.waitForTimeout(1000);
break;
}
page.waitForTimeout(5000);
page.waitForTimeout(1000);
}
}

View File

@@ -143,6 +143,18 @@
<if test="fields.containsKey('longitude')">
longitude = #{fields.longitude},
</if>
<if test="fields.containsKey('google_place_id')">
google_place_id = #{fields.google_place_id},
</if>
<if test="fields.containsKey('business_status')">
business_status = #{fields.business_status},
</if>
<if test="fields.containsKey('rating')">
rating = #{fields.rating},
</if>
<if test="fields.containsKey('rating_count')">
rating_count = #{fields.rating_count},
</if>
updated_at = SYSTIMESTAMP,
</trim>
WHERE id = #{id}

View File

@@ -11,7 +11,11 @@
<result property="longitude" column="longitude"/>
<result property="cuisineType" column="cuisine_type"/>
<result property="priceRange" column="price_range"/>
<result property="phone" column="phone"/>
<result property="website" column="website"/>
<result property="googlePlaceId" column="google_place_id"/>
<result property="tablingUrl" column="tabling_url"/>
<result property="catchtableUrl" column="catchtable_url"/>
<result property="businessStatus" column="business_status"/>
<result property="rating" column="rating"/>
<result property="ratingCount" column="rating_count"/>
@@ -19,7 +23,8 @@
<select id="keywordSearch" resultMap="restaurantMap">
SELECT DISTINCT r.id, r.name, r.address, r.region, r.latitude, r.longitude,
r.cuisine_type, r.price_range, r.google_place_id,
r.cuisine_type, r.price_range, r.phone, r.website, r.google_place_id,
r.tabling_url, r.catchtable_url,
r.business_status, r.rating, r.rating_count
FROM restaurants r
JOIN video_restaurants vr ON vr.restaurant_id = r.id

View File

@@ -221,10 +221,30 @@
SELECT id, video_id, title, url
FROM videos
WHERE (transcript_text IS NULL OR dbms_lob.getlength(transcript_text) = 0)
AND status != 'skip'
AND status NOT IN ('skip', 'no_transcript')
ORDER BY created_at
</select>
<!--
Selects id, video_id, title and url for every video whose id is in the "ids" collection,
ordered by created_at. No status or transcript condition is applied, so rows are returned
regardless of their status when they are requested explicitly.
NOTE(review): "ids" must be non-empty, otherwise the IN clause has no values. If the target
database is Oracle (dbms_lob is used elsewhere in this mapper), IN lists are limited to 1000
entries; confirm callers stay below that.
-->
<select id="findVideosByIds" resultType="map">
SELECT id, video_id, title, url
FROM videos
WHERE id IN
<foreach item="id" collection="ids" open="(" separator="," close=")">
#{id}
</foreach>
ORDER BY created_at
</select>
<!--
Selects id, video_id, title, url and transcript_text for every video whose id is in the "ids"
collection, newest published_at first. Rows are returned whether or not transcript_text is
populated; this statement does not filter on it.
NOTE(review): "ids" must be non-empty, otherwise the IN clause has no values. If the target
database is Oracle (dbms_lob is used elsewhere in this mapper), IN lists are limited to 1000
entries; confirm callers stay below that.
-->
<select id="findVideosForExtractByIds" resultType="map">
SELECT v.id, v.video_id, v.title, v.url, v.transcript_text
FROM videos v
WHERE v.id IN
<foreach item="id" collection="ids" open="(" separator="," close=")">
#{id}
</foreach>
ORDER BY v.published_at DESC
</select>
<update id="updateVideoRestaurantFields">
UPDATE video_restaurants
SET foods_mentioned = #{foodsJson,jdbcType=CLOB},