Files
tasteby/backend-java/src/main/java/com/tasteby/controller/VideoSseController.java
joungmin 0f985d52a9 벌크 자막/추출 개선, 검색 필터 무시, geocoding 필드 수정, 네이버맵 링크
- 벌크 자막: 브라우저 우선 + API fallback, 광고 즉시 skip, 대기 시간 단축
- 벌크 자막/추출: 선택한 영상만 처리 가능 (체크박스 선택 후 실행)
- 자막 실패 시 no_transcript 상태 마킹하여 재시도 방지
- 검색 시 필터 조건 무시 (채널/장르/가격/지역/영역 초기화)
- 리셋 버튼 클릭 시 검색어 입력란 초기화
- RestaurantMapper updateFields에 google_place_id, rating 등 geocoding 필드 추가
- SearchMapper에 tabling_url, catchtable_url, phone, website 필드 추가
- 식당 상세에 네이버 지도 링크 추가
- YouTubeService.getTranscriptApi public 전환

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 09:00:40 +09:00

488 lines
22 KiB
Java

package com.tasteby.controller;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.tasteby.security.AuthUtil;
import com.tasteby.service.*;
import com.tasteby.util.CuisineTypes;
import com.tasteby.util.JsonUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
/**
* SSE streaming endpoints for bulk operations.
*/
@RestController
@RequestMapping("/api/videos")
public class VideoSseController {
private static final Logger log = LoggerFactory.getLogger(VideoSseController.class);
// Collaborators injected through the constructor below.
private final VideoService videoService;
private final RestaurantService restaurantService;
private final PipelineService pipelineService;
private final YouTubeService youTubeService;
private final OciGenAiService genAi;
private final CacheService cache;
// Serializes SSE event payloads and LLM prompt inputs to JSON.
private final ObjectMapper mapper;
// Every bulk job is handed to this executor so the request thread can return the SseEmitter immediately;
// each submitted task gets its own virtual thread.
private final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor();
/**
 * Creates the controller; every argument is stored as-is in the field of the same name.
 */
public VideoSseController(VideoService videoService,
RestaurantService restaurantService,
PipelineService pipelineService,
YouTubeService youTubeService,
OciGenAiService genAi,
CacheService cache,
ObjectMapper mapper) {
this.videoService = videoService;
this.restaurantService = restaurantService;
this.pipelineService = pipelineService;
this.youTubeService = youTubeService;
this.genAi = genAi;
this.cache = cache;
this.mapper = mapper;
}
/**
 * Fetches transcripts for many videos and streams progress as SSE events.
 *
 * <p>If the body carries a non-empty {@code ids} list only those videos are processed;
 * otherwise all videos without a transcript are processed. Pass 1 tries every video through a
 * shared browser session; pass 2 retries the failures through the transcript API and marks
 * videos that still fail as {@code no_transcript}.
 *
 * <p>Emitted event types: {@code start}, {@code processing}, {@code done}, {@code skip},
 * {@code api_pass}, {@code error}, {@code complete}.
 *
 * @param body optional JSON body; {@code ids} (array) restricts the run to the selected videos
 * @return the emitter the background job writes its progress events to
 */
@PostMapping("/bulk-transcript")
public SseEmitter bulkTranscript(@RequestBody(required = false) Map<String, Object> body) {
    AuthUtil.requireAdmin();
    SseEmitter emitter = new SseEmitter(1_800_000L); // 30 min timeout
    // {"ids": null} must behave like "no selection" instead of failing with an NPE.
    Object rawIds = body != null ? body.get("ids") : null;
    List<String> selectedIds = rawIds != null
            ? ((List<?>) rawIds).stream().map(Object::toString).toList()
            : null;
    executor.execute(() -> {
        try {
            var videos = selectedIds != null && !selectedIds.isEmpty()
                    ? videoService.findVideosByIds(selectedIds)
                    : videoService.findVideosWithoutTranscript();
            int total = videos.size();
            emit(emitter, Map.of("type", "start", "total", total));
            if (total == 0) {
                emit(emitter, Map.of("type", "complete", "total", 0, "success", 0, "failed", 0));
                emitter.complete();
                return;
            }
            int success = 0;
            int failed = 0;
            // Pass 1: browser first (avoids bot detection)
            var apiNeeded = new ArrayList<Integer>();
            try (var session = youTubeService.createBrowserSession()) {
                for (int i = 0; i < total; i++) {
                    var v = videos.get(i);
                    String videoId = (String) v.get("video_id");
                    // Map.of rejects null values; a missing title must not abort the whole job.
                    String title = Objects.requireNonNullElse((String) v.get("title"), "");
                    String id = (String) v.get("id");
                    emit(emitter, Map.of("type", "processing", "index", i, "title", title, "method", "browser"));
                    try {
                        var result = youTubeService.getTranscriptWithPage(session.page(), videoId);
                        if (result != null) {
                            videoService.updateTranscript(id, result.text());
                            success++;
                            emit(emitter, Map.of("type", "done", "index", i,
                                    "title", title, "source", result.source(),
                                    "length", result.text().length()));
                        } else {
                            apiNeeded.add(i);
                            emit(emitter, Map.of("type", "skip", "index", i,
                                    "title", title, "message", "브라우저 실패, API로 재시도 예정"));
                        }
                    } catch (Exception e) {
                        apiNeeded.add(i);
                        log.warn("[BULK-TRANSCRIPT] Browser failed for {}: {}", videoId, e.getMessage());
                        // Tell the client this item is deferred, exactly like the null-result path does.
                        emit(emitter, Map.of("type", "skip", "index", i,
                                "title", title, "message", "브라우저 실패, API로 재시도 예정"));
                    }
                    // Random 3-8s delay between videos to avoid being flagged as a bot
                    if (i < total - 1) {
                        int delay = ThreadLocalRandom.current().nextInt(3000, 8001);
                        log.info("[BULK-TRANSCRIPT] Waiting {}ms before next...", delay);
                        session.page().waitForTimeout(delay);
                    }
                }
            }
            // Pass 2: retry only the browser failures through the API
            if (!apiNeeded.isEmpty()) {
                emit(emitter, Map.of("type", "api_pass", "count", apiNeeded.size()));
                for (int i : apiNeeded) {
                    var v = videos.get(i);
                    String videoId = (String) v.get("video_id");
                    String title = Objects.requireNonNullElse((String) v.get("title"), "");
                    String id = (String) v.get("id");
                    emit(emitter, Map.of("type", "processing", "index", i, "title", title, "method", "api"));
                    try {
                        var result = youTubeService.getTranscriptApi(videoId, "auto");
                        if (result != null) {
                            videoService.updateTranscript(id, result.text());
                            success++;
                            emit(emitter, Map.of("type", "done", "index", i,
                                    "title", title, "source", result.source(),
                                    "length", result.text().length()));
                        } else {
                            failed++;
                            videoService.updateStatus(id, "no_transcript");
                            emit(emitter, Map.of("type", "error", "index", i,
                                    "title", title, "message", "자막을 찾을 수 없음"));
                        }
                    } catch (Exception e) {
                        failed++;
                        videoService.updateStatus(id, "no_transcript");
                        log.error("[BULK-TRANSCRIPT] API error for {}: {}", videoId, e.getMessage());
                        // Exceptions without a message would make Map.of throw and kill the job.
                        emit(emitter, Map.of("type", "error", "index", i,
                                "title", title, "message", Objects.toString(e.getMessage(), e.toString())));
                    }
                }
            }
            emit(emitter, Map.of("type", "complete", "total", total, "success", success, "failed", failed));
            emitter.complete();
        } catch (Exception e) {
            log.error("Bulk transcript error", e);
            emitter.completeWithError(e);
        }
    });
    return emitter;
}
/**
 * Runs restaurant extraction over many videos and streams progress as SSE events.
 *
 * <p>If the body carries a non-empty {@code ids} list only those videos are processed;
 * otherwise the default bulk-extract candidate set is used. A random 3-8 second pause is
 * inserted between videos. The cache is flushed whenever at least one restaurant was extracted,
 * including when the job aborts part-way.
 *
 * <p>Emitted event types: {@code start}, {@code wait}, {@code processing}, {@code done},
 * {@code error}, {@code complete}.
 *
 * @param body optional JSON body; {@code ids} (array) restricts the run to the selected videos
 * @return the emitter the background job writes its progress events to
 */
@PostMapping("/bulk-extract")
public SseEmitter bulkExtract(@RequestBody(required = false) Map<String, Object> body) {
    AuthUtil.requireAdmin();
    // NOTE(review): 10 min timeout while bulk-transcript uses 30 min; with 3-8s pauses per video
    // long runs may outlive the emitter — confirm this is intended.
    SseEmitter emitter = new SseEmitter(600_000L);
    // {"ids": null} must behave like "no selection" instead of failing with an NPE.
    Object rawIds = body != null ? body.get("ids") : null;
    List<String> selectedIds = rawIds != null
            ? ((List<?>) rawIds).stream().map(Object::toString).toList()
            : null;
    executor.execute(() -> {
        int totalRestaurants = 0;
        boolean flushAttempted = false;
        try {
            var rows = selectedIds != null && !selectedIds.isEmpty()
                    ? videoService.findVideosForExtractByIds(selectedIds)
                    : videoService.findVideosForBulkExtract();
            int total = rows.size();
            emit(emitter, Map.of("type", "start", "total", total));
            for (int i = 0; i < total; i++) {
                var v = rows.get(i);
                if (i > 0) {
                    // Same [3000, 8000) ms range as before, using the generator the file already imports.
                    long delay = ThreadLocalRandom.current().nextLong(3000, 8000);
                    emit(emitter, Map.of("type", "wait", "index", i, "delay", delay / 1000.0));
                    Thread.sleep(delay);
                }
                // Map.of rejects null values; a missing title must not abort the whole job.
                Object title = Objects.requireNonNullElse(v.get("title"), "");
                emit(emitter, Map.of("type", "processing", "index", i, "title", title));
                try {
                    int count = pipelineService.processExtract(v, (String) v.get("transcript"), null);
                    totalRestaurants += count;
                    emit(emitter, Map.of("type", "done", "index", i, "title", title, "restaurants", count));
                } catch (Exception e) {
                    log.error("Bulk extract error for {}: {}", v.get("video_id"), e.getMessage());
                    // Exceptions without a message would make Map.of throw and kill the job.
                    emit(emitter, Map.of("type", "error", "index", i, "title", title,
                            "message", Objects.toString(e.getMessage(), e.toString())));
                }
            }
            if (totalRestaurants > 0) {
                flushAttempted = true;
                cache.flush();
            }
            emit(emitter, Map.of("type", "complete", "total", total, "total_restaurants", totalRestaurants));
            emitter.complete();
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Restore the flag so the interrupt is not silently lost.
                Thread.currentThread().interrupt();
            }
            log.error("Bulk extract error", e);
            emitter.completeWithError(e);
            // Restaurants extracted before the abort are already persisted; do not leave the cache stale.
            if (totalRestaurants > 0 && !flushAttempted) {
                cache.flush();
            }
        }
    });
    return emitter;
}
/**
 * Re-maps every candidate restaurant's cuisine type to the standard list and streams progress
 * as SSE events.
 *
 * <p>Pass 1 sends the rows to {@code applyRemapBatch} in batches of 20. Rows that were not
 * updated are retried up to 3 times in batches of 5. The cache is flushed before the final
 * {@code complete} event.
 *
 * <p>Emitted event types: {@code start}, {@code processing}, {@code batch_done}, {@code error},
 * {@code retry}, {@code complete}.
 *
 * @return the emitter the background job writes its progress events to
 */
@PostMapping("/remap-cuisine")
public SseEmitter remapCuisine() {
    AuthUtil.requireAdmin();
    SseEmitter emitter = new SseEmitter(600_000L);
    final int batchSize = 20;
    final int retryBatchSize = 5;
    final int maxRetries = 3;
    executor.execute(() -> {
        try {
            var rows = restaurantService.findForRemapCuisine();
            rows = rows.stream().map(JsonUtil::lowerKeys).toList();
            int total = rows.size();
            emit(emitter, Map.of("type", "start", "total", total));
            int updated = 0;
            var allMissed = new ArrayList<Map<String, Object>>();
            // Pass 1
            for (int i = 0; i < total; i += batchSize) {
                int end = Math.min(i + batchSize, total);
                var batch = rows.subList(i, end);
                emit(emitter, Map.of("type", "processing", "current", end, "total", total, "pass", 1));
                try {
                    var result = applyRemapBatch(batch);
                    updated += result.updated;
                    allMissed.addAll(result.missed);
                    emit(emitter, Map.of("type", "batch_done", "current", end, "total", total, "updated", updated, "missed", allMissed.size()));
                } catch (Exception e) {
                    allMissed.addAll(batch);
                    log.warn("Remap cuisine batch error at {}: {}", i, e.getMessage());
                    // Exceptions without a message would make Map.of throw and kill the job.
                    emit(emitter, Map.of("type", "error",
                            "message", Objects.toString(e.getMessage(), e.toString()), "current", i));
                }
            }
            // Pass 2: retry missed (up to 3 attempts with smaller batches)
            if (!allMissed.isEmpty()) {
                emit(emitter, Map.of("type", "retry", "missed", allMissed.size()));
                for (int attempt = 0; attempt < maxRetries && !allMissed.isEmpty(); attempt++) {
                    var retryList = new ArrayList<>(allMissed);
                    allMissed.clear();
                    for (int i = 0; i < retryList.size(); i += retryBatchSize) {
                        var batch = retryList.subList(i, Math.min(i + retryBatchSize, retryList.size()));
                        try {
                            var result = applyRemapBatch(batch);
                            updated += result.updated;
                            allMissed.addAll(result.missed);
                        } catch (Exception e) {
                            log.warn("Remap cuisine retry failed (attempt {}): {}", attempt + 1, e.getMessage());
                            allMissed.addAll(batch);
                        }
                    }
                    // Announce the next attempt only when one will actually run; previously a
                    // phantom "attempt 4" was reported after the last retry.
                    if (!allMissed.isEmpty() && attempt + 1 < maxRetries) {
                        emit(emitter, Map.of("type", "retry", "attempt", attempt + 2, "missed", allMissed.size()));
                    }
                }
            }
            cache.flush();
            emit(emitter, Map.of("type", "complete", "total", total, "updated", updated, "missed", allMissed.size()));
            emitter.complete();
        } catch (Exception e) {
            log.error("Remap cuisine error", e);
            emitter.completeWithError(e);
        }
    });
    return emitter;
}
/**
 * Regenerates the food tags of every candidate restaurant and streams progress as SSE events.
 *
 * <p>Each row's {@code foods_mentioned} value is parsed into a {@code foods} list, then rows are
 * sent to {@code applyFoodsBatch} in batches of 15. Rows that were not updated are retried up to
 * 3 times in batches of 5. The cache is flushed before the final {@code complete} event.
 *
 * <p>Emitted event types: {@code start}, {@code processing}, {@code batch_done}, {@code error},
 * {@code retry}, {@code complete}.
 *
 * @return the emitter the background job writes its progress events to
 */
@PostMapping("/remap-foods")
public SseEmitter remapFoods() {
    AuthUtil.requireAdmin();
    SseEmitter emitter = new SseEmitter(600_000L);
    final int batchSize = 15;
    final int retryBatchSize = 5;
    final int maxRetries = 3;
    executor.execute(() -> {
        try {
            var rows = restaurantService.findForRemapFoods();
            rows = rows.stream().map(r -> {
                var m = JsonUtil.lowerKeys(r);
                // foods_mentioned is now TO_CHAR'd in SQL, parse as string
                Object fm = m.get("foods_mentioned");
                m.put("foods", JsonUtil.parseStringList(fm));
                return m;
            }).toList();
            int total = rows.size();
            emit(emitter, Map.of("type", "start", "total", total));
            int updated = 0;
            var allMissed = new ArrayList<Map<String, Object>>();
            for (int i = 0; i < total; i += batchSize) {
                int end = Math.min(i + batchSize, total);
                var batch = rows.subList(i, end);
                emit(emitter, Map.of("type", "processing", "current", end, "total", total));
                try {
                    var result = applyFoodsBatch(batch);
                    updated += result.updated;
                    allMissed.addAll(result.missed);
                    emit(emitter, Map.of("type", "batch_done", "current", end, "total", total, "updated", updated));
                } catch (Exception e) {
                    allMissed.addAll(batch);
                    log.warn("Remap foods batch error at {}: {}", i, e.getMessage());
                    // Exceptions without a message would make Map.of throw and kill the job.
                    emit(emitter, Map.of("type", "error",
                            "message", Objects.toString(e.getMessage(), e.toString()), "current", i));
                }
            }
            // Retry missed (up to 3 attempts with smaller batches)
            if (!allMissed.isEmpty()) {
                emit(emitter, Map.of("type", "retry", "missed", allMissed.size()));
                for (int attempt = 0; attempt < maxRetries && !allMissed.isEmpty(); attempt++) {
                    var retryList = new ArrayList<>(allMissed);
                    allMissed.clear();
                    for (int i = 0; i < retryList.size(); i += retryBatchSize) {
                        var batch = retryList.subList(i, Math.min(i + retryBatchSize, retryList.size()));
                        try {
                            var r = applyFoodsBatch(batch);
                            updated += r.updated;
                            allMissed.addAll(r.missed);
                        } catch (Exception e) {
                            log.warn("Remap foods retry failed (attempt {}): {}", attempt + 1, e.getMessage());
                            allMissed.addAll(batch);
                        }
                    }
                    // Announce the next attempt only when one will actually run; previously a
                    // phantom "attempt 4" was reported after the last retry.
                    if (!allMissed.isEmpty() && attempt + 1 < maxRetries) {
                        emit(emitter, Map.of("type", "retry", "attempt", attempt + 2, "missed", allMissed.size()));
                    }
                }
            }
            cache.flush();
            emit(emitter, Map.of("type", "complete", "total", total, "updated", updated, "missed", allMissed.size()));
            emitter.complete();
        } catch (Exception e) {
            log.error("Remap foods error", e);
            emitter.completeWithError(e);
        }
    });
    return emitter;
}
/**
 * Placeholder endpoint for a vector rebuild: currently it only streams a {@code start} event
 * followed by a {@code complete} event with a total of 0.
 *
 * @return the emitter the background job writes its events to
 */
@PostMapping("/rebuild-vectors")
public SseEmitter rebuildVectors() {
    AuthUtil.requireAdmin();
    final SseEmitter stream = new SseEmitter(600_000L);
    Runnable job = () -> {
        try {
            emit(stream, Map.of("type", "start"));
            // TODO: Implement full vector rebuild using VectorService
            emit(stream, Map.of("type", "complete", "total", 0));
            stream.complete();
        } catch (Exception failure) {
            stream.completeWithError(failure);
        }
    };
    executor.execute(job);
    return stream;
}
/**
 * Synchronously processes pending videos through the pipeline and flushes the cache when
 * anything was extracted.
 *
 * @param limit value forwarded to {@code pipelineService.processPending}; defaults to 5
 * @return a single-entry map with the extracted count under {@code restaurants_extracted}
 */
@PostMapping("/process")
public Map<String, Object> process(@RequestParam(defaultValue = "5") int limit) {
    AuthUtil.requireAdmin();
    final int extracted = pipelineService.processPending(limit);
    boolean dataChanged = extracted > 0;
    if (dataChanged) {
        cache.flush();
    }
    return Map.of("restaurants_extracted", extracted);
}
// --- Helpers ---
/**
 * Outcome of one batch helper call.
 *
 * @param updated number of rows the helper wrote back
 * @param missed  rows from the batch that were not updated and are candidates for a retry
 */
private record BatchResult(int updated, List<Map<String, Object>> missed) {}
/**
 * Asks the LLM to map one batch of restaurants onto the standard cuisine list and persists
 * every valid answer.
 *
 * <p>A row counts as missed when the model returned no entry for its id or returned a value
 * that {@code CuisineTypes.isValid} rejects.
 *
 * @param batch rows with at least {@code id}, {@code name}, {@code cuisine_type} and
 *              {@code foods_mentioned} keys; values may be null
 * @return how many rows were updated plus the rows that were missed
 * @throws Exception if prompt serialization, the model call, response parsing or the update fails
 */
private BatchResult applyRemapBatch(List<Map<String, Object>> batch) throws Exception {
    // Map.of rejects null values, but DB columns such as cuisine_type can be NULL; a nullable
    // map keeps those rows serializable instead of failing the whole batch with an NPE.
    List<Map<String, Object>> items = new ArrayList<>(batch.size());
    for (var row : batch) {
        Map<String, Object> item = new LinkedHashMap<>();
        item.put("id", row.get("id"));
        item.put("name", row.get("name"));
        item.put("current_cuisine_type", row.get("cuisine_type"));
        item.put("foods_mentioned", row.get("foods_mentioned"));
        items.add(item);
    }
    String prompt = """
            아래 식당들의 cuisine_type을 표준 분류로 매핑하세요.
            표준 분류 목록 (반드시 이 중 하나를 선택):
            %s
            식당 목록:
            %s
            규칙:
            - 모든 식당에 대해 빠짐없이 결과를 반환 (총 %d개 모두 반환해야 함)
            - 반드시 위 표준 분류 목록의 값을 그대로 복사하여 사용 (오타 금지)
            - JSON 배열만 반환, 설명 없음
            - 형식: [{"id": "식당ID", "cuisine_type": "한식|국밥/해장국"}, ...]
            JSON 배열:""".formatted(CuisineTypes.CUISINE_LIST_TEXT, mapper.writeValueAsString(items), items.size());
    String raw = genAi.chat(prompt, 4096);
    Object parsed = genAi.parseJson(raw);
    // Model output is untrusted: skip anything that is not an object and accept non-string
    // ids/values via toString() rather than failing the batch with a ClassCastException.
    Map<String, String> typeById = new HashMap<>();
    if (parsed instanceof List<?> entries) {
        for (Object entry : entries) {
            if (!(entry instanceof Map<?, ?> fields)) {
                continue;
            }
            Object id = fields.get("id");
            Object type = fields.get("cuisine_type");
            if (id != null && type != null) {
                typeById.put(id.toString(), type.toString());
            }
        }
    }
    int updated = 0;
    var missed = new ArrayList<Map<String, Object>>();
    for (var row : batch) {
        String id = Objects.toString(row.get("id"), null);
        String newType = typeById.get(id);
        if (newType == null || !CuisineTypes.isValid(newType)) {
            missed.add(row);
            continue;
        }
        restaurantService.updateCuisineType(id, newType);
        updated++;
    }
    return new BatchResult(updated, missed);
}
/**
 * Asks the LLM to regenerate the food tags for one batch of restaurants and persists every
 * answer, keeping at most 10 tags per restaurant.
 *
 * <p>A row counts as missed when the model returned no {@code foods} list for its id.
 *
 * @param batch rows with at least {@code id}, {@code name}, {@code foods} and
 *              {@code cuisine_type} keys; values may be null
 * @return how many rows were updated plus the rows that were missed
 * @throws Exception if prompt serialization, the model call, response parsing or the update fails
 */
private BatchResult applyFoodsBatch(List<Map<String, Object>> batch) throws Exception {
    // Map.of rejects null values, but DB columns such as cuisine_type can be NULL; a nullable
    // map keeps those rows serializable instead of failing the whole batch with an NPE.
    List<Map<String, Object>> items = new ArrayList<>(batch.size());
    for (var row : batch) {
        Map<String, Object> item = new LinkedHashMap<>();
        item.put("id", row.get("id"));
        item.put("name", row.get("name"));
        item.put("current_foods", row.get("foods"));
        item.put("cuisine_type", row.get("cuisine_type"));
        items.add(item);
    }
    String prompt = """
            아래 식당들의 대표 메뉴 태그를 다시 만들어주세요.
            규칙:
            - 반드시 한글로 작성
            - 각 식당당 최대 10개의 대표 메뉴/음식 태그
            - 우선순위: 시그니처 메뉴 > 자주 언급된 메뉴 > 일반 메뉴
            - 너무 일반적인 태그(밥, 반찬 등)는 제외
            - 모든 식당에 대해 빠짐없이 결과 반환 (총 %d개)
            - JSON 배열만 반환, 설명 없음
            - 형식: [{"id": "식당ID", "foods": ["메뉴1", "메뉴2", ...]}]
            식당 목록:
            %s
            JSON 배열:""".formatted(items.size(), mapper.writeValueAsString(items));
    String raw = genAi.chat(prompt, 4096);
    Object parsed = genAi.parseJson(raw);
    // Model output is untrusted: skip anything that is not an object, accept non-string ids via
    // toString(), and drop null tags rather than failing the batch.
    Map<String, List<String>> foodsById = new HashMap<>();
    if (parsed instanceof List<?> entries) {
        for (Object entry : entries) {
            if (!(entry instanceof Map<?, ?> fields)) {
                continue;
            }
            Object id = fields.get("id");
            if (id != null && fields.get("foods") instanceof List<?> foods) {
                List<String> tags = foods.stream()
                        .filter(Objects::nonNull)
                        .map(Object::toString)
                        .limit(10)
                        .toList();
                foodsById.put(id.toString(), tags);
            }
        }
    }
    int updated = 0;
    var missed = new ArrayList<Map<String, Object>>();
    for (var row : batch) {
        String id = Objects.toString(row.get("id"), null);
        List<String> newFoods = foodsById.get(id);
        if (newFoods == null) {
            missed.add(row);
            continue;
        }
        restaurantService.updateFoodsMentioned(id, mapper.writeValueAsString(newFoods));
        updated++;
    }
    return new BatchResult(updated, missed);
}
/**
 * Best-effort send of one JSON-encoded event; any failure (serialization or a closed
 * connection) is logged at debug level and otherwise ignored so the running job continues.
 *
 * @param emitter target SSE stream
 * @param data    event payload, serialized to a JSON string
 */
private void emit(SseEmitter emitter, Map<String, Object> data) {
    try {
        var builder = SseEmitter.event();
        String payload = mapper.writeValueAsString(data);
        emitter.send(builder.data(payload));
    } catch (Exception sendFailure) {
        log.debug("SSE emit failed: {}", sendFailure.getMessage());
    }
}
}