- 벌크 자막: 브라우저 우선 + API fallback, 광고 즉시 skip, 대기 시간 단축
- 벌크 자막/추출: 선택한 영상만 처리 가능 (체크박스 선택 후 실행)
- 자막 실패 시 no_transcript 상태 마킹하여 재시도 방지
- 검색 시 필터 조건 무시 (채널/장르/가격/지역/영역 초기화)
- 리셋 버튼 클릭 시 검색어 입력란 초기화
- RestaurantMapper updateFields에 google_place_id, rating 등 geocoding 필드 추가
- SearchMapper에 tabling_url, catchtable_url, phone, website 필드 추가
- 식당 상세에 네이버 지도 링크 추가
- YouTubeService.getTranscriptApi public 전환

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
488 lines
22 KiB
Java
488 lines
22 KiB
Java
package com.tasteby.controller;
|
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
import com.tasteby.security.AuthUtil;
|
|
import com.tasteby.service.*;
|
|
import com.tasteby.util.CuisineTypes;
|
|
import com.tasteby.util.JsonUtil;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.springframework.web.bind.annotation.*;
|
|
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
|
|
|
|
import java.util.*;
|
|
import java.util.concurrent.ExecutorService;
|
|
import java.util.concurrent.Executors;
|
|
import java.util.concurrent.ThreadLocalRandom;
|
|
|
|
/**
|
|
* SSE streaming endpoints for bulk operations.
|
|
*/
|
|
@RestController
|
|
@RequestMapping("/api/videos")
|
|
public class VideoSseController {
|
|
|
|
private static final Logger log = LoggerFactory.getLogger(VideoSseController.class);
|
|
|
|
private final VideoService videoService;
|
|
private final RestaurantService restaurantService;
|
|
private final PipelineService pipelineService;
|
|
private final YouTubeService youTubeService;
|
|
private final OciGenAiService genAi;
|
|
private final CacheService cache;
|
|
private final ObjectMapper mapper;
|
|
private final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor();
|
|
|
|
public VideoSseController(VideoService videoService,
|
|
RestaurantService restaurantService,
|
|
PipelineService pipelineService,
|
|
YouTubeService youTubeService,
|
|
OciGenAiService genAi,
|
|
CacheService cache,
|
|
ObjectMapper mapper) {
|
|
this.videoService = videoService;
|
|
this.restaurantService = restaurantService;
|
|
this.pipelineService = pipelineService;
|
|
this.youTubeService = youTubeService;
|
|
this.genAi = genAi;
|
|
this.cache = cache;
|
|
this.mapper = mapper;
|
|
}
|
|
|
|
@PostMapping("/bulk-transcript")
|
|
public SseEmitter bulkTranscript(@RequestBody(required = false) Map<String, Object> body) {
|
|
AuthUtil.requireAdmin();
|
|
SseEmitter emitter = new SseEmitter(1_800_000L); // 30 min timeout
|
|
|
|
@SuppressWarnings("unchecked")
|
|
List<String> selectedIds = body != null && body.containsKey("ids")
|
|
? ((List<?>) body.get("ids")).stream().map(Object::toString).toList()
|
|
: null;
|
|
|
|
executor.execute(() -> {
|
|
try {
|
|
var videos = selectedIds != null && !selectedIds.isEmpty()
|
|
? videoService.findVideosByIds(selectedIds)
|
|
: videoService.findVideosWithoutTranscript();
|
|
int total = videos.size();
|
|
emit(emitter, Map.of("type", "start", "total", total));
|
|
|
|
if (total == 0) {
|
|
emit(emitter, Map.of("type", "complete", "total", 0, "success", 0, "failed", 0));
|
|
emitter.complete();
|
|
return;
|
|
}
|
|
|
|
int success = 0;
|
|
int failed = 0;
|
|
|
|
// Pass 1: 브라우저 우선 (봇 탐지 회피)
|
|
var apiNeeded = new ArrayList<Integer>();
|
|
try (var session = youTubeService.createBrowserSession()) {
|
|
for (int i = 0; i < total; i++) {
|
|
var v = videos.get(i);
|
|
String videoId = (String) v.get("video_id");
|
|
String title = (String) v.get("title");
|
|
String id = (String) v.get("id");
|
|
|
|
emit(emitter, Map.of("type", "processing", "index", i, "title", title, "method", "browser"));
|
|
|
|
try {
|
|
var result = youTubeService.getTranscriptWithPage(session.page(), videoId);
|
|
if (result != null) {
|
|
videoService.updateTranscript(id, result.text());
|
|
success++;
|
|
emit(emitter, Map.of("type", "done", "index", i,
|
|
"title", title, "source", result.source(),
|
|
"length", result.text().length()));
|
|
} else {
|
|
apiNeeded.add(i);
|
|
emit(emitter, Map.of("type", "skip", "index", i,
|
|
"title", title, "message", "브라우저 실패, API로 재시도 예정"));
|
|
}
|
|
} catch (Exception e) {
|
|
apiNeeded.add(i);
|
|
log.warn("[BULK-TRANSCRIPT] Browser failed for {}: {}", videoId, e.getMessage());
|
|
}
|
|
|
|
// 봇 판정 방지 랜덤 딜레이 (3~8초)
|
|
if (i < total - 1) {
|
|
int delay = ThreadLocalRandom.current().nextInt(3000, 8001);
|
|
log.info("[BULK-TRANSCRIPT] Waiting {}ms before next...", delay);
|
|
session.page().waitForTimeout(delay);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Pass 2: 브라우저 실패분만 API로 재시도
|
|
if (!apiNeeded.isEmpty()) {
|
|
emit(emitter, Map.of("type", "api_pass", "count", apiNeeded.size()));
|
|
for (int i : apiNeeded) {
|
|
var v = videos.get(i);
|
|
String videoId = (String) v.get("video_id");
|
|
String title = (String) v.get("title");
|
|
String id = (String) v.get("id");
|
|
|
|
emit(emitter, Map.of("type", "processing", "index", i, "title", title, "method", "api"));
|
|
|
|
try {
|
|
var result = youTubeService.getTranscriptApi(videoId, "auto");
|
|
if (result != null) {
|
|
videoService.updateTranscript(id, result.text());
|
|
success++;
|
|
emit(emitter, Map.of("type", "done", "index", i,
|
|
"title", title, "source", result.source(),
|
|
"length", result.text().length()));
|
|
} else {
|
|
failed++;
|
|
videoService.updateStatus(id, "no_transcript");
|
|
emit(emitter, Map.of("type", "error", "index", i,
|
|
"title", title, "message", "자막을 찾을 수 없음"));
|
|
}
|
|
} catch (Exception e) {
|
|
failed++;
|
|
videoService.updateStatus(id, "no_transcript");
|
|
log.error("[BULK-TRANSCRIPT] API error for {}: {}", videoId, e.getMessage());
|
|
emit(emitter, Map.of("type", "error", "index", i,
|
|
"title", title, "message", e.getMessage()));
|
|
}
|
|
}
|
|
}
|
|
|
|
emit(emitter, Map.of("type", "complete", "total", total, "success", success, "failed", failed));
|
|
emitter.complete();
|
|
} catch (Exception e) {
|
|
log.error("Bulk transcript error", e);
|
|
emitter.completeWithError(e);
|
|
}
|
|
});
|
|
return emitter;
|
|
}
|
|
|
|
@PostMapping("/bulk-extract")
|
|
public SseEmitter bulkExtract(@RequestBody(required = false) Map<String, Object> body) {
|
|
AuthUtil.requireAdmin();
|
|
SseEmitter emitter = new SseEmitter(600_000L);
|
|
|
|
@SuppressWarnings("unchecked")
|
|
List<String> selectedIds = body != null && body.containsKey("ids")
|
|
? ((List<?>) body.get("ids")).stream().map(Object::toString).toList()
|
|
: null;
|
|
|
|
executor.execute(() -> {
|
|
try {
|
|
var rows = selectedIds != null && !selectedIds.isEmpty()
|
|
? videoService.findVideosForExtractByIds(selectedIds)
|
|
: videoService.findVideosForBulkExtract();
|
|
|
|
int total = rows.size();
|
|
int totalRestaurants = 0;
|
|
emit(emitter, Map.of("type", "start", "total", total));
|
|
|
|
for (int i = 0; i < total; i++) {
|
|
var v = rows.get(i);
|
|
if (i > 0) {
|
|
long delay = (long) (3000 + Math.random() * 5000);
|
|
emit(emitter, Map.of("type", "wait", "index", i, "delay", delay / 1000.0));
|
|
Thread.sleep(delay);
|
|
}
|
|
emit(emitter, Map.of("type", "processing", "index", i, "title", v.get("title")));
|
|
try {
|
|
int count = pipelineService.processExtract(v, (String) v.get("transcript"), null);
|
|
totalRestaurants += count;
|
|
emit(emitter, Map.of("type", "done", "index", i, "title", v.get("title"), "restaurants", count));
|
|
} catch (Exception e) {
|
|
log.error("Bulk extract error for {}: {}", v.get("video_id"), e.getMessage());
|
|
emit(emitter, Map.of("type", "error", "index", i, "title", v.get("title"), "message", e.getMessage()));
|
|
}
|
|
}
|
|
|
|
if (totalRestaurants > 0) cache.flush();
|
|
emit(emitter, Map.of("type", "complete", "total", total, "total_restaurants", totalRestaurants));
|
|
emitter.complete();
|
|
} catch (Exception e) {
|
|
log.error("Bulk extract error", e);
|
|
emitter.completeWithError(e);
|
|
}
|
|
});
|
|
return emitter;
|
|
}
|
|
|
|
@PostMapping("/remap-cuisine")
|
|
@SuppressWarnings("unchecked")
|
|
public SseEmitter remapCuisine() {
|
|
AuthUtil.requireAdmin();
|
|
SseEmitter emitter = new SseEmitter(600_000L);
|
|
int BATCH = 20;
|
|
|
|
executor.execute(() -> {
|
|
try {
|
|
var rows = restaurantService.findForRemapCuisine();
|
|
rows = rows.stream().map(JsonUtil::lowerKeys).toList();
|
|
|
|
int total = rows.size();
|
|
emit(emitter, Map.of("type", "start", "total", total));
|
|
int updated = 0;
|
|
var allMissed = new ArrayList<Map<String, Object>>();
|
|
|
|
// Pass 1
|
|
for (int i = 0; i < total; i += BATCH) {
|
|
var batch = rows.subList(i, Math.min(i + BATCH, total));
|
|
emit(emitter, Map.of("type", "processing", "current", Math.min(i + BATCH, total), "total", total, "pass", 1));
|
|
try {
|
|
var result = applyRemapBatch(batch);
|
|
updated += result.updated;
|
|
allMissed.addAll(result.missed);
|
|
emit(emitter, Map.of("type", "batch_done", "current", Math.min(i + BATCH, total), "total", total, "updated", updated, "missed", allMissed.size()));
|
|
} catch (Exception e) {
|
|
allMissed.addAll(batch);
|
|
emit(emitter, Map.of("type", "error", "message", e.getMessage(), "current", i));
|
|
}
|
|
}
|
|
|
|
// Pass 2: retry missed (up to 3 attempts with smaller batches)
|
|
if (!allMissed.isEmpty()) {
|
|
emit(emitter, Map.of("type", "retry", "missed", allMissed.size()));
|
|
for (int attempt = 0; attempt < 3 && !allMissed.isEmpty(); attempt++) {
|
|
var retryList = new ArrayList<>(allMissed);
|
|
allMissed.clear();
|
|
for (int i = 0; i < retryList.size(); i += 5) {
|
|
var batch = retryList.subList(i, Math.min(i + 5, retryList.size()));
|
|
try {
|
|
var result = applyRemapBatch(batch);
|
|
updated += result.updated;
|
|
allMissed.addAll(result.missed);
|
|
} catch (Exception e) {
|
|
log.warn("Remap cuisine retry failed (attempt {}): {}", attempt + 1, e.getMessage());
|
|
allMissed.addAll(batch);
|
|
}
|
|
}
|
|
if (!allMissed.isEmpty()) {
|
|
emit(emitter, Map.of("type", "retry", "attempt", attempt + 2, "missed", allMissed.size()));
|
|
}
|
|
}
|
|
}
|
|
|
|
cache.flush();
|
|
emit(emitter, Map.of("type", "complete", "total", total, "updated", updated, "missed", allMissed.size()));
|
|
emitter.complete();
|
|
} catch (Exception e) {
|
|
emitter.completeWithError(e);
|
|
}
|
|
});
|
|
return emitter;
|
|
}
|
|
|
|
@PostMapping("/remap-foods")
|
|
@SuppressWarnings("unchecked")
|
|
public SseEmitter remapFoods() {
|
|
AuthUtil.requireAdmin();
|
|
SseEmitter emitter = new SseEmitter(600_000L);
|
|
int BATCH = 15;
|
|
|
|
executor.execute(() -> {
|
|
try {
|
|
var rows = restaurantService.findForRemapFoods();
|
|
rows = rows.stream().map(r -> {
|
|
var m = JsonUtil.lowerKeys(r);
|
|
// foods_mentioned is now TO_CHAR'd in SQL, parse as string
|
|
Object fm = m.get("foods_mentioned");
|
|
m.put("foods", JsonUtil.parseStringList(fm));
|
|
return m;
|
|
}).toList();
|
|
|
|
int total = rows.size();
|
|
emit(emitter, Map.of("type", "start", "total", total));
|
|
int updated = 0;
|
|
var allMissed = new ArrayList<Map<String, Object>>();
|
|
|
|
for (int i = 0; i < total; i += BATCH) {
|
|
var batch = rows.subList(i, Math.min(i + BATCH, total));
|
|
emit(emitter, Map.of("type", "processing", "current", Math.min(i + BATCH, total), "total", total));
|
|
try {
|
|
var result = applyFoodsBatch(batch);
|
|
updated += result.updated;
|
|
allMissed.addAll(result.missed);
|
|
emit(emitter, Map.of("type", "batch_done", "current", Math.min(i + BATCH, total), "total", total, "updated", updated));
|
|
} catch (Exception e) {
|
|
allMissed.addAll(batch);
|
|
log.warn("Remap foods batch error at {}: {}", i, e.getMessage());
|
|
emit(emitter, Map.of("type", "error", "message", e.getMessage(), "current", i));
|
|
}
|
|
}
|
|
|
|
// Retry missed (up to 3 attempts with smaller batches)
|
|
if (!allMissed.isEmpty()) {
|
|
emit(emitter, Map.of("type", "retry", "missed", allMissed.size()));
|
|
for (int attempt = 0; attempt < 3 && !allMissed.isEmpty(); attempt++) {
|
|
var retryList = new ArrayList<>(allMissed);
|
|
allMissed.clear();
|
|
for (int i = 0; i < retryList.size(); i += 5) {
|
|
var batch = retryList.subList(i, Math.min(i + 5, retryList.size()));
|
|
try {
|
|
var r = applyFoodsBatch(batch);
|
|
updated += r.updated;
|
|
allMissed.addAll(r.missed);
|
|
} catch (Exception e) {
|
|
log.warn("Remap foods retry failed (attempt {}): {}", attempt + 1, e.getMessage());
|
|
allMissed.addAll(batch);
|
|
}
|
|
}
|
|
if (!allMissed.isEmpty()) {
|
|
emit(emitter, Map.of("type", "retry", "attempt", attempt + 2, "missed", allMissed.size()));
|
|
}
|
|
}
|
|
}
|
|
|
|
cache.flush();
|
|
emit(emitter, Map.of("type", "complete", "total", total, "updated", updated, "missed", allMissed.size()));
|
|
emitter.complete();
|
|
} catch (Exception e) {
|
|
emitter.completeWithError(e);
|
|
}
|
|
});
|
|
return emitter;
|
|
}
|
|
|
|
@PostMapping("/rebuild-vectors")
|
|
public SseEmitter rebuildVectors() {
|
|
AuthUtil.requireAdmin();
|
|
SseEmitter emitter = new SseEmitter(600_000L);
|
|
|
|
executor.execute(() -> {
|
|
try {
|
|
emit(emitter, Map.of("type", "start"));
|
|
// TODO: Implement full vector rebuild using VectorService
|
|
emit(emitter, Map.of("type", "complete", "total", 0));
|
|
emitter.complete();
|
|
} catch (Exception e) {
|
|
emitter.completeWithError(e);
|
|
}
|
|
});
|
|
return emitter;
|
|
}
|
|
|
|
@PostMapping("/process")
|
|
public Map<String, Object> process(@RequestParam(defaultValue = "5") int limit) {
|
|
AuthUtil.requireAdmin();
|
|
int count = pipelineService.processPending(limit);
|
|
if (count > 0) cache.flush();
|
|
return Map.of("restaurants_extracted", count);
|
|
}
|
|
|
|
// --- Helpers ---
|
|
|
|
private record BatchResult(int updated, List<Map<String, Object>> missed) {}
|
|
|
|
@SuppressWarnings("unchecked")
|
|
private BatchResult applyRemapBatch(List<Map<String, Object>> batch) throws Exception {
|
|
var items = batch.stream().map(b -> Map.of(
|
|
"id", b.get("id"), "name", b.get("name"),
|
|
"current_cuisine_type", b.get("cuisine_type"),
|
|
"foods_mentioned", b.get("foods_mentioned")
|
|
)).toList();
|
|
|
|
String prompt = """
|
|
아래 식당들의 cuisine_type을 표준 분류로 매핑하세요.
|
|
|
|
표준 분류 목록 (반드시 이 중 하나를 선택):
|
|
%s
|
|
|
|
식당 목록:
|
|
%s
|
|
|
|
규칙:
|
|
- 모든 식당에 대해 빠짐없이 결과를 반환 (총 %d개 모두 반환해야 함)
|
|
- 반드시 위 표준 분류 목록의 값을 그대로 복사하여 사용 (오타 금지)
|
|
- JSON 배열만 반환, 설명 없음
|
|
- 형식: [{"id": "식당ID", "cuisine_type": "한식|국밥/해장국"}, ...]
|
|
|
|
JSON 배열:""".formatted(CuisineTypes.CUISINE_LIST_TEXT, mapper.writeValueAsString(items), items.size());
|
|
|
|
String raw = genAi.chat(prompt, 4096);
|
|
Object parsed = genAi.parseJson(raw);
|
|
List<Map<String, Object>> results = parsed instanceof List<?> ? (List<Map<String, Object>>) parsed : List.of();
|
|
|
|
Map<String, String> resultMap = new HashMap<>();
|
|
for (var item : results) {
|
|
String id = (String) item.get("id");
|
|
String type = (String) item.get("cuisine_type");
|
|
if (id != null && type != null) resultMap.put(id, type);
|
|
}
|
|
|
|
int updated = 0;
|
|
var missed = new ArrayList<Map<String, Object>>();
|
|
for (var b : batch) {
|
|
String id = (String) b.get("id");
|
|
String newType = resultMap.get(id);
|
|
if (newType == null || !CuisineTypes.isValid(newType)) {
|
|
missed.add(b);
|
|
continue;
|
|
}
|
|
restaurantService.updateCuisineType(id, newType);
|
|
updated++;
|
|
}
|
|
return new BatchResult(updated, missed);
|
|
}
|
|
|
|
@SuppressWarnings("unchecked")
|
|
private BatchResult applyFoodsBatch(List<Map<String, Object>> batch) throws Exception {
|
|
var items = batch.stream().map(b -> Map.of(
|
|
"id", b.get("id"), "name", b.get("name"),
|
|
"current_foods", b.get("foods"), "cuisine_type", b.get("cuisine_type")
|
|
)).toList();
|
|
|
|
String prompt = """
|
|
아래 식당들의 대표 메뉴 태그를 다시 만들어주세요.
|
|
|
|
규칙:
|
|
- 반드시 한글로 작성
|
|
- 각 식당당 최대 10개의 대표 메뉴/음식 태그
|
|
- 우선순위: 시그니처 메뉴 > 자주 언급된 메뉴 > 일반 메뉴
|
|
- 너무 일반적인 태그(밥, 반찬 등)는 제외
|
|
- 모든 식당에 대해 빠짐없이 결과 반환 (총 %d개)
|
|
- JSON 배열만 반환, 설명 없음
|
|
- 형식: [{"id": "식당ID", "foods": ["메뉴1", "메뉴2", ...]}]
|
|
|
|
식당 목록:
|
|
%s
|
|
|
|
JSON 배열:""".formatted(items.size(), mapper.writeValueAsString(items));
|
|
|
|
String raw = genAi.chat(prompt, 4096);
|
|
Object parsed = genAi.parseJson(raw);
|
|
List<Map<String, Object>> results = parsed instanceof List<?> ? (List<Map<String, Object>>) parsed : List.of();
|
|
|
|
Map<String, List<String>> resultMap = new HashMap<>();
|
|
for (var item : results) {
|
|
String id = (String) item.get("id");
|
|
Object foods = item.get("foods");
|
|
if (id != null && foods instanceof List<?> list) {
|
|
resultMap.put(id, list.stream().map(Object::toString).limit(10).toList());
|
|
}
|
|
}
|
|
|
|
int updated = 0;
|
|
var missed = new ArrayList<Map<String, Object>>();
|
|
for (var b : batch) {
|
|
String id = (String) b.get("id");
|
|
List<String> newFoods = resultMap.get(id);
|
|
if (newFoods == null) {
|
|
missed.add(b);
|
|
continue;
|
|
}
|
|
restaurantService.updateFoodsMentioned(id, mapper.writeValueAsString(newFoods));
|
|
updated++;
|
|
}
|
|
return new BatchResult(updated, missed);
|
|
}
|
|
|
|
private void emit(SseEmitter emitter, Map<String, Object> data) {
|
|
try {
|
|
emitter.send(SseEmitter.event().data(mapper.writeValueAsString(data)));
|
|
} catch (Exception e) {
|
|
log.debug("SSE emit failed: {}", e.getMessage());
|
|
}
|
|
}
|
|
}
|