Files
tasteby/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java
joungmin 3815221535 feat(util): #348 isNameSimilar 한국어 자모 + Sørensen-Dice
- HangulSimilarity 유틸 신규
  - decompose: Unicode NFD 분해 (한글 음절 → 초성/중성/종성)
  - 공백·구두점 제거 + 소문자화
  - bigram multiset 기반 Sørensen-Dice 계수
  - 빈 입력/포함 관계 가드
- RestaurantController.isNameSimilar 임계값 0.45 (이전 Jaccard 0.4와 유사 보수성)
- 기존 normalize 헬퍼 제거 (HangulSimilarity 내부로 이동)

DDG/DTO/UNIQUE는 별도 후속:
- 외부 검색 API 선정 (Naver/Kakao/Google CSE)
- RestaurantUpdateDTO + @Valid
- google_place_id 중복 정리 후 UNIQUE 제약

설계서: docs/design/348-name-similarity/README.md

Refs: #348 (Developer 단계)
2026-06-15 16:10:44 +09:00

543 lines
24 KiB
Java

package com.tasteby.controller;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.tasteby.domain.Restaurant;
import com.tasteby.security.AuthUtil;
import com.tasteby.service.CacheService;
import com.tasteby.service.GeocodingService;
import com.tasteby.service.RestaurantService;
import jakarta.annotation.PreDestroy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpStatus;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.server.ResponseStatusException;
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
import java.net.URI;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@RestController
@RequestMapping("/api/restaurants")
public class RestaurantController {
// Class-wide SLF4J logger.
private static final Logger log = LoggerFactory.getLogger(RestaurantController.class);
// Collaborators, all supplied through the constructor.
private final RestaurantService restaurantService;
private final GeocodingService geocodingService;
private final CacheService cache;
// Used to deserialize cached JSON and to serialize SSE payloads.
private final ObjectMapper objectMapper;
// Runs the bulk search jobs off the request thread, one virtual thread per submitted task.
private final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor();
/**
 * Creates the controller and stores its collaborators.
 *
 * @param restaurantService restaurant persistence/service layer
 * @param geocodingService  geocoder used when a restaurant's name or address changes
 * @param cache             cache used for read endpoints and flushed after writes
 * @param objectMapper      JSON mapper for cache entries and SSE events
 */
public RestaurantController(RestaurantService restaurantService, GeocodingService geocodingService, CacheService cache, ObjectMapper objectMapper) {
this.restaurantService = restaurantService;
this.geocodingService = geocodingService;
this.cache = cache;
this.objectMapper = objectMapper;
}
// #290 — Shut down the virtual-thread executor when the bean is destroyed to avoid leaking resources.
// shutdown() only stops accepting new tasks; jobs that are already running are not interrupted.
@PreDestroy
public void shutdownExecutor() {
executor.shutdown();
}
/**
 * Lists restaurants, optionally filtered by cuisine, region and channel.
 *
 * <p>The page size is capped at 500. Responses are cached under a key built from every
 * query parameter; a cached entry that fails to deserialize is evicted and the request
 * falls through to the service.
 *
 * @param limit   requested page size (values above 500 are reduced to 500)
 * @param offset  row offset, passed to the service unchanged
 * @param cuisine optional cuisine filter
 * @param region  optional region filter
 * @param channel optional channel filter
 * @return the restaurants returned by the service (or the cached copy)
 */
@GetMapping
public List<Restaurant> list(
        @RequestParam(defaultValue = "100") int limit,
        @RequestParam(defaultValue = "0") int offset,
        @RequestParam(required = false) String cuisine,
        @RequestParam(required = false) String region,
        @RequestParam(required = false) String channel) {
    final int cappedLimit = Math.min(limit, 500);
    final String cacheKey = cache.makeKey("restaurants", "l=" + cappedLimit, "o=" + offset,
            "c=" + cuisine, "r=" + region, "ch=" + channel);

    final String cachedJson = cache.getRaw(cacheKey);
    if (cachedJson != null) {
        try {
            return objectMapper.readValue(cachedJson, new TypeReference<List<Restaurant>>() {});
        } catch (Exception ex) {
            // Unreadable entry: drop it and rebuild from the service below.
            log.warn("Cache deserialize failed, evicting: {}", ex.getMessage());
            cache.del(cacheKey);
        }
    }

    var fresh = restaurantService.findAll(cappedLimit, offset, cuisine, region, channel);
    cache.set(cacheKey, fresh);
    return fresh;
}
/**
 * Returns a single restaurant, served from the cache when a readable entry exists.
 *
 * @param id restaurant id
 * @return the restaurant
 * @throws ResponseStatusException 404 when the service has no restaurant with this id
 */
@GetMapping("/{id}")
public Restaurant get(@PathVariable String id) {
    final String cacheKey = cache.makeKey("restaurant", id);
    final String cachedJson = cache.getRaw(cacheKey);
    if (cachedJson != null) {
        try {
            return objectMapper.readValue(cachedJson, Restaurant.class);
        } catch (Exception ex) {
            // Unreadable entry: drop it and reload from the service below.
            log.warn("Cache deserialize failed, evicting: {}", ex.getMessage());
            cache.del(cacheKey);
        }
    }

    var found = restaurantService.findById(id);
    if (found == null) {
        throw new ResponseStatusException(HttpStatus.NOT_FOUND, "Restaurant not found");
    }
    cache.set(cacheKey, found);
    return found;
}
// #332 — Whitelist of updatable Restaurant fields (kept 1:1 with the column guard in the SQL updateFields).
// Keys outside this set are ignored (silent drop). Introducing a DTO is follow-up work.
private static final java.util.Set<String> ALLOWED_UPDATE_FIELDS = java.util.Set.of(
        "name", "address", "region", "cuisine_type", "price_range",
        "phone", "website", "tabling_url", "catchtable_url",
        "latitude", "longitude", "google_place_id",
        "business_status", "rating", "rating_count"
);

/**
 * Updates a restaurant (admin only).
 *
 * <p>Only whitelisted keys from {@code body} are applied. When the name or address differs
 * from the stored value the restaurant is re-geocoded, and the geocoder's coordinates,
 * place id and (when present) address, rating, phone, business status and region overwrite
 * whatever the caller sent for those fields. The whole cache is flushed after a write.
 *
 * @param id   restaurant id
 * @param body field name → new value; non-whitelisted keys are ignored
 * @return {@code {"ok": true, "restaurant": <current state>}}
 * @throws ResponseStatusException 404 when the restaurant does not exist; 400 when
 *         {@code name} or {@code address} is present but is not a JSON string
 */
@PutMapping("/{id}")
public Map<String, Object> update(@PathVariable String id, @RequestBody Map<String, Object> body) {
    AuthUtil.requireAdmin();
    var r = restaurantService.findById(id);
    if (r == null) throw new ResponseStatusException(HttpStatus.NOT_FOUND, "Restaurant not found");

    // #332 — normalize the request body into a mutable map containing only whitelisted keys.
    Map<String, Object> sanitized = new java.util.LinkedHashMap<>();
    for (var e : body.entrySet()) {
        if (ALLOWED_UPDATE_FIELDS.contains(e.getKey())) {
            sanitized.put(e.getKey(), e.getValue());
        } else {
            log.debug("Ignoring non-whitelisted update field: {}", e.getKey());
        }
    }

    // Re-geocode if name or address changed. The values come straight from untyped JSON,
    // so they are validated instead of blindly cast (a number here used to surface as a 500).
    String newName = optionalStringField(sanitized, "name");
    String newAddress = optionalStringField(sanitized, "address");
    boolean nameChanged = newName != null && !newName.equals(r.getName());
    boolean addressChanged = newAddress != null && !newAddress.equals(r.getAddress());
    if (nameChanged || addressChanged) {
        String geoName = newName != null ? newName : r.getName();
        String geoAddr = newAddress != null ? newAddress : r.getAddress();
        var geo = geocodingService.geocodeRestaurant(geoName, geoAddr);
        if (geo != null) {
            sanitized.put("latitude", geo.get("latitude"));
            sanitized.put("longitude", geo.get("longitude"));
            sanitized.put("google_place_id", geo.get("google_place_id"));
            if (geo.containsKey("formatted_address")) {
                sanitized.put("address", geo.get("formatted_address"));
            }
            if (geo.containsKey("rating")) sanitized.put("rating", geo.get("rating"));
            if (geo.containsKey("rating_count")) sanitized.put("rating_count", geo.get("rating_count"));
            if (geo.containsKey("phone")) sanitized.put("phone", geo.get("phone"));
            if (geo.containsKey("business_status")) sanitized.put("business_status", geo.get("business_status"));
            String addr = (String) geo.get("formatted_address");
            if (addr != null) {
                sanitized.put("region", GeocodingService.parseRegionFromAddress(addr));
            }
        }
    }

    if (sanitized.isEmpty()) {
        // No whitelisted key at all: nothing to write, report the current state.
        return Map.of("ok", true, "restaurant", r);
    }
    restaurantService.update(id, sanitized);
    cache.flush();
    var updated = restaurantService.findById(id);
    return Map.of("ok", true, "restaurant", updated);
}

/** Reads an optional string field from the request map, rejecting non-string values with 400. */
private static String optionalStringField(Map<String, Object> fields, String key) {
    Object value = fields.get(key);
    if (value == null) {
        return null;
    }
    if (value instanceof String text) {
        return text;
    }
    throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "Field '" + key + "' must be a string");
}
/**
 * Deletes a restaurant (admin only) and flushes the cache.
 *
 * @param id restaurant id
 * @return {@code {"ok": true}}
 * @throws ResponseStatusException 404 when the restaurant does not exist
 */
@DeleteMapping("/{id}")
public Map<String, Object> delete(@PathVariable String id) {
    AuthUtil.requireAdmin();
    var existing = restaurantService.findById(id);
    if (existing == null) {
        throw new ResponseStatusException(HttpStatus.NOT_FOUND, "Restaurant not found");
    }
    restaurantService.delete(id);
    cache.flush();
    return Map.of("ok", true);
}
/**
 * Searches tabling URL candidates for a single restaurant (admin only).
 *
 * @param id restaurant id
 * @return candidate results, each with {@code title} and {@code url}
 * @throws ResponseStatusException 404 when the restaurant does not exist; 502 when the
 *         search itself fails
 */
@GetMapping("/{id}/tabling-search")
public List<Map<String, Object>> tablingSearch(@PathVariable String id) {
    AuthUtil.requireAdmin();
    var r = restaurantService.findById(id);
    if (r == null) throw new ResponseStatusException(HttpStatus.NOT_FOUND);
    try {
        return searchTabling(r.getName());
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // The blocking HTTP call clears the interrupt flag when it throws; restore it
            // so the thread's owner still sees the interruption.
            Thread.currentThread().interrupt();
        }
        // Pass the exception itself so the stack trace is logged and kept as the cause.
        log.error("[TABLING] Search failed for '{}': {}", r.getName(), e.getMessage(), e);
        throw new ResponseStatusException(HttpStatus.BAD_GATEWAY, "Search failed: " + e.getMessage(), e);
    }
}
/**
 * Lists restaurants that the service reports as having no tabling link (admin only).
 *
 * @return {@code {"count": n, "restaurants": [{"id": ..., "name": ...}, ...]}}
 */
@GetMapping("/tabling-pending")
public Map<String, Object> tablingPending() {
    AuthUtil.requireAdmin();
    var pending = restaurantService.findWithoutTabling();
    List<Map<String, Object>> summary = new ArrayList<>();
    for (var restaurant : pending) {
        // Only id and name are exposed; Map.of keeps each entry immutable.
        summary.add(Map.<String, Object>of("id", restaurant.getId(), "name", restaurant.getName()));
    }
    return Map.of("count", pending.size(), "restaurants", List.copyOf(summary));
}
/**
 * Bulk tabling search, streamed to the client as server-sent events (admin only).
 *
 * <p>The job runs on the controller's executor. For every restaurant without a tabling
 * link it searches for candidates, compares the first hit's title with the restaurant
 * name and stores either the URL or the marker {@code "NONE"}. Events emitted:
 * {@code start}, {@code processing}, {@code done}, {@code notfound}, {@code error} and
 * finally {@code complete}. A failed search is reported as {@code error} and counted as
 * not found, but nothing is written for that restaurant.
 *
 * <p>Event delivery failures are swallowed by {@code emit}, so the job keeps running even
 * when the client has gone away or the 10-minute emitter timeout has passed.
 *
 * @return the emitter the events are written to
 */
@PostMapping("/bulk-tabling")
public SseEmitter bulkTabling() {
    AuthUtil.requireAdmin();
    SseEmitter emitter = new SseEmitter(600_000L);
    executor.execute(() -> {
        try {
            var restaurants = restaurantService.findWithoutTabling();
            int total = restaurants.size();
            emit(emitter, Map.of("type", "start", "total", total));
            if (total == 0) {
                emit(emitter, Map.of("type", "complete", "total", 0, "linked", 0, "notFound", 0));
                emitter.complete();
                return;
            }
            int linked = 0;
            int notFound = 0;
            for (int i = 0; i < total; i++) {
                var r = restaurants.get(i);
                emit(emitter, Map.of("type", "processing", "current", i + 1,
                        "total", total, "name", r.getName()));
                try {
                    var results = searchTabling(r.getName());
                    if (!results.isEmpty()) {
                        String url = String.valueOf(results.get(0).get("url"));
                        String title = String.valueOf(results.get(0).get("title"));
                        if (isNameSimilar(r.getName(), title)) {
                            restaurantService.update(r.getId(), Map.of("tabling_url", url));
                            linked++;
                            emit(emitter, Map.of("type", "done", "current", i + 1,
                                    "name", r.getName(), "url", url, "title", title));
                        } else {
                            restaurantService.update(r.getId(), Map.of("tabling_url", "NONE"));
                            notFound++;
                            log.info("[TABLING] Name mismatch: '{}' vs '{}', skipping", r.getName(), title);
                            emit(emitter, Map.of("type", "notfound", "current", i + 1,
                                    "name", r.getName(), "reason", "이름 불일치: " + title));
                        }
                    } else {
                        restaurantService.update(r.getId(), Map.of("tabling_url", "NONE"));
                        notFound++;
                        emit(emitter, Map.of("type", "notfound", "current", i + 1,
                                "name", r.getName()));
                    }
                } catch (InterruptedException e) {
                    // Not a per-item failure: stop the whole job (handled below).
                    throw e;
                } catch (Exception e) {
                    notFound++;
                    // Map.of rejects null values and many exceptions carry no message; without
                    // this fallback the NullPointerException escaped and aborted the whole job.
                    String message = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
                    emit(emitter, Map.of("type", "error", "current", i + 1,
                            "name", r.getName(), "message", message));
                }
                // Random 2–5 s pause between searches; pointless after the last one.
                if (i < total - 1) {
                    int delay = ThreadLocalRandom.current().nextInt(2000, 5001);
                    log.info("[TABLING] Waiting {}ms before next search...", delay);
                    Thread.sleep(delay);
                }
            }
            cache.flush();
            emit(emitter, Map.of("type", "complete", "total", total, "linked", linked, "notFound", notFound));
            emitter.complete();
        } catch (InterruptedException e) {
            // Restore the flag so the executor/thread owner can observe the interruption.
            Thread.currentThread().interrupt();
            log.warn("[TABLING] Bulk search interrupted", e);
            emitter.completeWithError(e);
        } catch (Exception e) {
            log.error("[TABLING] Bulk search error", e);
            emitter.completeWithError(e);
        }
    });
    return emitter;
}
/**
 * Stores the tabling URL of a restaurant (admin only).
 *
 * <p>#290 — blocks {@code javascript:} and other foreign URLs: a non-blank value must start
 * with {@code https://tabling.co.kr/}. A missing value is stored as the empty string, and a
 * blank value is stored as given; both are accepted as a way to clear the mapping.
 *
 * @param id   restaurant id
 * @param body request body; the {@code tabling_url} entry is read
 * @return {@code {"ok": true}}
 * @throws ResponseStatusException 404 when the restaurant does not exist; 400 when the URL
 *         is not an allowed tabling URL
 */
@PutMapping("/{id}/tabling-url")
public Map<String, Object> setTablingUrl(@PathVariable String id, @RequestBody Map<String, String> body) {
    AuthUtil.requireAdmin();
    var existing = restaurantService.findById(id);
    if (existing == null) {
        throw new ResponseStatusException(HttpStatus.NOT_FOUND);
    }

    String url = body.get("tabling_url");
    boolean hasValue = url != null && !url.isBlank();
    if (hasValue && !url.startsWith("https://tabling.co.kr/")) {
        throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "테이블링 URL은 https://tabling.co.kr/ 만 허용");
    }

    String stored = (url == null) ? "" : url;
    restaurantService.update(id, Map.of("tabling_url", stored));
    cache.flush();
    return Map.of("ok", true);
}
/**
 * Resets the tabling mappings (admin only) by delegating to the service, then flushes the cache.
 *
 * @return {@code {"ok": true}}
 */
@DeleteMapping("/reset-tabling")
public Map<String, Object> resetTabling() {
AuthUtil.requireAdmin();
restaurantService.resetTablingUrls();
cache.flush();
return Map.of("ok", true);
}
/**
 * Resets the catchtable mappings (admin only) by delegating to the service, then flushes the cache.
 *
 * @return {@code {"ok": true}}
 */
@DeleteMapping("/reset-catchtable")
public Map<String, Object> resetCatchtable() {
AuthUtil.requireAdmin();
restaurantService.resetCatchtableUrls();
cache.flush();
return Map.of("ok", true);
}
/**
 * Searches catchtable URL candidates for a single restaurant (admin only).
 *
 * @param id restaurant id
 * @return candidate results, each with {@code title} and {@code url}
 * @throws ResponseStatusException 404 when the restaurant does not exist; 502 when the
 *         search itself fails
 */
@GetMapping("/{id}/catchtable-search")
public List<Map<String, Object>> catchtableSearch(@PathVariable String id) {
    AuthUtil.requireAdmin();
    var r = restaurantService.findById(id);
    if (r == null) throw new ResponseStatusException(HttpStatus.NOT_FOUND);
    try {
        return searchCatchtable(r.getName());
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // The blocking HTTP call clears the interrupt flag when it throws; restore it
            // so the thread's owner still sees the interruption.
            Thread.currentThread().interrupt();
        }
        // Pass the exception itself so the stack trace is logged and kept as the cause.
        log.error("[CATCHTABLE] Search failed for '{}': {}", r.getName(), e.getMessage(), e);
        throw new ResponseStatusException(HttpStatus.BAD_GATEWAY, "Search failed: " + e.getMessage(), e);
    }
}
/**
 * Lists restaurants that the service reports as having no catchtable link (admin only).
 *
 * @return {@code {"count": n, "restaurants": [{"id": ..., "name": ...}, ...]}}
 */
@GetMapping("/catchtable-pending")
public Map<String, Object> catchtablePending() {
    AuthUtil.requireAdmin();
    var pending = restaurantService.findWithoutCatchtable();
    List<Map<String, Object>> summary = new ArrayList<>();
    for (var restaurant : pending) {
        // Only id and name are exposed; Map.of keeps each entry immutable.
        summary.add(Map.<String, Object>of("id", restaurant.getId(), "name", restaurant.getName()));
    }
    return Map.of("count", pending.size(), "restaurants", List.copyOf(summary));
}
/**
 * Bulk catchtable search, streamed to the client as server-sent events (admin only).
 *
 * <p>The job runs on the controller's executor. For every restaurant without a catchtable
 * link it searches for candidates, compares the first hit's title with the restaurant
 * name and stores either the URL or the marker {@code "NONE"}. Events emitted:
 * {@code start}, {@code processing}, {@code done}, {@code notfound}, {@code error} and
 * finally {@code complete}. A failed search is reported as {@code error} and counted as
 * not found, but nothing is written for that restaurant.
 *
 * <p>Event delivery failures are swallowed by {@code emit}, so the job keeps running even
 * when the client has gone away or the 10-minute emitter timeout has passed.
 *
 * @return the emitter the events are written to
 */
@PostMapping("/bulk-catchtable")
public SseEmitter bulkCatchtable() {
    AuthUtil.requireAdmin();
    SseEmitter emitter = new SseEmitter(600_000L);
    executor.execute(() -> {
        try {
            var restaurants = restaurantService.findWithoutCatchtable();
            int total = restaurants.size();
            emit(emitter, Map.of("type", "start", "total", total));
            if (total == 0) {
                emit(emitter, Map.of("type", "complete", "total", 0, "linked", 0, "notFound", 0));
                emitter.complete();
                return;
            }
            int linked = 0;
            int notFound = 0;
            for (int i = 0; i < total; i++) {
                var r = restaurants.get(i);
                emit(emitter, Map.of("type", "processing", "current", i + 1,
                        "total", total, "name", r.getName()));
                try {
                    var results = searchCatchtable(r.getName());
                    if (!results.isEmpty()) {
                        String url = String.valueOf(results.get(0).get("url"));
                        String title = String.valueOf(results.get(0).get("title"));
                        if (isNameSimilar(r.getName(), title)) {
                            restaurantService.update(r.getId(), Map.of("catchtable_url", url));
                            linked++;
                            emit(emitter, Map.of("type", "done", "current", i + 1,
                                    "name", r.getName(), "url", url, "title", title));
                        } else {
                            restaurantService.update(r.getId(), Map.of("catchtable_url", "NONE"));
                            notFound++;
                            log.info("[CATCHTABLE] Name mismatch: '{}' vs '{}', skipping", r.getName(), title);
                            emit(emitter, Map.of("type", "notfound", "current", i + 1,
                                    "name", r.getName(), "reason", "이름 불일치: " + title));
                        }
                    } else {
                        restaurantService.update(r.getId(), Map.of("catchtable_url", "NONE"));
                        notFound++;
                        emit(emitter, Map.of("type", "notfound", "current", i + 1,
                                "name", r.getName()));
                    }
                } catch (InterruptedException e) {
                    // Not a per-item failure: stop the whole job (handled below).
                    throw e;
                } catch (Exception e) {
                    notFound++;
                    // Map.of rejects null values and many exceptions carry no message; without
                    // this fallback the NullPointerException escaped and aborted the whole job.
                    String message = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
                    emit(emitter, Map.of("type", "error", "current", i + 1,
                            "name", r.getName(), "message", message));
                }
                // Random 2–5 s pause between searches; pointless after the last one.
                if (i < total - 1) {
                    int delay = ThreadLocalRandom.current().nextInt(2000, 5001);
                    log.info("[CATCHTABLE] Waiting {}ms before next search...", delay);
                    Thread.sleep(delay);
                }
            }
            cache.flush();
            emit(emitter, Map.of("type", "complete", "total", total, "linked", linked, "notFound", notFound));
            emitter.complete();
        } catch (InterruptedException e) {
            // Restore the flag so the executor/thread owner can observe the interruption.
            Thread.currentThread().interrupt();
            log.warn("[CATCHTABLE] Bulk search interrupted", e);
            emitter.completeWithError(e);
        } catch (Exception e) {
            log.error("[CATCHTABLE] Bulk search error", e);
            emitter.completeWithError(e);
        }
    });
    return emitter;
}
/**
 * Stores the catchtable URL of a restaurant (admin only).
 *
 * <p>#290 — blocks {@code javascript:} and other foreign URLs: a non-blank value must start
 * with {@code https://app.catchtable.co.kr/} or {@code https://www.catchtable.co.kr/}.
 * A missing value is stored as the empty string, and a blank value is stored as given;
 * both are accepted as a way to clear the mapping.
 *
 * @param id   restaurant id
 * @param body request body; the {@code catchtable_url} entry is read
 * @return {@code {"ok": true}}
 * @throws ResponseStatusException 404 when the restaurant does not exist; 400 when the URL
 *         is not an allowed catchtable URL
 */
@PutMapping("/{id}/catchtable-url")
public Map<String, Object> setCatchtableUrl(@PathVariable String id, @RequestBody Map<String, String> body) {
    AuthUtil.requireAdmin();
    var existing = restaurantService.findById(id);
    if (existing == null) {
        throw new ResponseStatusException(HttpStatus.NOT_FOUND);
    }

    String url = body.get("catchtable_url");
    if (url != null && !url.isBlank()) {
        boolean allowedHost = url.startsWith("https://app.catchtable.co.kr/")
                || url.startsWith("https://www.catchtable.co.kr/");
        if (!allowedHost) {
            throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "캐치테이블 URL은 https://(app|www).catchtable.co.kr/ 만 허용");
        }
    }

    String stored = (url == null) ? "" : url;
    restaurantService.update(id, Map.of("catchtable_url", stored));
    cache.flush();
    return Map.of("ok", true);
}
/**
 * Returns the video links of a restaurant, served from the cache when a readable entry exists.
 *
 * <p>The cache is consulted before the existence check, so a cached entry is returned
 * without looking the restaurant up.
 *
 * @param id restaurant id
 * @return the video links returned by the service (or the cached copy)
 * @throws ResponseStatusException 404 when there is no cached entry and the restaurant
 *         does not exist
 */
@GetMapping("/{id}/videos")
public List<Map<String, Object>> videos(@PathVariable String id) {
    final String cacheKey = cache.makeKey("restaurant_videos", id);
    final String cachedJson = cache.getRaw(cacheKey);
    if (cachedJson != null) {
        try {
            return objectMapper.readValue(cachedJson, new TypeReference<List<Map<String, Object>>>() {});
        } catch (Exception ex) {
            // Unreadable entry: drop it and reload from the service below.
            log.warn("Cache deserialize failed, evicting: {}", ex.getMessage());
            cache.del(cacheKey);
        }
    }

    var restaurant = restaurantService.findById(id);
    if (restaurant == null) {
        throw new ResponseStatusException(HttpStatus.NOT_FOUND, "Restaurant not found");
    }
    var links = restaurantService.findVideoLinks(id);
    cache.set(cacheKey, links);
    return links;
}
// ─── DuckDuckGo HTML search helpers ─────────────────────────────────
// Shared client. The connect timeout keeps an unreachable host from blocking a job forever.
private static final HttpClient httpClient = HttpClient.newBuilder()
        .followRedirects(HttpClient.Redirect.NORMAL)
        .connectTimeout(java.time.Duration.ofSeconds(10))
        .build();
// Captures (1) the href and (2) the inner HTML of every result link on the page.
private static final Pattern DDG_RESULT_PATTERN = Pattern.compile(
        "<a[^>]+class=\"result__a\"[^>]+href=\"([^\"]+)\"[^>]*>(.*?)</a>",
        Pattern.DOTALL
);

/**
 * Finds URLs on a specific site through the DuckDuckGo HTML search page.
 *
 * <p>The page at html.duckduckgo.com is fetched with a plain HTTP request and its result
 * links are extracted with a regular expression. At most five distinct URLs that contain
 * one of {@code urlPatterns} are returned, in page order.
 *
 * @param query       search query, URL-encoded by this method
 * @param urlPatterns substrings; a result is kept when its URL contains at least one
 * @return result maps with {@code title} (tags stripped) and {@code url}
 * @throws java.io.IOException when the request fails, times out, or the response status
 *         is not 200
 * @throws InterruptedException when the calling thread is interrupted while waiting
 */
private List<Map<String, Object>> searchDuckDuckGo(String query, String... urlPatterns) throws Exception {
    String encoded = URLEncoder.encode(query, StandardCharsets.UTF_8);
    String searchUrl = "https://html.duckduckgo.com/html/?q=" + encoded;
    log.info("[DDG] Searching: {}", query);
    HttpRequest request = HttpRequest.newBuilder()
            .uri(URI.create(searchUrl))
            // Bound the whole exchange; without it a stalled response blocks indefinitely.
            .timeout(java.time.Duration.ofSeconds(20))
            .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36")
            .header("Accept", "text/html,application/xhtml+xml")
            .header("Accept-Language", "ko-KR,ko;q=0.9")
            .GET()
            .build();
    HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString());

    // A blocked or rate-limited request contains no result links. Treating it as an empty
    // result would make the bulk jobs persist "NONE" for the restaurant, so fail instead.
    int status = response.statusCode();
    if (status != 200) {
        throw new java.io.IOException("DuckDuckGo returned HTTP " + status + " for query '" + query + "'");
    }

    String html = response.body();
    List<Map<String, Object>> results = new ArrayList<>();
    Set<String> seen = new HashSet<>();
    Matcher matcher = DDG_RESULT_PATTERN.matcher(html);
    while (matcher.find() && results.size() < 5) {
        String href = matcher.group(1);
        String title = matcher.group(2).replaceAll("<[^>]+>", "").trim();
        // Extract the real target from the DDG redirect link (uddg parameter).
        String actualUrl = extractDdgUrl(href);
        if (actualUrl == null) continue;
        boolean matches = false;
        for (String pattern : urlPatterns) {
            if (actualUrl.contains(pattern)) {
                matches = true;
                break;
            }
        }
        // Set.add returns false for a URL that was already collected.
        if (matches && seen.add(actualUrl)) {
            results.add(Map.of("title", title, "url", actualUrl));
        }
    }
    log.info("[DDG] Found {} results for '{}'", results.size(), query);
    return results;
}
/**
 * Extracts the real target URL from a DuckDuckGo result href.
 *
 * <p>Redirect links look like {@code //duckduckgo.com/l/?uddg=ENCODED_URL&rut=...}; the
 * {@code uddg} value is cut at the next {@code &} and URL-decoded. An href that already
 * starts with {@code http} is returned unchanged.
 *
 * @param ddgHref href attribute taken from the result page
 * @return the decoded target URL, or {@code null} when the href is null, has an empty or
 *         undecodable {@code uddg} value, or is neither a redirect link nor an http(s) URL
 */
private String extractDdgUrl(String ddgHref) {
    if (ddgHref == null) {
        return null;
    }
    try {
        int keyIdx = ddgHref.indexOf("uddg=");
        if (keyIdx >= 0) {
            String uddgParam = ddgHref.substring(keyIdx + 5);
            // >= 0 (not > 0): an empty value ("uddg=&rut=...") must be cut at index 0 too,
            // otherwise the following parameters would be decoded as if they were the URL.
            int ampIdx = uddgParam.indexOf('&');
            if (ampIdx >= 0) uddgParam = uddgParam.substring(0, ampIdx);
            if (uddgParam.isEmpty()) return null;
            return URLDecoder.decode(uddgParam, StandardCharsets.UTF_8);
        }
        // Already a direct URL.
        if (ddgHref.startsWith("http")) return ddgHref;
    } catch (IllegalArgumentException e) {
        // URLDecoder rejects malformed percent-escapes; treat the link as unusable.
        log.debug("[DDG] Failed to extract URL from: {}", ddgHref);
    }
    return null;
}
/**
 * Searches tabling.co.kr for the given restaurant name, keeping only restaurant/place page URLs.
 *
 * @param restaurantName name appended to the site-restricted query
 * @return candidate results with {@code title} and {@code url}
 */
private List<Map<String, Object>> searchTabling(String restaurantName) throws Exception {
    String query = "site:tabling.co.kr " + restaurantName;
    return searchDuckDuckGo(query, "tabling.co.kr/restaurant/", "tabling.co.kr/place/");
}
/**
 * Searches app.catchtable.co.kr for the given restaurant name, keeping only dining/shop page URLs.
 *
 * @param restaurantName name appended to the site-restricted query
 * @return candidate results with {@code title} and {@code url}
 */
private List<Map<String, Object>> searchCatchtable(String restaurantName) throws Exception {
    String query = "site:app.catchtable.co.kr " + restaurantName;
    return searchDuckDuckGo(query, "catchtable.co.kr/dining/", "catchtable.co.kr/shop/");
}
/**
 * #348 — Decides whether a search result title refers to the given restaurant.
 *
 * <p>Delegates the scoring to {@code HangulSimilarity.similarity} and accepts the pair when
 * the score is at least 0.45. (This replaces the earlier character-overlap rule with a
 * 40% threshold.)
 *
 * @param restaurantName name stored for the restaurant
 * @param resultTitle    title of the search result being compared
 * @return {@code true} when the similarity score is 0.45 or higher
 */
private boolean isNameSimilar(String restaurantName, String resultTitle) {
    var score = com.tasteby.util.HangulSimilarity.similarity(restaurantName, resultTitle);
    return score >= 0.45;
}
/**
 * Sends one SSE event whose data is {@code data} serialized as JSON.
 *
 * <p>Best effort: any serialization or send failure is only logged at debug level, so a
 * disconnected client never interrupts the caller.
 *
 * @param emitter target emitter
 * @param data    event payload
 */
private void emit(SseEmitter emitter, Map<String, Object> data) {
    try {
        String payload = objectMapper.writeValueAsString(data);
        emitter.send(SseEmitter.event().data(payload));
    } catch (Exception ex) {
        log.debug("SSE emit error: {}", ex.getMessage());
    }
}
}