feat(backend): #357 DDG → Naver Search 정식 API + DDG 폴백

- WebSearchService 신규 (Naver webkr.json 우선, 키 미설정/실패 시 DDG)
- RestaurantController.searchTabling/searchCatchtable 내부 호출 교체
- 인라인 DDG 80줄 제거, 미사용 import 정리
- app.naver.client-id/secret 추가 (env: NAVER_CLIENT_ID/SECRET)
- k8s secrets template에 NAVER 키 항목

Refs: #357 (close)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
joungmin
2026-06-15 20:16:14 +09:00
parent f1164b63c5
commit a504bf8ee5
5 changed files with 181 additions and 92 deletions

View File

@@ -6,6 +6,15 @@
## 2026-06-15
### 🔎 #357 DDG → Naver Search 정식 API + DDG 폴백 (v0.1.44)
- WebSearchService 신규 (Naver webkr.json 우선, 키 미설정/실패 시 DDG 폴백)
- RestaurantController.searchTabling/searchCatchtable 내부 호출 교체, DDG 인라인 80줄 제거
- application.yml: app.naver.client-id/secret (NAVER_CLIENT_ID/SECRET 환경변수)
- k8s/secrets.yaml.template에 NAVER_CLIENT_ID/SECRET 항목 추가
- RestaurantController의 미사용 import 정리 (HttpClient/URI/URLEncoder/Pattern 등)
- 설계서: docs/design/357-web-search-api/README.md
- Refs: #357 (close)
### 🎯 #356 영상-식당 관련도 LLM 평가 (v0.1.43)
- DB: video_restaurants 컬럼 추가 (relevance/relevance_reason/relevance_evaluated_at) + idx_vr_relevance
- VideoRelevanceService 신규 (#322 RestaurantVerifyService 패턴 모방, @Async verifyAsync/verify/verifyAll)

View File

@@ -7,6 +7,7 @@ import com.tasteby.security.AuthUtil;
import com.tasteby.service.CacheService;
import com.tasteby.service.GeocodingService;
import com.tasteby.service.RestaurantService;
import com.tasteby.service.WebSearchService;
import jakarta.annotation.PreDestroy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -15,19 +16,10 @@ import org.springframework.web.bind.annotation.*;
import org.springframework.web.server.ResponseStatusException;
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
import java.net.URI;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@RestController
@RequestMapping("/api/restaurants")
@@ -39,13 +31,15 @@ public class RestaurantController {
private final GeocodingService geocodingService;
private final CacheService cache;
private final ObjectMapper objectMapper;
private final WebSearchService webSearch;
private final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor();
public RestaurantController(RestaurantService restaurantService, GeocodingService geocodingService, CacheService cache, ObjectMapper objectMapper) {
public RestaurantController(RestaurantService restaurantService, GeocodingService geocodingService, CacheService cache, ObjectMapper objectMapper, WebSearchService webSearch) {
this.restaurantService = restaurantService;
this.geocodingService = geocodingService;
this.cache = cache;
this.objectMapper = objectMapper;
this.webSearch = webSearch;
}
// #290 — Bean 종료 시 virtual thread executor를 정리하여 리소스 누수 방지.
@@ -430,93 +424,17 @@ public class RestaurantController {
return result;
}
// ─── DuckDuckGo HTML search helpers ─────────────────────────────────
// ─── 예약 사이트 URL 검색 (#357 WebSearchService: Naver primary + DDG fallback) ───
private static final HttpClient httpClient = HttpClient.newBuilder()
.followRedirects(HttpClient.Redirect.NORMAL)
.build();
private static final Pattern DDG_RESULT_PATTERN = Pattern.compile(
"<a[^>]+class=\"result__a\"[^>]+href=\"([^\"]+)\"[^>]*>(.*?)</a>",
Pattern.DOTALL
);
/**
* DuckDuckGo HTML 검색을 통해 특정 사이트의 URL을 찾는다.
* html.duckduckgo.com은 서버사이드 렌더링이라 봇 판정 없이 HTTP 요청만으로 결과를 파싱할 수 있다.
*/
private List<Map<String, Object>> searchDuckDuckGo(String query, String... urlPatterns) throws Exception {
String encoded = URLEncoder.encode(query, StandardCharsets.UTF_8);
String searchUrl = "https://html.duckduckgo.com/html/?q=" + encoded;
log.info("[DDG] Searching: {}", query);
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(searchUrl))
.header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36")
.header("Accept", "text/html,application/xhtml+xml")
.header("Accept-Language", "ko-KR,ko;q=0.9")
.GET()
.build();
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString());
String html = response.body();
List<Map<String, Object>> results = new ArrayList<>();
Set<String> seen = new HashSet<>();
Matcher matcher = DDG_RESULT_PATTERN.matcher(html);
while (matcher.find() && results.size() < 5) {
String href = matcher.group(1);
String title = matcher.group(2).replaceAll("<[^>]+>", "").trim();
// DDG 링크에서 실제 URL 추출 (uddg 파라미터)
String actualUrl = extractDdgUrl(href);
if (actualUrl == null) continue;
boolean matches = false;
for (String pattern : urlPatterns) {
if (actualUrl.contains(pattern)) {
matches = true;
break;
}
}
if (matches && !seen.contains(actualUrl)) {
seen.add(actualUrl);
results.add(Map.of("title", title, "url", actualUrl));
}
}
log.info("[DDG] Found {} results for '{}'", results.size(), query);
return results;
}
/** DDG 리다이렉트 URL에서 실제 URL 추출 */
private String extractDdgUrl(String ddgHref) {
try {
// //duckduckgo.com/l/?uddg=ENCODED_URL&rut=...
if (ddgHref.contains("uddg=")) {
String uddgParam = ddgHref.substring(ddgHref.indexOf("uddg=") + 5);
int ampIdx = uddgParam.indexOf('&');
if (ampIdx > 0) uddgParam = uddgParam.substring(0, ampIdx);
return URLDecoder.decode(uddgParam, StandardCharsets.UTF_8);
}
// 직접 URL인 경우
if (ddgHref.startsWith("http")) return ddgHref;
} catch (Exception e) {
log.debug("[DDG] Failed to extract URL from: {}", ddgHref);
}
return null;
}
private List<Map<String, Object>> searchTabling(String restaurantName) throws Exception {
return searchDuckDuckGo(
private List<Map<String, Object>> searchTabling(String restaurantName) {
return webSearch.search(
"site:tabling.co.kr " + restaurantName,
"tabling.co.kr/restaurant/", "tabling.co.kr/place/"
);
}
private List<Map<String, Object>> searchCatchtable(String restaurantName) throws Exception {
return searchDuckDuckGo(
private List<Map<String, Object>> searchCatchtable(String restaurantName) {
return webSearch.search(
"site:app.catchtable.co.kr " + restaurantName,
"catchtable.co.kr/dining/", "catchtable.co.kr/shop/"
);

View File

@@ -0,0 +1,154 @@
package com.tasteby.service;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.net.URI;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * #357 — Web search abstraction used to look up result URLs on specific sites.
 *
 * <ul>
 *   <li>Naver Search {@code webkr.json} is tried first when both the client id and the client
 *       secret are configured (original note: better accuracy for Korean restaurants, free
 *       quota of 25k calls per day).</li>
 *   <li>The DuckDuckGo HTML endpoint is used as a fallback when the Naver keys are missing, the
 *       Naver call fails (HTTP status &gt;= 400, timeout, I/O or JSON parse error), or Naver
 *       returns no result matching {@code urlPatterns}.</li>
 *   <li>Results from either backend are filtered by {@code urlPatterns} (plain substring match),
 *       de-duplicated by URL and capped at {@code MAX_RESULTS} — the same contract as the former
 *       inline {@code searchDuckDuckGo} helper.</li>
 * </ul>
 */
@Service
public class WebSearchService {

    private static final Logger log = LoggerFactory.getLogger(WebSearchService.class);

    /** Upper bound on the number of results returned by one search. */
    private static final int MAX_RESULTS = 5;

    /** Bounds connection establishment so an unreachable upstream cannot block the caller forever. */
    private static final Duration CONNECT_TIMEOUT = Duration.ofSeconds(5);

    /** Bounds a whole request/response exchange; exceeding it triggers the fallback path. */
    private static final Duration REQUEST_TIMEOUT = Duration.ofSeconds(10);

    private static final HttpClient HTTP = HttpClient.newBuilder()
            .followRedirects(HttpClient.Redirect.NORMAL)
            .connectTimeout(CONNECT_TIMEOUT)
            .build();

    /** Captures (1) the href and (2) the inner HTML of each DuckDuckGo result anchor. */
    private static final Pattern DDG_RESULT = Pattern.compile(
            "<a[^>]+class=\"result__a\"[^>]+href=\"([^\"]+)\"[^>]*>(.*?)</a>",
            Pattern.DOTALL);

    /** Compiled once; used to strip markup from result titles. */
    private static final Pattern HTML_TAG = Pattern.compile("<[^>]+>");

    private final ObjectMapper json = new ObjectMapper();
    private final String naverClientId;
    private final String naverClientSecret;

    /**
     * @param naverClientId     value of {@code app.naver.client-id}; null or blank disables Naver
     * @param naverClientSecret value of {@code app.naver.client-secret}; null or blank disables Naver
     */
    public WebSearchService(
            @Value("${app.naver.client-id:}") String naverClientId,
            @Value("${app.naver.client-secret:}") String naverClientSecret) {
        this.naverClientId = naverClientId == null ? "" : naverClientId.trim();
        this.naverClientSecret = naverClientSecret == null ? "" : naverClientSecret.trim();
        // Report the state search() will actually act on: normalized fields, and both keys required.
        log.info("WebSearchService init — Naver={}", naverEnabled() ? "on" : "off");
    }

    /**
     * Searches the web and returns up to {@code MAX_RESULTS} hits whose URL contains at least one
     * of {@code urlPatterns}.
     *
     * @param query       search query, passed to the backend URL-encoded
     * @param urlPatterns substrings a result URL must contain; null or empty accepts every URL
     * @return list of maps with keys {@code "title"} and {@code "url"}; empty when both backends
     *         fail or nothing matches. Never null, and this method does not throw on backend errors.
     */
    public List<Map<String, Object>> search(String query, String... urlPatterns) {
        if (naverEnabled()) {
            try {
                List<Map<String, Object>> naverHits = searchNaver(query, urlPatterns);
                if (!naverHits.isEmpty()) {
                    return naverHits;
                }
            } catch (InterruptedException e) {
                // Restore the flag so the caller's cancellation is honoured; a DDG call would
                // fail immediately on an interrupted thread anyway.
                Thread.currentThread().interrupt();
                log.warn("[NaverSearch] interrupted, skipping DDG fallback");
                return List.of();
            } catch (Exception e) {
                // toString() keeps the exception type visible even when getMessage() is null.
                log.warn("[NaverSearch] failed, falling back to DDG: {}", e.toString());
            }
        }
        try {
            return searchDdg(query, urlPatterns);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.warn("[DDG] interrupted");
            return List.of();
        } catch (Exception e) {
            log.warn("[DDG] failed: {}", e.toString());
            return List.of();
        }
    }

    /** Naver is used only when both credentials are present after trimming. */
    private boolean naverEnabled() {
        return !naverClientId.isEmpty() && !naverClientSecret.isEmpty();
    }

    // ─── Naver ───

    /**
     * Queries the Naver {@code webkr.json} endpoint (30 items requested) and filters the items.
     *
     * @throws IllegalStateException when Naver answers with an HTTP status &gt;= 400
     * @throws Exception             on timeout, I/O failure, interruption or JSON parse error
     */
    List<Map<String, Object>> searchNaver(String query, String... urlPatterns) throws Exception {
        String encoded = URLEncoder.encode(query, StandardCharsets.UTF_8);
        String url = "https://openapi.naver.com/v1/search/webkr.json?query=" + encoded + "&display=30";
        HttpRequest req = HttpRequest.newBuilder()
                .uri(URI.create(url))
                .timeout(REQUEST_TIMEOUT)
                .header("X-Naver-Client-Id", naverClientId)
                .header("X-Naver-Client-Secret", naverClientSecret)
                .GET()
                .build();
        HttpResponse<String> resp = HTTP.send(req, HttpResponse.BodyHandlers.ofString());
        if (resp.statusCode() >= 400) {
            throw new IllegalStateException("Naver " + resp.statusCode());
        }

        JsonNode root = json.readTree(resp.body());
        JsonNode items = root.path("items");
        List<Map<String, Object>> out = new ArrayList<>();
        Set<String> seen = new HashSet<>();
        for (JsonNode it : items) {
            if (out.size() >= MAX_RESULTS) {
                break;
            }
            String link = it.path("link").asText("");
            if (link.isEmpty() || !matchesPattern(link, urlPatterns)) {
                continue;
            }
            if (seen.add(link)) {
                String title = stripTags(it.path("title").asText(""));
                out.add(Map.of("title", title, "url", link));
            }
        }
        log.info("[NaverSearch] '{}' → {}", query, out.size());
        return out;
    }

    // ─── DDG ───

    /**
     * Fetches the DuckDuckGo HTML result page and extracts matching result links with a regex.
     *
     * @throws IllegalStateException when DuckDuckGo answers with an HTTP status &gt;= 400, so a
     *                               blocked or rate-limited request is reported as a failure
     *                               instead of silently looking like "0 results"
     * @throws Exception             on timeout, I/O failure or interruption
     */
    List<Map<String, Object>> searchDdg(String query, String... urlPatterns) throws Exception {
        String encoded = URLEncoder.encode(query, StandardCharsets.UTF_8);
        String url = "https://html.duckduckgo.com/html/?q=" + encoded;
        HttpRequest req = HttpRequest.newBuilder()
                .uri(URI.create(url))
                .timeout(REQUEST_TIMEOUT)
                .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36")
                .header("Accept", "text/html,application/xhtml+xml")
                .header("Accept-Language", "ko-KR,ko;q=0.9")
                .GET()
                .build();
        HttpResponse<String> resp = HTTP.send(req, HttpResponse.BodyHandlers.ofString());
        if (resp.statusCode() >= 400) {
            throw new IllegalStateException("DDG " + resp.statusCode());
        }

        Matcher m = DDG_RESULT.matcher(resp.body());
        List<Map<String, Object>> out = new ArrayList<>();
        Set<String> seen = new HashSet<>();
        while (out.size() < MAX_RESULTS && m.find()) {
            String actual = extractDdgUrl(m.group(1));
            if (actual == null || !matchesPattern(actual, urlPatterns)) {
                continue;
            }
            if (seen.add(actual)) {
                out.add(Map.of("title", stripTags(m.group(2)), "url", actual));
            }
        }
        log.info("[DDG] '{}' → {}", query, out.size());
        return out;
    }

    /**
     * Resolves a DuckDuckGo result href to the target URL.
     *
     * <p>Redirect links carry the target URL-encoded in the {@code uddg} query parameter; hrefs
     * that already start with {@code http} are returned unchanged.
     *
     * @return the target URL, or null when none can be extracted
     */
    private static String extractDdgUrl(String ddgHref) {
        int start = ddgHref.indexOf("uddg=");
        if (start >= 0) {
            String encodedTarget = ddgHref.substring(start + 5);
            int amp = encodedTarget.indexOf('&');
            if (amp >= 0) {
                // >= 0 (not > 0): an empty uddg value must not swallow the following parameters.
                encodedTarget = encodedTarget.substring(0, amp);
            }
            if (encodedTarget.isEmpty()) {
                return null;
            }
            try {
                return URLDecoder.decode(encodedTarget, StandardCharsets.UTF_8);
            } catch (IllegalArgumentException e) {
                // Malformed percent-encoding: skip this result rather than fail the whole search.
                log.debug("[DDG] url extract failed: {}", ddgHref);
                return null;
            }
        }
        return ddgHref.startsWith("http") ? ddgHref : null;
    }

    /** Removes anything that looks like an HTML tag and trims the result; null yields "". */
    static String stripTags(String s) {
        return s == null ? "" : HTML_TAG.matcher(s).replaceAll("").trim();
    }

    /**
     * Returns true when {@code url} contains at least one of {@code patterns}.
     * A null or empty pattern array accepts every URL.
     */
    static boolean matchesPattern(String url, String[] patterns) {
        if (patterns == null || patterns.length == 0) {
            return true;
        }
        for (String p : patterns) {
            if (url.contains(p)) {
                return true;
            }
        }
        return false;
    }
}

View File

@@ -56,6 +56,11 @@ app:
youtube-api-key: ${YOUTUBE_DATA_API_KEY}
client-id: ${GOOGLE_CLIENT_ID:635551099330-2l003d3ernjmkqavd4f6s78r8r405iml.apps.googleusercontent.com}
# #357 — Naver Search API (Tabling/Catchtable URL 매칭). 미설정 시 DDG 폴백.
naver:
client-id: ${NAVER_CLIENT_ID:}
client-secret: ${NAVER_CLIENT_SECRET:}
cache:
ttl-seconds: 600

View File

@@ -9,9 +9,12 @@ type: Opaque
stringData:
ORACLE_USER: "<oracle-username>"
ORACLE_PASSWORD: "<oracle-password>"
ORACLE_DSN: "<tns-alias>_high?TNS_ADMIN=/etc/oracle/wallet"
ORACLE_DSN: "<tns-alias>_medium?TNS_ADMIN=/etc/oracle/wallet"
JWT_SECRET: "<jwt-secret>"
OCI_COMPARTMENT_ID: "<oci-compartment-id>"
OCI_CHAT_MODEL_ID: "<oci-chat-model-id>"
GOOGLE_MAPS_API_KEY: "<google-maps-api-key>"
YOUTUBE_DATA_API_KEY: "<youtube-data-api-key>"
# #357 — Naver Search API (선택). 미설정 시 DDG 폴백.
NAVER_CLIENT_ID: "<naver-client-id>"
NAVER_CLIENT_SECRET: "<naver-client-secret>"