From a504bf8ee5d73dd551f84c8da5cdaf3a0addff5e Mon Sep 17 00:00:00 2001 From: joungmin Date: Mon, 15 Jun 2026 20:16:14 +0900 Subject: [PATCH] =?UTF-8?q?feat(backend):=20#357=20DDG=20=E2=86=92=20Naver?= =?UTF-8?q?=20Search=20=EC=A0=95=EC=8B=9D=20API=20+=20DDG=20=ED=8F=B4?= =?UTF-8?q?=EB=B0=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - WebSearchService 신규 (Naver webkr.json 우선, 키 미설정/실패 시 DDG) - RestaurantController.searchTabling/searchCatchtable 내부 호출 교체 - 인라인 DDG 80줄 제거, 미사용 import 정리 - app.naver.client-id/secret 추가 (env: NAVER_CLIENT_ID/SECRET) - k8s secrets template에 NAVER 키 항목 추가, ORACLE_DSN 예시를 _high → _medium 으로 변경 Refs: #357 (close) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 9 + .../controller/RestaurantController.java | 100 +----------- .../com/tasteby/service/WebSearchService.java | 154 ++++++++++++++++++ .../src/main/resources/application.yml | 5 + k8s/secrets.yaml.template | 5 +- 5 files changed, 181 insertions(+), 92 deletions(-) create mode 100644 backend-java/src/main/java/com/tasteby/service/WebSearchService.java diff --git a/CHANGELOG.md b/CHANGELOG.md index f4ae011..0ab2d50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,15 @@ ## 2026-06-15 +### 🔎 #357 DDG → Naver Search 정식 API + DDG 폴백 (v0.1.44) +- WebSearchService 신규 (Naver webkr.json 우선, 키 미설정/실패 시 DDG 폴백) +- RestaurantController.searchTabling/searchCatchtable 내부 호출 교체, DDG 인라인 80줄 제거 +- application.yml: app.naver.client-id/secret (NAVER_CLIENT_ID/SECRET 환경변수) +- k8s/secrets.yaml.template에 NAVER_CLIENT_ID/SECRET 항목 추가 +- 미사용 import 정리 (HttpClient/URI/URLEncoder/Pattern 등 RestaurantController에서) +- 설계서: docs/design/357-web-search-api/README.md +- Refs: #357 (close) + ### 🎯 #356 영상-식당 관련도 LLM 평가 (v0.1.43) - DB: video_restaurants 컬럼 추가 (relevance/relevance_reason/relevance_evaluated_at) + idx_vr_relevance - VideoRelevanceService 신규 (#322 RestaurantVerifyService 패턴 모방, @Async verifyAsync/verify/verifyAll) diff 
--git a/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java b/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java index cabc8a1..32cab49 100644 --- a/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java +++ b/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java @@ -7,6 +7,7 @@ import com.tasteby.security.AuthUtil; import com.tasteby.service.CacheService; import com.tasteby.service.GeocodingService; import com.tasteby.service.RestaurantService; +import com.tasteby.service.WebSearchService; import jakarta.annotation.PreDestroy; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -15,19 +16,10 @@ import org.springframework.web.bind.annotation.*; import org.springframework.web.server.ResponseStatusException; import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; -import java.net.URI; -import java.net.URLDecoder; -import java.net.URLEncoder; -import java.net.http.HttpClient; -import java.net.http.HttpRequest; -import java.net.http.HttpResponse; -import java.nio.charset.StandardCharsets; import java.util.*; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ThreadLocalRandom; -import java.util.regex.Matcher; -import java.util.regex.Pattern; @RestController @RequestMapping("/api/restaurants") @@ -39,13 +31,15 @@ public class RestaurantController { private final GeocodingService geocodingService; private final CacheService cache; private final ObjectMapper objectMapper; + private final WebSearchService webSearch; private final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor(); - public RestaurantController(RestaurantService restaurantService, GeocodingService geocodingService, CacheService cache, ObjectMapper objectMapper) { + public RestaurantController(RestaurantService restaurantService, GeocodingService geocodingService, CacheService cache, ObjectMapper objectMapper, 
WebSearchService webSearch) { this.restaurantService = restaurantService; this.geocodingService = geocodingService; this.cache = cache; this.objectMapper = objectMapper; + this.webSearch = webSearch; } // #290 — Bean 종료 시 virtual thread executor를 정리하여 리소스 누수 방지. @@ -430,93 +424,17 @@ public class RestaurantController { return result; } - // ─── DuckDuckGo HTML search helpers ───────────────────────────────── + // ─── Reservation-site URL lookup (#357 WebSearchService: Naver primary + DDG fallback) ─── - private static final HttpClient httpClient = HttpClient.newBuilder() - .followRedirects(HttpClient.Redirect.NORMAL) - .build(); - - private static final Pattern DDG_RESULT_PATTERN = Pattern.compile( - "]+class=\"result__a\"[^>]+href=\"([^\"]+)\"[^>]*>(.*?)", - Pattern.DOTALL - ); - - /** - * DuckDuckGo HTML 검색을 통해 특정 사이트의 URL을 찾는다. - * html.duckduckgo.com은 서버사이드 렌더링이라 봇 판정 없이 HTTP 요청만으로 결과를 파싱할 수 있다. - */ - private List> searchDuckDuckGo(String query, String... urlPatterns) throws Exception { - String encoded = URLEncoder.encode(query, StandardCharsets.UTF_8); - String searchUrl = "https://html.duckduckgo.com/html/?q=" + encoded; - log.info("[DDG] Searching: {}", query); - - HttpRequest request = HttpRequest.newBuilder() - .uri(URI.create(searchUrl)) - .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36") - .header("Accept", "text/html,application/xhtml+xml") - .header("Accept-Language", "ko-KR,ko;q=0.9") - .GET() - .build(); - - HttpResponse response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); - String html = response.body(); - - List> results = new ArrayList<>(); - Set seen = new HashSet<>(); - Matcher matcher = DDG_RESULT_PATTERN.matcher(html); - - while (matcher.find() && results.size() < 5) { - String href = matcher.group(1); - String title = matcher.group(2).replaceAll("<[^>]+>", "").trim(); - - // DDG 링크에서 실제 URL 추출 (uddg 파라미터) - String actualUrl = 
extractDdgUrl(href); - if (actualUrl == null) continue; - - boolean matches = false; - for (String pattern : urlPatterns) { - if (actualUrl.contains(pattern)) { - matches = true; - break; - } - } - if (matches && !seen.contains(actualUrl)) { - seen.add(actualUrl); - results.add(Map.of("title", title, "url", actualUrl)); - } - } - - log.info("[DDG] Found {} results for '{}'", results.size(), query); - return results; - } - - /** DDG 리다이렉트 URL에서 실제 URL 추출 */ - private String extractDdgUrl(String ddgHref) { - try { - // //duckduckgo.com/l/?uddg=ENCODED_URL&rut=... - if (ddgHref.contains("uddg=")) { - String uddgParam = ddgHref.substring(ddgHref.indexOf("uddg=") + 5); - int ampIdx = uddgParam.indexOf('&'); - if (ampIdx > 0) uddgParam = uddgParam.substring(0, ampIdx); - return URLDecoder.decode(uddgParam, StandardCharsets.UTF_8); - } - // 직접 URL인 경우 - if (ddgHref.startsWith("http")) return ddgHref; - } catch (Exception e) { - log.debug("[DDG] Failed to extract URL from: {}", ddgHref); - } - return null; - } - - private List> searchTabling(String restaurantName) throws Exception { - return searchDuckDuckGo( + private List<Map<String, String>> searchTabling(String restaurantName) { + return webSearch.search( "site:tabling.co.kr " + restaurantName, "tabling.co.kr/restaurant/", "tabling.co.kr/place/" ); } - private List> searchCatchtable(String restaurantName) throws Exception { - return searchDuckDuckGo( + private List<Map<String, String>> searchCatchtable(String restaurantName) { + return webSearch.search( "site:app.catchtable.co.kr " + restaurantName, "catchtable.co.kr/dining/", "catchtable.co.kr/shop/" ); diff --git a/backend-java/src/main/java/com/tasteby/service/WebSearchService.java b/backend-java/src/main/java/com/tasteby/service/WebSearchService.java new file mode 100644 index 0000000..dfb178d --- /dev/null +++ b/backend-java/src/main/java/com/tasteby/service/WebSearchService.java @@ -0,0 +1,154 @@ +package com.tasteby.service; + +import com.fasterxml.jackson.databind.JsonNode; +import 
com.fasterxml.jackson.databind.ObjectMapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import java.net.URI; +import java.net.URLDecoder; +import java.net.URLEncoder; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * #357 web search abstraction. + * - Naver Search webkr.json is tried first (better accuracy for Korean restaurants, free tier 25k calls/day). + * - Falls back to DDG HTML parsing when the keys are unset, the Naver call fails or times out, or Naver yields no matching link. + * - Results are filtered by urlPatterns (same interface as the former searchDuckDuckGo). + */ +@Service +public class WebSearchService { + + private static final Logger log = LoggerFactory.getLogger(WebSearchService.class); + private static final int MAX_RESULTS = 5; + private static final java.time.Duration TIMEOUT = java.time.Duration.ofSeconds(5); // HttpClient never times out by default; bound connect and response so a stalled Naver host still reaches the DDG fallback + private static final HttpClient HTTP = HttpClient.newBuilder().connectTimeout(TIMEOUT) + .followRedirects(HttpClient.Redirect.NORMAL) + .build(); + + private static final Pattern DDG_RESULT = Pattern.compile( + "<a[^>]+class=\"result__a\"[^>]+href=\"([^\"]+)\"[^>]*>(.*?)</a>", + Pattern.DOTALL); + + private final ObjectMapper json = new ObjectMapper(); + private final String naverClientId; + private final String naverClientSecret; + + public WebSearchService( + @Value("${app.naver.client-id:}") String naverClientId, + @Value("${app.naver.client-secret:}") String naverClientSecret) { + this.naverClientId = naverClientId == null ? "" : naverClientId.trim(); + this.naverClientSecret = naverClientSecret == null ? "" : naverClientSecret.trim(); + log.info("WebSearchService init — Naver={}", (this.naverClientId.isEmpty() || this.naverClientSecret.isEmpty()) ? "off" : "on"); + } + /** Tries Naver when both keys are set; uses DDG when Naver is off, throws, or matches nothing. Failures are logged and produce an empty list. */ + public List<Map<String, String>> search(String query, String... 
urlPatterns) { + if (!naverClientId.isEmpty() && !naverClientSecret.isEmpty()) { + try { + List<Map<String, String>> n = searchNaver(query, urlPatterns); + if (!n.isEmpty()) return n; + } catch (Exception e) { + log.warn("[NaverSearch] failed, falling back to DDG: {}", e.toString()); + } + } + try { + return searchDdg(query, urlPatterns); + } catch (Exception e) { + log.warn("[DDG] failed: {}", e.toString()); + return List.of(); + } + } + + // ─── Naver ─── + // Calls webkr.json (display=30) and keeps at most MAX_RESULTS distinct links containing one of urlPatterns; throws on HTTP status >= 400 or timeout. + List<Map<String, String>> searchNaver(String query, String... urlPatterns) throws Exception { + String encoded = URLEncoder.encode(query, StandardCharsets.UTF_8); + String url = "https://openapi.naver.com/v1/search/webkr.json?query=" + encoded + "&display=30"; + HttpRequest req = HttpRequest.newBuilder() + .uri(URI.create(url)).timeout(TIMEOUT) + .header("X-Naver-Client-Id", naverClientId) + .header("X-Naver-Client-Secret", naverClientSecret) + .GET() + .build(); + HttpResponse<String> resp = HTTP.send(req, HttpResponse.BodyHandlers.ofString()); + if (resp.statusCode() >= 400) { + throw new RuntimeException("Naver " + resp.statusCode()); + } + JsonNode root = json.readTree(resp.body()); + JsonNode items = root.path("items"); + List<Map<String, String>> out = new ArrayList<>(); + Set<String> seen = new HashSet<>(); + for (JsonNode it : items) { + if (out.size() >= MAX_RESULTS) break; + String link = it.path("link").asText(""); + String title = stripTags(it.path("title").asText("")); + if (link.isEmpty() || !matchesPattern(link, urlPatterns)) continue; + if (seen.add(link)) out.add(Map.of("title", title, "url", link)); + } + log.info("[NaverSearch] '{}' → {}", query, out.size()); + return out; + } + + // ─── DDG ─── + // Scrapes html.duckduckgo.com result anchors and keeps at most MAX_RESULTS distinct target URLs containing one of urlPatterns. + List<Map<String, String>> searchDdg(String query, String... 
urlPatterns) throws Exception { + String encoded = URLEncoder.encode(query, StandardCharsets.UTF_8); + String url = "https://html.duckduckgo.com/html/?q=" + encoded; + HttpRequest req = HttpRequest.newBuilder() + .uri(URI.create(url)).timeout(TIMEOUT) + .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36") + .header("Accept", "text/html,application/xhtml+xml") + .header("Accept-Language", "ko-KR,ko;q=0.9") + .GET() + .build(); + HttpResponse<String> resp = HTTP.send(req, HttpResponse.BodyHandlers.ofString()); + String html = resp.body(); + Matcher m = DDG_RESULT.matcher(html); + List<Map<String, String>> out = new ArrayList<>(); + Set<String> seen = new HashSet<>(); + while (m.find() && out.size() < MAX_RESULTS) { + String href = m.group(1); + String title = stripTags(m.group(2)); + String actual = extractDdgUrl(href); + if (actual == null || !matchesPattern(actual, urlPatterns)) continue; + if (seen.add(actual)) out.add(Map.of("title", title, "url", actual)); + } + log.info("[DDG] '{}' → {}", query, out.size()); + return out; + } + // Unwraps a DDG redirect href (uddg=ENCODED_URL followed by other params) to the decoded target; hrefs starting with "http" pass through; returns null otherwise or when decoding fails. + private String extractDdgUrl(String ddgHref) { + try { + if (ddgHref.contains("uddg=")) { + String p = ddgHref.substring(ddgHref.indexOf("uddg=") + 5); + int amp = p.indexOf('&'); + if (amp > 0) p = p.substring(0, amp); + return URLDecoder.decode(p, StandardCharsets.UTF_8); + } + if (ddgHref.startsWith("http")) return ddgHref; + } catch (Exception e) { + log.debug("[DDG] url extract failed: {}", ddgHref); + } + return null; + } + // Removes anything shaped like an HTML tag and trims the result; null becomes the empty string. + static String stripTags(String s) { + return s == null ? 
"" : s.replaceAll("<[^>]+>", "").trim(); + } + // True when patterns is null or empty (no filtering) or url contains at least one of them. + static boolean matchesPattern(String url, String[] patterns) { + if (patterns == null || patterns.length == 0) return true; + for (String p : patterns) { + if (url.contains(p)) return true; + } + return false; + } +} diff --git a/backend-java/src/main/resources/application.yml b/backend-java/src/main/resources/application.yml index 922611f..32d3a2c 100644 --- a/backend-java/src/main/resources/application.yml +++ b/backend-java/src/main/resources/application.yml @@ -56,6 +56,11 @@ app: youtube-api-key: ${YOUTUBE_DATA_API_KEY} client-id: ${GOOGLE_CLIENT_ID:635551099330-2l003d3ernjmkqavd4f6s78r8r405iml.apps.googleusercontent.com} + # #357 — Naver Search API (Tabling/Catchtable URL matching). Falls back to DDG when unset. + naver: + client-id: ${NAVER_CLIENT_ID:} + client-secret: ${NAVER_CLIENT_SECRET:} + cache: ttl-seconds: 600 diff --git a/k8s/secrets.yaml.template b/k8s/secrets.yaml.template index 3bcdacf..b37f346 100644 --- a/k8s/secrets.yaml.template +++ b/k8s/secrets.yaml.template @@ -9,9 +9,12 @@ type: Opaque stringData: ORACLE_USER: "" ORACLE_PASSWORD: "" - ORACLE_DSN: "_high?TNS_ADMIN=/etc/oracle/wallet" + ORACLE_DSN: "_medium?TNS_ADMIN=/etc/oracle/wallet" JWT_SECRET: "" OCI_COMPARTMENT_ID: "" OCI_CHAT_MODEL_ID: "" GOOGLE_MAPS_API_KEY: "" YOUTUBE_DATA_API_KEY: "" + # #357 — Naver Search API (optional). Falls back to DDG when unset. + NAVER_CLIENT_ID: "" + NAVER_CLIENT_SECRET: ""