diff --git a/CHANGELOG.md b/CHANGELOG.md index f4ae011..0ab2d50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,15 @@ ## 2026-06-15 +### ๐Ÿ”Ž #357 DDG โ†’ Naver Search ์ •์‹ API + DDG ํด๋ฐฑ (v0.1.44) +- WebSearchService ์‹ ๊ทœ (Naver webkr.json ์šฐ์„ , ํ‚ค ๋ฏธ์„ค์ •/์‹คํŒจ ์‹œ DDG ํด๋ฐฑ) +- RestaurantController.searchTabling/searchCatchtable ๋‚ด๋ถ€ ํ˜ธ์ถœ ๊ต์ฒด, DDG ์ธ๋ผ์ธ 80์ค„ ์ œ๊ฑฐ +- application.yml: app.naver.client-id/secret (NAVER_CLIENT_ID/SECRET ํ™˜๊ฒฝ๋ณ€์ˆ˜) +- k8s/secrets.yaml.template์— NAVER_CLIENT_ID/SECRET ํ•ญ๋ชฉ ์ถ”๊ฐ€ +- ๋ฏธ์‚ฌ์šฉ import ์ •๋ฆฌ (HttpClient/URI/URLEncoder/Pattern ๋“ฑ RestaurantController์—์„œ) +- ์„ค๊ณ„์„œ: docs/design/357-web-search-api/README.md +- Refs: #357 (close) + ### ๐ŸŽฏ #356 ์˜์ƒ-์‹๋‹น ๊ด€๋ จ๋„ LLM ํ‰๊ฐ€ (v0.1.43) - DB: video_restaurants ์ปฌ๋Ÿผ ์ถ”๊ฐ€ (relevance/relevance_reason/relevance_evaluated_at) + idx_vr_relevance - VideoRelevanceService ์‹ ๊ทœ (#322 RestaurantVerifyService ํŒจํ„ด ๋ชจ๋ฐฉ, @Async verifyAsync/verify/verifyAll) diff --git a/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java b/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java index cabc8a1..32cab49 100644 --- a/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java +++ b/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java @@ -7,6 +7,7 @@ import com.tasteby.security.AuthUtil; import com.tasteby.service.CacheService; import com.tasteby.service.GeocodingService; import com.tasteby.service.RestaurantService; +import com.tasteby.service.WebSearchService; import jakarta.annotation.PreDestroy; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -15,19 +16,10 @@ import org.springframework.web.bind.annotation.*; import org.springframework.web.server.ResponseStatusException; import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; -import java.net.URI; -import java.net.URLDecoder; -import java.net.URLEncoder; -import java.net.http.HttpClient; -import java.net.http.HttpRequest; -import java.net.http.HttpResponse; -import java.nio.charset.StandardCharsets; import java.util.*; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ThreadLocalRandom; -import java.util.regex.Matcher; -import java.util.regex.Pattern; @RestController @RequestMapping("/api/restaurants") @@ -39,13 +31,15 @@ public class RestaurantController { private final GeocodingService geocodingService; private final CacheService cache; private final ObjectMapper objectMapper; + private final WebSearchService webSearch; private final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor(); - public RestaurantController(RestaurantService restaurantService, GeocodingService geocodingService, CacheService cache, ObjectMapper objectMapper) { + public RestaurantController(RestaurantService restaurantService, GeocodingService geocodingService, CacheService cache, ObjectMapper objectMapper, WebSearchService webSearch) { this.restaurantService = restaurantService; this.geocodingService = geocodingService; this.cache = cache; this.objectMapper = objectMapper; + this.webSearch = webSearch; } // #290 โ€” Bean ์ข…๋ฃŒ ์‹œ virtual thread executor๋ฅผ ์ •๋ฆฌํ•˜์—ฌ ๋ฆฌ์†Œ์Šค ๋ˆ„์ˆ˜ ๋ฐฉ์ง€. @@ -430,93 +424,17 @@ public class RestaurantController { return result; } - // โ”€โ”€โ”€ DuckDuckGo HTML search helpers โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + // โ”€โ”€โ”€ ์˜ˆ์•ฝ ์‚ฌ์ดํŠธ URL ๊ฒ€์ƒ‰ (#357 WebSearchService: Naver primary + DDG fallback) โ”€โ”€โ”€ - private static final HttpClient httpClient = HttpClient.newBuilder() - .followRedirects(HttpClient.Redirect.NORMAL) - .build(); - - private static final Pattern DDG_RESULT_PATTERN = Pattern.compile( - "]+class=\"result__a\"[^>]+href=\"([^\"]+)\"[^>]*>(.*?)", - Pattern.DOTALL - ); - - /** - * DuckDuckGo HTML ๊ฒ€์ƒ‰์„ ํ†ตํ•ด ํŠน์ • ์‚ฌ์ดํŠธ์˜ URL์„ ์ฐพ๋Š”๋‹ค. - * html.duckduckgo.com์€ ์„œ๋ฒ„์‚ฌ์ด๋“œ ๋ Œ๋”๋ง์ด๋ผ ๋ด‡ ํŒ์ • ์—†์ด HTTP ์š”์ฒญ๋งŒ์œผ๋กœ ๊ฒฐ๊ณผ๋ฅผ ํŒŒ์‹ฑํ•  ์ˆ˜ ์žˆ๋‹ค. - */ - private List> searchDuckDuckGo(String query, String... urlPatterns) throws Exception { - String encoded = URLEncoder.encode(query, StandardCharsets.UTF_8); - String searchUrl = "https://html.duckduckgo.com/html/?q=" + encoded; - log.info("[DDG] Searching: {}", query); - - HttpRequest request = HttpRequest.newBuilder() - .uri(URI.create(searchUrl)) - .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36") - .header("Accept", "text/html,application/xhtml+xml") - .header("Accept-Language", "ko-KR,ko;q=0.9") - .GET() - .build(); - - HttpResponse response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); - String html = response.body(); - - List> results = new ArrayList<>(); - Set seen = new HashSet<>(); - Matcher matcher = DDG_RESULT_PATTERN.matcher(html); - - while (matcher.find() && results.size() < 5) { - String href = matcher.group(1); - String title = matcher.group(2).replaceAll("<[^>]+>", "").trim(); - - // DDG ๋งํฌ์—์„œ ์‹ค์ œ URL ์ถ”์ถœ (uddg ํŒŒ๋ผ๋ฏธํ„ฐ) - String actualUrl = extractDdgUrl(href); - if (actualUrl == null) continue; - - boolean matches = false; - for (String pattern : urlPatterns) { - if (actualUrl.contains(pattern)) { - matches = true; - break; - } - } - if (matches && !seen.contains(actualUrl)) { - seen.add(actualUrl); - results.add(Map.of("title", title, "url", actualUrl)); - } - } - - log.info("[DDG] Found {} results for '{}'", results.size(), query); - return results; - } - - /** DDG ๋ฆฌ๋‹ค์ด๋ ‰ํŠธ URL์—์„œ ์‹ค์ œ URL ์ถ”์ถœ */ - private String extractDdgUrl(String ddgHref) { - try { - // //duckduckgo.com/l/?uddg=ENCODED_URL&rut=... - if (ddgHref.contains("uddg=")) { - String uddgParam = ddgHref.substring(ddgHref.indexOf("uddg=") + 5); - int ampIdx = uddgParam.indexOf('&'); - if (ampIdx > 0) uddgParam = uddgParam.substring(0, ampIdx); - return URLDecoder.decode(uddgParam, StandardCharsets.UTF_8); - } - // ์ง์ ‘ URL์ธ ๊ฒฝ์šฐ - if (ddgHref.startsWith("http")) return ddgHref; - } catch (Exception e) { - log.debug("[DDG] Failed to extract URL from: {}", ddgHref); - } - return null; - } - - private List> searchTabling(String restaurantName) throws Exception { - return searchDuckDuckGo( + private List> searchTabling(String restaurantName) { + return webSearch.search( "site:tabling.co.kr " + restaurantName, "tabling.co.kr/restaurant/", "tabling.co.kr/place/" ); } - private List> searchCatchtable(String restaurantName) throws Exception { - return searchDuckDuckGo( + private List> searchCatchtable(String restaurantName) { + return webSearch.search( "site:app.catchtable.co.kr " + restaurantName, "catchtable.co.kr/dining/", "catchtable.co.kr/shop/" ); diff --git a/backend-java/src/main/java/com/tasteby/service/WebSearchService.java b/backend-java/src/main/java/com/tasteby/service/WebSearchService.java new file mode 100644 index 0000000..dfb178d --- /dev/null +++ b/backend-java/src/main/java/com/tasteby/service/WebSearchService.java @@ -0,0 +1,154 @@ +package com.tasteby.service; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import java.net.URI; +import java.net.URLDecoder; +import java.net.URLEncoder; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * #357 ์›น ๊ฒ€์ƒ‰ ์ถ”์ƒํ™”. + * - Naver Search webkr.json ์šฐ์„  (ํ•œ๊ตญ ์‹๋‹น ์ •ํ™•๋„ ๋†’์Œ, ๋ฌด๋ฃŒ ์ผ 25k). + * - ํ‚ค ๋ฏธ์„ค์ • ๋˜๋Š” 5xx/timeout ์‹œ DDG HTML ํŒŒ์‹ฑ์œผ๋กœ ํด๋ฐฑ. + * - ๊ฒฐ๊ณผ๋Š” urlPatterns๋กœ ํ•„ํ„ฐ๋ง (๊ธฐ์กด searchDuckDuckGo์™€ ๋™์ผ ์ธํ„ฐํŽ˜์ด์Šค). + */ +@Service +public class WebSearchService { + + private static final Logger log = LoggerFactory.getLogger(WebSearchService.class); + private static final int MAX_RESULTS = 5; + + private static final HttpClient HTTP = HttpClient.newBuilder() + .followRedirects(HttpClient.Redirect.NORMAL) + .build(); + + private static final Pattern DDG_RESULT = Pattern.compile( + "]+class=\"result__a\"[^>]+href=\"([^\"]+)\"[^>]*>(.*?)", + Pattern.DOTALL); + + private final ObjectMapper json = new ObjectMapper(); + private final String naverClientId; + private final String naverClientSecret; + + public WebSearchService( + @Value("${app.naver.client-id:}") String naverClientId, + @Value("${app.naver.client-secret:}") String naverClientSecret) { + this.naverClientId = naverClientId == null ? "" : naverClientId.trim(); + this.naverClientSecret = naverClientSecret == null ? "" : naverClientSecret.trim(); + log.info("WebSearchService init โ€” Naver={}", naverClientId.isEmpty() ? "off" : "on"); + } + + public List> search(String query, String... urlPatterns) { + if (!naverClientId.isEmpty() && !naverClientSecret.isEmpty()) { + try { + List> n = searchNaver(query, urlPatterns); + if (!n.isEmpty()) return n; + } catch (Exception e) { + log.warn("[NaverSearch] failed, falling back to DDG: {}", e.getMessage()); + } + } + try { + return searchDdg(query, urlPatterns); + } catch (Exception e) { + log.warn("[DDG] failed: {}", e.getMessage()); + return List.of(); + } + } + + // โ”€โ”€โ”€ Naver โ”€โ”€โ”€ + + List> searchNaver(String query, String... urlPatterns) throws Exception { + String encoded = URLEncoder.encode(query, StandardCharsets.UTF_8); + String url = "https://openapi.naver.com/v1/search/webkr.json?query=" + encoded + "&display=30"; + HttpRequest req = HttpRequest.newBuilder() + .uri(URI.create(url)) + .header("X-Naver-Client-Id", naverClientId) + .header("X-Naver-Client-Secret", naverClientSecret) + .GET() + .build(); + HttpResponse resp = HTTP.send(req, HttpResponse.BodyHandlers.ofString()); + if (resp.statusCode() >= 400) { + throw new RuntimeException("Naver " + resp.statusCode()); + } + JsonNode root = json.readTree(resp.body()); + JsonNode items = root.path("items"); + List> out = new ArrayList<>(); + Set seen = new HashSet<>(); + for (JsonNode it : items) { + if (out.size() >= MAX_RESULTS) break; + String link = it.path("link").asText(""); + String title = stripTags(it.path("title").asText("")); + if (link.isEmpty() || !matchesPattern(link, urlPatterns)) continue; + if (seen.add(link)) out.add(Map.of("title", title, "url", link)); + } + log.info("[NaverSearch] '{}' โ†’ {}", query, out.size()); + return out; + } + + // โ”€โ”€โ”€ DDG โ”€โ”€โ”€ + + List> searchDdg(String query, String... urlPatterns) throws Exception { + String encoded = URLEncoder.encode(query, StandardCharsets.UTF_8); + String url = "https://html.duckduckgo.com/html/?q=" + encoded; + HttpRequest req = HttpRequest.newBuilder() + .uri(URI.create(url)) + .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36") + .header("Accept", "text/html,application/xhtml+xml") + .header("Accept-Language", "ko-KR,ko;q=0.9") + .GET() + .build(); + HttpResponse resp = HTTP.send(req, HttpResponse.BodyHandlers.ofString()); + String html = resp.body(); + Matcher m = DDG_RESULT.matcher(html); + List> out = new ArrayList<>(); + Set seen = new HashSet<>(); + while (m.find() && out.size() < MAX_RESULTS) { + String href = m.group(1); + String title = m.group(2).replaceAll("<[^>]+>", "").trim(); + String actual = extractDdgUrl(href); + if (actual == null || !matchesPattern(actual, urlPatterns)) continue; + if (seen.add(actual)) out.add(Map.of("title", title, "url", actual)); + } + log.info("[DDG] '{}' โ†’ {}", query, out.size()); + return out; + } + + private String extractDdgUrl(String ddgHref) { + try { + if (ddgHref.contains("uddg=")) { + String p = ddgHref.substring(ddgHref.indexOf("uddg=") + 5); + int amp = p.indexOf('&'); + if (amp > 0) p = p.substring(0, amp); + return URLDecoder.decode(p, StandardCharsets.UTF_8); + } + if (ddgHref.startsWith("http")) return ddgHref; + } catch (Exception e) { + log.debug("[DDG] url extract failed: {}", ddgHref); + } + return null; + } + + static String stripTags(String s) { + return s == null ? "" : s.replaceAll("<[^>]+>", "").trim(); + } + + static boolean matchesPattern(String url, String[] patterns) { + if (patterns == null || patterns.length == 0) return true; + for (String p : patterns) { + if (url.contains(p)) return true; + } + return false; + } +} diff --git a/backend-java/src/main/resources/application.yml b/backend-java/src/main/resources/application.yml index 922611f..32d3a2c 100644 --- a/backend-java/src/main/resources/application.yml +++ b/backend-java/src/main/resources/application.yml @@ -56,6 +56,11 @@ app: youtube-api-key: ${YOUTUBE_DATA_API_KEY} client-id: ${GOOGLE_CLIENT_ID:635551099330-2l003d3ernjmkqavd4f6s78r8r405iml.apps.googleusercontent.com} + # #357 โ€” Naver Search API (Tabling/Catchtable URL ๋งค์นญ). ๋ฏธ์„ค์ • ์‹œ DDG ํด๋ฐฑ. + naver: + client-id: ${NAVER_CLIENT_ID:} + client-secret: ${NAVER_CLIENT_SECRET:} + cache: ttl-seconds: 600 diff --git a/k8s/secrets.yaml.template b/k8s/secrets.yaml.template index 3bcdacf..b37f346 100644 --- a/k8s/secrets.yaml.template +++ b/k8s/secrets.yaml.template @@ -9,9 +9,12 @@ type: Opaque stringData: ORACLE_USER: "" ORACLE_PASSWORD: "" - ORACLE_DSN: "_high?TNS_ADMIN=/etc/oracle/wallet" + ORACLE_DSN: "_medium?TNS_ADMIN=/etc/oracle/wallet" JWT_SECRET: "" OCI_COMPARTMENT_ID: "" OCI_CHAT_MODEL_ID: "" GOOGLE_MAPS_API_KEY: "" YOUTUBE_DATA_API_KEY: "" + # #357 โ€” Naver Search API (์„ ํƒ). ๋ฏธ์„ค์ • ์‹œ DDG ํด๋ฐฑ. + NAVER_CLIENT_ID: "" + NAVER_CLIENT_SECRET: ""