diff --git a/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java b/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java index e480052..5eedee9 100644 --- a/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java +++ b/backend-java/src/main/java/com/tasteby/controller/RestaurantController.java @@ -2,7 +2,6 @@ package com.tasteby.controller; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; -import com.microsoft.playwright.*; import com.tasteby.domain.Restaurant; import com.tasteby.security.AuthUtil; import com.tasteby.service.CacheService; @@ -15,15 +14,19 @@ import org.springframework.web.bind.annotation.*; import org.springframework.web.server.ResponseStatusException; import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; +import java.net.URI; +import java.net.URLDecoder; import java.net.URLEncoder; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ThreadLocalRandom; +import java.util.regex.Matcher; +import java.util.regex.Pattern; @RestController @RequestMapping("/api/restaurants") @@ -139,12 +142,8 @@ public class RestaurantController { var r = restaurantService.findById(id); if (r == null) throw new ResponseStatusException(HttpStatus.NOT_FOUND); - try (Playwright pw = Playwright.create()) { - try (Browser browser = launchBrowser(pw)) { - BrowserContext ctx = newContext(browser); - Page page = newPage(ctx); - return searchTabling(page, r.getName()); - } + try { + return searchTabling(r.getName()); } catch (Exception e) { log.error("[TABLING] Search failed for '{}': {}", r.getName(), e.getMessage()); throw new ResponseStatusException(HttpStatus.BAD_GATEWAY, "Search failed: " + e.getMessage()); @@ -183,51 +182,44 @@ public class RestaurantController { int linked = 0; int notFound = 0; - try (Playwright pw = Playwright.create()) { - try (Browser browser = launchBrowser(pw)) { - BrowserContext ctx = newContext(browser); - Page page = newPage(ctx); + for (int i = 0; i < total; i++) { + var r = restaurants.get(i); + emit(emitter, Map.of("type", "processing", "current", i + 1, + "total", total, "name", r.getName())); - for (int i = 0; i < total; i++) { - var r = restaurants.get(i); - emit(emitter, Map.of("type", "processing", "current", i + 1, - "total", total, "name", r.getName())); - - try { - var results = searchTabling(page, r.getName()); - if (!results.isEmpty()) { - String url = String.valueOf(results.get(0).get("url")); - String title = String.valueOf(results.get(0).get("title")); - if (isNameSimilar(r.getName(), title)) { - restaurantService.update(r.getId(), Map.of("tabling_url", url)); - linked++; - emit(emitter, Map.of("type", "done", "current", i + 1, - "name", r.getName(), "url", url, "title", title)); - } else { - restaurantService.update(r.getId(), Map.of("tabling_url", "NONE")); - notFound++; - log.info("[TABLING] Name mismatch: '{}' vs '{}', skipping", r.getName(), title); - emit(emitter, Map.of("type", "notfound", "current", i + 1, - "name", r.getName(), "reason", "이름 불일치: " + title)); - } - } else { - restaurantService.update(r.getId(), Map.of("tabling_url", "NONE")); - notFound++; - emit(emitter, Map.of("type", "notfound", "current", i + 1, - "name", r.getName())); - } - } catch (Exception e) { + try { + var results = searchTabling(r.getName()); + if (!results.isEmpty()) { + String url = String.valueOf(results.get(0).get("url")); + String title = String.valueOf(results.get(0).get("title")); + if (isNameSimilar(r.getName(), title)) { + restaurantService.update(r.getId(), Map.of("tabling_url", url)); + linked++; + emit(emitter, Map.of("type", "done", "current", i + 1, + "name", r.getName(), "url", url, "title", title)); + } else { + restaurantService.update(r.getId(), Map.of("tabling_url", "NONE")); notFound++; - emit(emitter, Map.of("type", "error", "current", i + 1, - "name", r.getName(), "message", e.getMessage())); + log.info("[TABLING] Name mismatch: '{}' vs '{}', skipping", r.getName(), title); + emit(emitter, Map.of("type", "notfound", "current", i + 1, + "name", r.getName(), "reason", "이름 불일치: " + title)); } - - // Google 봇 판정 방지 랜덤 딜레이 (5~15초) - int delay = ThreadLocalRandom.current().nextInt(5000, 15001); - log.info("[TABLING] Waiting {}ms before next search...", delay); - page.waitForTimeout(delay); + } else { + restaurantService.update(r.getId(), Map.of("tabling_url", "NONE")); + notFound++; + emit(emitter, Map.of("type", "notfound", "current", i + 1, + "name", r.getName())); } + } catch (Exception e) { + notFound++; + emit(emitter, Map.of("type", "error", "current", i + 1, + "name", r.getName(), "message", e.getMessage())); } + + // 랜덤 딜레이 (2~5초) + int delay = ThreadLocalRandom.current().nextInt(2000, 5001); + log.info("[TABLING] Waiting {}ms before next search...", delay); + Thread.sleep(delay); } cache.flush(); @@ -277,12 +269,8 @@ public class RestaurantController { AuthUtil.requireAdmin(); var r = restaurantService.findById(id); if (r == null) throw new ResponseStatusException(HttpStatus.NOT_FOUND); - try (Playwright pw = Playwright.create()) { - try (Browser browser = launchBrowser(pw)) { - BrowserContext ctx = newContext(browser); - Page page = newPage(ctx); - return searchCatchtable(page, r.getName()); - } + try { + return searchCatchtable(r.getName()); } catch (Exception e) { log.error("[CATCHTABLE] Search failed for '{}': {}", r.getName(), e.getMessage()); throw new ResponseStatusException(HttpStatus.BAD_GATEWAY, "Search failed: " + e.getMessage()); @@ -321,50 +309,43 @@ public class RestaurantController { int linked = 0; int notFound = 0; - try (Playwright pw = Playwright.create()) { - try (Browser browser = launchBrowser(pw)) { - BrowserContext ctx = newContext(browser); - Page page = newPage(ctx); + for (int i = 0; i < total; i++) { + var r = restaurants.get(i); + emit(emitter, Map.of("type", "processing", "current", i + 1, + "total", total, "name", r.getName())); - for (int i = 0; i < total; i++) { - var r = restaurants.get(i); - emit(emitter, Map.of("type", "processing", "current", i + 1, - "total", total, "name", r.getName())); - - try { - var results = searchCatchtable(page, r.getName()); - if (!results.isEmpty()) { - String url = String.valueOf(results.get(0).get("url")); - String title = String.valueOf(results.get(0).get("title")); - if (isNameSimilar(r.getName(), title)) { - restaurantService.update(r.getId(), Map.of("catchtable_url", url)); - linked++; - emit(emitter, Map.of("type", "done", "current", i + 1, - "name", r.getName(), "url", url, "title", title)); - } else { - restaurantService.update(r.getId(), Map.of("catchtable_url", "NONE")); - notFound++; - log.info("[CATCHTABLE] Name mismatch: '{}' vs '{}', skipping", r.getName(), title); - emit(emitter, Map.of("type", "notfound", "current", i + 1, - "name", r.getName(), "reason", "이름 불일치: " + title)); - } - } else { - restaurantService.update(r.getId(), Map.of("catchtable_url", "NONE")); - notFound++; - emit(emitter, Map.of("type", "notfound", "current", i + 1, - "name", r.getName())); - } - } catch (Exception e) { + try { + var results = searchCatchtable(r.getName()); + if (!results.isEmpty()) { + String url = String.valueOf(results.get(0).get("url")); + String title = String.valueOf(results.get(0).get("title")); + if (isNameSimilar(r.getName(), title)) { + restaurantService.update(r.getId(), Map.of("catchtable_url", url)); + linked++; + emit(emitter, Map.of("type", "done", "current", i + 1, + "name", r.getName(), "url", url, "title", title)); + } else { + restaurantService.update(r.getId(), Map.of("catchtable_url", "NONE")); notFound++; - emit(emitter, Map.of("type", "error", "current", i + 1, - "name", r.getName(), "message", e.getMessage())); + log.info("[CATCHTABLE] Name mismatch: '{}' vs '{}', skipping", r.getName(), title); + emit(emitter, Map.of("type", "notfound", "current", i + 1, + "name", r.getName(), "reason", "이름 불일치: " + title)); } - - int delay = ThreadLocalRandom.current().nextInt(5000, 15001); - log.info("[CATCHTABLE] Waiting {}ms before next search...", delay); - page.waitForTimeout(delay); + } else { + restaurantService.update(r.getId(), Map.of("catchtable_url", "NONE")); + notFound++; + emit(emitter, Map.of("type", "notfound", "current", i + 1, + "name", r.getName())); } + } catch (Exception e) { + notFound++; + emit(emitter, Map.of("type", "error", "current", i + 1, + "name", r.getName(), "message", e.getMessage())); } + + int delay = ThreadLocalRandom.current().nextInt(2000, 5001); + log.info("[CATCHTABLE] Waiting {}ms before next search...", delay); + Thread.sleep(delay); } cache.flush(); @@ -407,119 +388,96 @@ public class RestaurantController { return result; } - // ─── Playwright helpers ────────────────────────────────────────────── + // ─── DuckDuckGo HTML search helpers ───────────────────────────────── - private Browser launchBrowser(Playwright pw) { - return pw.chromium().launch(new BrowserType.LaunchOptions() - .setHeadless(false) - .setArgs(List.of("--disable-blink-features=AutomationControlled"))); - } + private static final HttpClient httpClient = HttpClient.newBuilder() + .followRedirects(HttpClient.Redirect.NORMAL) + .build(); - private BrowserContext newContext(Browser browser) { - return browser.newContext(new Browser.NewContextOptions() - .setLocale("ko-KR").setViewportSize(1280, 900)); - } + private static final Pattern DDG_RESULT_PATTERN = Pattern.compile( + "]+class=\"result__a\"[^>]+href=\"([^\"]+)\"[^>]*>(.*?)", + Pattern.DOTALL + ); - private Page newPage(BrowserContext ctx) { - Page page = ctx.newPage(); - page.addInitScript("Object.defineProperty(navigator, 'webdriver', {get: () => false})"); - return page; - } + /** + * DuckDuckGo HTML 검색을 통해 특정 사이트의 URL을 찾는다. + * html.duckduckgo.com은 서버사이드 렌더링이라 봇 판정 없이 HTTP 요청만으로 결과를 파싱할 수 있다. + */ + private List
)} {infoTarget.cuisine_type && ( -{infoTarget.cuisine_type}
+{infoTarget.cuisine_type}
)} {infoTarget.address && ( -{infoTarget.address}
+{infoTarget.address}
)} {infoTarget.price_range && ( -{infoTarget.price_range}
+{infoTarget.price_range}
)} {infoTarget.phone && ( -{infoTarget.phone}
+{infoTarget.phone}
)}