feat(cache): #336 SCAN/UNLINK + disabled 자동 복구 + 에러 메트릭

- CacheService.flush: redis.keys() 블로킹 → SCAN cursor + UNLINK 논블로킹.
  UNLINK 미지원 환경은 DEL로 폴백. 500 batch 단위.
- 30초 주기 @Scheduled checkHealth: Redis ping → disabled 자동 토글.
  startup 시 disabled=true여도 Redis 재기동되면 자동 복구.
- recordError 헬퍼: AtomicLong errorCount + volatile lastError.
  로그 throttle (n==1 || n%100==0만 WARN, 나머지 DEBUG).
- CacheStats record + GET /api/admin/cache/stats (admin only).
- 설계서: docs/design/336-cache-scan-recovery/README.md (Approved).

Refs: #336
This commit is contained in:
joungmin
2026-06-15 15:07:22 +09:00
parent 1a5db34e15
commit c7bd3c4c09
3 changed files with 284 additions and 28 deletions

View File

@@ -22,4 +22,14 @@ public class AdminCacheController {
cacheService.flush();
return Map.of("ok", true);
}
/**
 * #336 — Cache health visibility: returns the disabled flag, the error count
 * and the most recent error text as reported by {@code CacheService.getStats()}.
 *
 * <p>Intended for operators to inspect from the admin UI until an external
 * monitoring tool is introduced. {@code AuthUtil.requireAdmin()} runs first;
 * presumably it rejects non-admin callers (defined outside this view).
 *
 * @return the current cache statistics snapshot
 */
@GetMapping("/cache/stats")
public CacheService.CacheStats cacheStats() {
    AuthUtil.requireAdmin();
    final CacheService.CacheStats snapshot = cacheService.getStats();
    return snapshot;
}
}

View File

@@ -5,46 +5,46 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.redis.connection.RedisConnectionFactory;
import org.springframework.data.redis.core.Cursor;
import org.springframework.data.redis.core.ScanOptions;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Set;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
@Service
public class CacheService {
private static final Logger log = LoggerFactory.getLogger(CacheService.class);

// Namespace prefix; flush() only touches keys that start with it.
private static final String PREFIX = "tasteby:";
// Glob handed to SCAN MATCH so a flush never leaves this service's namespace.
private static final String SCAN_PATTERN = PREFIX + "*";
// Used both as the SCAN COUNT hint and as the number of keys deleted per UNLINK/DEL call.
private static final int SCAN_BATCH = 500;

private final StringRedisTemplate redis;
private final ObjectMapper mapper;
// Expiry applied to values written through set().
private final Duration ttl;

// #336 — disabled/errorCount/lastError are shared between the scheduled health check
// and request threads, so they must be volatile/atomic. There must be exactly ONE
// declaration of `disabled`: a second, non-volatile declaration is a duplicate-field
// compile error and would defeat the visibility guarantee.
private volatile boolean disabled = false;
private final AtomicLong errorCount = new AtomicLong(0);
private volatile String lastError = null;
/**
 * Creates the cache service and probes Redis once to decide whether caching starts enabled.
 *
 * <p>The probe goes through {@link #pingOk()} only. Running an inline ping in addition to
 * {@code pingOk()} opens two connections at startup and logs "Redis connected" twice.
 * A failed probe only sets {@code disabled}; the scheduled {@link #checkHealth()} can
 * re-enable caching later.
 *
 * @param redis      template used for all Redis access
 * @param mapper     JSON mapper used to (de)serialize cached values
 * @param ttlSeconds entry time-to-live in seconds ({@code app.cache.ttl-seconds}, default 600)
 */
public CacheService(StringRedisTemplate redis, ObjectMapper mapper,
        @Value("${app.cache.ttl-seconds:600}") int ttlSeconds) {
    this.redis = redis;
    this.mapper = mapper;
    this.ttl = Duration.ofSeconds(ttlSeconds);

    // Keep the dedicated message for a missing factory: it is a wiring problem, not an outage.
    if (redis.getConnectionFactory() == null) {
        log.warn("Redis ConnectionFactory is null, caching disabled");
        this.disabled = true;
        return;
    }

    // #276 — pingOk() uses try-with-resources, so the probe connection cannot leak.
    this.disabled = !pingOk();
    if (disabled) {
        // pingOk() stored the failure reason in lastError.
        log.warn("Redis unavailable ({}), caching disabled", lastError);
    } else {
        log.info("Redis connected");
    }
}
public String makeKey(String... parts) {
// #276 — null/빈 파트로 "tasteby::" 같은 잘못된 키 생성 방지
if (parts == null || parts.length == 0) {
throw new IllegalArgumentException("makeKey requires at least one part");
}
@@ -62,7 +62,7 @@ public class CacheService {
return mapper.readValue(val, type);
}
} catch (Exception e) {
log.debug("Cache get error: {}", e.getMessage());
recordError("get", e);
}
return null;
}
@@ -72,7 +72,7 @@ public class CacheService {
try {
return redis.opsForValue().get(key);
} catch (Exception e) {
log.debug("Cache get error: {}", e.getMessage());
recordError("getRaw", e);
return null;
}
}
@@ -83,30 +83,114 @@ public class CacheService {
String json = mapper.writeValueAsString(value);
redis.opsForValue().set(key, json, ttl);
} catch (JsonProcessingException e) {
log.debug("Cache set error: {}", e.getMessage());
recordError("set:serialize", e);
} catch (Exception e) {
recordError("set", e);
}
}
/**
* #336 — KEYS 블로킹 명령 대체.
* SCAN으로 cursor 순회 후 UNLINK(논블로킹 삭제)로 일괄 삭제.
*/
public void flush() {
if (disabled) return;
try {
Set<String> keys = redis.keys(PREFIX + "*");
if (keys != null && !keys.isEmpty()) {
redis.delete(keys);
Integer count = redis.execute((org.springframework.data.redis.core.RedisCallback<Integer>) conn -> {
List<byte[]> batch = new ArrayList<>(SCAN_BATCH);
int deleted = 0;
try (Cursor<byte[]> cursor = conn.keyCommands().scan(
ScanOptions.scanOptions().match(SCAN_PATTERN).count(SCAN_BATCH).build())) {
while (cursor.hasNext()) {
batch.add(cursor.next());
if (batch.size() >= SCAN_BATCH) {
deleted += unlinkBatch(conn, batch);
batch.clear();
}
}
if (!batch.isEmpty()) {
deleted += unlinkBatch(conn, batch);
}
} catch (Exception e) {
recordError("flush:scan", e);
}
log.info("Cache flushed");
return deleted;
});
log.info("Cache flushed ({} keys via SCAN+UNLINK)", count == null ? 0 : count);
}
/**
 * Deletes one batch of keys, preferring UNLINK and falling back to DEL.
 *
 * <p>Any UNLINK failure triggers the DEL fallback, which covers servers that do not
 * support UNLINK. Each failure is reported once through {@link #recordError}, which
 * already logs, so no separate debug line is written here.
 *
 * @param conn connection supplied by the enclosing Redis callback
 * @param keys raw keys to delete; a null or empty list is a no-op
 * @return number of keys the server reported as removed, or 0 if both commands failed
 */
private int unlinkBatch(org.springframework.data.redis.connection.RedisConnection conn, List<byte[]> keys) {
    // UNLINK and DEL both require at least one key; skip the round trip for an empty batch.
    if (keys == null || keys.isEmpty()) return 0;

    // Convert once and reuse for the fallback.
    byte[][] raw = keys.toArray(new byte[0][]);
    try {
        Long n = conn.keyCommands().unlink(raw);
        return n == null ? 0 : n.intValue();
    } catch (Exception e) {
        // DEL fallback for environments without UNLINK support.
        recordError("flush:unlink", e);
        try {
            Long n = conn.keyCommands().del(raw);
            return n == null ? 0 : n.intValue();
        } catch (Exception delErr) {
            recordError("flush:del", delErr);
            return 0;
        }
    }
}
/**
 * #290 — Deletes a single key (used, for example, to evict an entry whose cached
 * value failed to deserialize).
 *
 * <p>Best-effort: does nothing while caching is disabled and never throws. A failure
 * is reported once through {@link #recordError}, which both counts and logs it, so no
 * additional debug line is written here.
 *
 * @param key full cache key to remove
 */
public void del(String key) {
    if (disabled) return;
    try {
        redis.delete(key);
    } catch (Exception e) {
        recordError("del", e);
    }
}
/**
 * #336 — Keeps {@code disabled} in sync with Redis availability.
 *
 * <p>Scheduled with a 30-second fixed delay. Each run pings Redis once: a failed ping
 * while caching is enabled turns it off, and a successful ping while caching is
 * disabled turns it back on. Nothing is written or logged when the state already
 * matches the ping result.
 */
@Scheduled(fixedDelay = 30_000L)
public void checkHealth() {
    final boolean shouldDisable = !pingOk();
    if (shouldDisable == disabled) {
        return; // state already reflects reality
    }
    disabled = shouldDisable;
    if (shouldDisable) {
        log.warn("Redis lost, caching disabled");
    } else {
        log.info("Redis recovered, caching re-enabled");
    }
}
/**
 * Probes Redis with a single PING on a short-lived connection.
 *
 * <p>On failure the reason is stored in {@code lastError} with a {@code ping:} prefix.
 * {@code errorCount} is deliberately not incremented, so periodic health probes during
 * an outage do not inflate the operation error counter.
 *
 * @return {@code true} if a connection was obtained and PING completed without throwing
 */
private boolean pingOk() {
    RedisConnectionFactory factory = redis.getConnectionFactory();
    if (factory == null) {
        // Leave a reason behind; otherwise stats show disabled=true with no explanation.
        lastError = "ping: no RedisConnectionFactory";
        return false;
    }
    try (var conn = factory.getConnection()) {
        conn.ping();
        return true;
    } catch (Exception e) {
        // Same null-message fallback as recordError, so the text never reads "ping: null".
        String msg = e.getMessage();
        lastError = "ping: " + (msg == null ? e.getClass().getSimpleName() : msg);
        return false;
    }
}
/**
 * Counts a cache failure, remembers it as the latest error and logs it with a simple throttle.
 *
 * <p>The first failure and every 100th one are logged at WARN; all others at DEBUG, to
 * keep production logs from flooding while Redis is unhealthy.
 *
 * @param op short operation label used as the message prefix (e.g. {@code "get"})
 * @param e  the failure; its class simple name is used when it has no message
 */
private void recordError(String op, Exception e) {
    long n = errorCount.incrementAndGet();
    String msg = e.getMessage();
    // Build the text locally and log the local: re-reading the volatile field could pick up
    // a message written by a concurrent failure and pair it with this op and counter value.
    String summary = op + ": " + (msg == null ? e.getClass().getSimpleName() : msg);
    lastError = summary;
    if (n == 1 || n % 100 == 0) {
        log.warn("Cache {} error #{}: {}", op, n, summary);
    } else {
        log.debug("Cache {} error #{}: {}", op, n, summary);
    }
}
/**
 * Reports whether caching is currently switched off.
 *
 * @return the current value of the {@code disabled} flag
 */
public boolean isDisabled() {
    return this.disabled;
}
/**
 * Builds a snapshot of the cache health indicators.
 *
 * <p>The three values are read one after another without a lock, so under concurrent
 * failures they may not all belong to the same instant.
 *
 * @return the disabled flag, total error count and latest error text
 */
public CacheStats getStats() {
    final boolean off = disabled;
    final long failures = errorCount.get();
    final String latest = lastError;
    return new CacheStats(off, failures, latest);
}
/**
 * Immutable snapshot of cache health.
 *
 * @param disabled   whether caching was switched off when the snapshot was taken
 * @param errorCount number of cache errors counted so far
 * @param lastError  text of the most recent error, or {@code null} if none was recorded
 */
public record CacheStats(
        boolean disabled,
        long errorCount,
        String lastError) {
}
}