[03-Developer] #311 LLM warm-up + concurrent guard + quickCheck

ChatScreen 마운트 시 백그라운드 native init 으로 첫 send 시점에 native load 지연을 안 보이게 한다. 12개 AC + UX-Reviewer 의 6개 권고 모두 코드 반영.

핵심 변경:
- `chat_warmup_provider.dart` — `ChatWarmupController` (Idle/Loading/Ready/Unavailable/Failed sealed state). fast path (`llm.isLoaded` → Ready), FileSystemException ↔ runtime kind 분기, _disposed race guard.
- `model_lifecycle.dart` — `quickCheck()`: 2.4GB SHA-256 hashing 없이 meta_kv + 파일 존재만 보고 ready 추정 (R4 UX 권고).
- `gemma_llm_service.dart` + `llm_service.dart` — `_loadingFuture` 동시 호출 가드. 두 caller 가 동시에 load() 해도 native init 은 1 회만.
- `chat_screen.dart` — initState postFrameCallback 에서 warmup.start(). warmup 상태에 따라 hintText / spinner / 실패 banner 분기.

AC coverage (12개):
- AC1~AC8: ChatWarmupController unit (chat_warmup_test.dart 8 tests).
- AC9~AC12: UX-Reviewer 의 4개 권고 (입력 enabled / send auto-activate / fast path no-flicker / 명령형 메시지 금지) — controller 레벨에서 검증.

테스트: 167 passed (1 pre-existing skip). `flutter analyze` clean.

Refs #311
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-15 12:52:50 +09:00
parent 1fa4f24a8a
commit 5b4c05316a
8 changed files with 587 additions and 22 deletions
--- a/app/lib/data/ai/gemma_llm_service.dart
+++ b/app/lib/data/ai/gemma_llm_service.dart
@@ -37,13 +37,28 @@ class GemmaLlmService implements LlmService {

  InferenceModel? _model;
  bool _loaded = false;
+  Future<void>? _loadingFuture;

  @override
  bool get isLoaded => _loaded;

+  /// #311 AC7: concurrent-call guard. While a load is in flight (e.g. a
+  /// `ChatScreen` warm-up racing a `userTurn` lazy load), callers share one
+  /// native init; the guard clears on completion so a failed load can retry.
+  /// See `docs/design/311-llm-warmup/fn-concurrent_load_guard.md`.
  @override
-  Future<void> load() async {
-    if (_loaded) return;
+  Future<void> load() {
+    if (_loaded) return Future.value();
+    final existing = _loadingFuture;
+    if (existing != null) return existing;
+    final future = _doLoad();
+    _loadingFuture = future;
+    return future.whenComplete(() {
+      _loadingFuture = null;
+    });
+  }
+
+  Future<void> _doLoad() async {
    if (!await File(modelPath).exists()) {
      throw FileSystemException('model file missing', modelPath);
    }
--- a/app/lib/data/ai/llm_service.dart
+++ b/app/lib/data/ai/llm_service.dart
@@ -73,6 +73,16 @@ class MockLlmService implements LlmService {
  Map<String, dynamic>? lastSchema;
  Duration responseDelay = Duration.zero;

+  /// #311 test helpers. Simulate cold-load latency / failure so the warm-up
+  /// controller can be exercised. Mirrors the Gemma path:
+  /// - `loadDelay > 0`  → load completes after the delay
+  /// - `loadThrows`     → load throws this error (after any `loadDelay`)
+  /// - `loadCount`      → `_doLoad()` runs; observed by concurrent-load tests
+  Duration loadDelay = Duration.zero;
+  Object? loadThrows;
+  int loadCount = 0;
+  Future<void>? _loadingFuture;
+
  /// Queues consumed by [startChat] in order. Each entry is the event list
  /// returned for a single `send*` call.
  final List<List<LlmChatEvent>> chatScript = [];
@@ -82,8 +92,29 @@ class MockLlmService implements LlmService {
  @override
  bool get isLoaded => _loaded;

+  /// #311 AC7: same concurrent-call guard as [GemmaLlmService]. Overlapping
+  /// `load()` calls share one `_doLoad()` run, so `loadCount` counts simulated
+  /// init attempts rather than callers. The guard clears when the run settles,
+  /// so a later `load()` after a failure counts as a new attempt.
  @override
-  Future<void> load() async {
+  Future<void> load() {
+    if (_loaded) return Future.value();
+    final existing = _loadingFuture;
+    if (existing != null) return existing;
+    final future = _doLoad();
+    _loadingFuture = future;
+    return future.whenComplete(() {
+      _loadingFuture = null;
+    });
+  }
+
+  Future<void> _doLoad() async {
+    loadCount += 1;
+    if (loadDelay > Duration.zero) {
+      await Future<void>.delayed(loadDelay);
+    }
+    final err = loadThrows;
+    if (err != null) throw err;
    _loaded = true;
  }

--- a/app/lib/data/ai/model_lifecycle.dart
+++ b/app/lib/data/ai/model_lifecycle.dart
@@ -94,6 +94,44 @@ class ModelLifecycle {
    return p.join(dir.path, config.filename);
  }

+  /// Lightweight readiness estimate for warm-up gating (#311); never throws.
+  ///
+  /// Skips the SHA-256 re-hash that [checkAvailability] performs — for a
+  /// ~2.4GB model file the hash is wall-clock-noticeable on every screen
+  /// mount. Returns `ready` unless one of these checks, in order, decides:
+  ///   - opt_in is not 'true' → `missing`
+  ///   - download_state is 'downloading' or 'paused' → `downloading`
+  ///   - ai_model_path unset → `missing`; ai_model_sha256 unset → `corrupt`
+  ///   - file absent on disk → `missing`; anything thrown → `corrupt`
+  ///
+  /// Tampering/disk-corruption detection is left to [checkAvailability]'s
+  /// cold path (SettingsScreen). The trade-off is documented in
+  /// `docs/design/311-llm-warmup/README.md` §11 R4.
+  Future<ModelAvailability> quickCheck() async {
+    try {
+      final optIn = await meta.find(AiMetaKeys.optIn);
+      if (optIn != 'true') return ModelAvailability.missing;
+
+      final state = await meta.find(AiMetaKeys.downloadState);
+      if (state == 'downloading' || state == 'paused') {
+        return ModelAvailability.downloading;
+      }
+
+      final pathStr = await meta.find(AiMetaKeys.modelPath);
+      if (pathStr == null) return ModelAvailability.missing;
+
+      final expected = await meta.find(AiMetaKeys.modelSha);
+      if (expected == null) return ModelAvailability.corrupt;
+
+      final file = File(pathStr);
+      if (!file.existsSync()) return ModelAvailability.missing;
+
+      return ModelAvailability.ready;
+    } catch (_) {
+      return ModelAvailability.corrupt;
+    }
+  }
+
  Future<ModelAvailability> checkAvailability() async {
    try {
      final optIn = await meta.find(AiMetaKeys.optIn);