[03-Developer] #311 LLM warm-up + concurrent guard + quickCheck
ChatScreen 마운트 시 백그라운드 native init 으로 첫 send 시점에 native load 지연을 안 보이게 한다. 12개 AC + UX-Reviewer 의 6개 권고 모두 코드 반영. 핵심 변경: - `chat_warmup_provider.dart` — `ChatWarmupController` (Idle/Loading/Ready /Unavailable/Failed sealed state). fast path (`llm.isLoaded` → Ready), FileSystemException ↔ runtime kind 분기, _disposed race guard. - `model_lifecycle.dart` — `quickCheck()`: 2.4GB SHA-256 hashing 없이 meta_kv + 파일 존재만 보고 ready 추정 (R4 UX 권고). - `gemma_llm_service.dart` + `llm_service.dart` — `_loadingFuture` 동시 호출 가드. 두 caller 가 동시에 load() 해도 native init 은 1 회만. - `chat_screen.dart` — initState postFrameCallback 에서 warmup.start(). warmup 상태에 따라 hintText / spinner / 실패 banner 분기. AC coverage (12개): - AC1~AC8: ChatWarmupController unit (chat_warmup_test.dart 8 tests). - AC9~AC12: UX-Reviewer 의 4개 권고 (입력 enabled / send auto-activate / fast path no-flicker / 명령형 메시지 금지) — controller 레벨에서 검증. 테스트: 167 passed (1 pre-existing skip). `flutter analyze` clean. Refs #311 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -37,13 +37,28 @@ class GemmaLlmService implements LlmService {
|
||||
|
||||
InferenceModel? _model;
|
||||
bool _loaded = false;
|
||||
Future<void>? _loadingFuture;
|
||||
|
||||
@override
|
||||
bool get isLoaded => _loaded;
|
||||
|
||||
/// #311 AC7: concurrent-call guard. If a load is already in-flight (e.g.
|
||||
/// `ChatScreen` warm-up + a racing `userTurn` lazy load), return the same
|
||||
/// Future so native init runs at most once per process.
|
||||
/// See `docs/design/311-llm-warmup/fn-concurrent_load_guard.md`.
|
||||
@override
|
||||
Future<void> load() async {
|
||||
if (_loaded) return;
|
||||
Future<void> load() {
  // Already initialised: hand back a completed future without touching
  // the in-flight slot.
  if (_loaded) return Future.value();

  // A load is already running: piggy-back on it instead of starting a
  // second `_doLoad`.
  final inFlight = _loadingFuture;
  if (inFlight != null) return inFlight;

  // First caller: start the load and publish it so concurrent callers can
  // share it.
  final started = _doLoad();
  _loadingFuture = started;

  // Free the slot on success *and* on failure; after that, a later call
  // starts a fresh `_doLoad` unless `_loaded` has been set. Note that the
  // first caller receives this derived future, while callers that arrive
  // during the load receive the stored one.
  return started.whenComplete(() {
    _loadingFuture = null;
  });
}
|
||||
|
||||
Future<void> _doLoad() async {
|
||||
if (!await File(modelPath).exists()) {
|
||||
throw FileSystemException('model file missing', modelPath);
|
||||
}
|
||||
|
||||
@@ -73,6 +73,16 @@ class MockLlmService implements LlmService {
|
||||
Map<String, dynamic>? lastSchema;
|
||||
Duration responseDelay = Duration.zero;
|
||||
|
||||
/// #311 test helpers. Simulate cold-load latency / failure so the warm-up
|
||||
/// controller can be exercised. Mirrors the Gemma path:
|
||||
/// - `loadDelay > 0` → load completes after the delay
|
||||
/// - `loadThrows` → load throws this error
|
||||
/// - `loadCount` → observed by concurrent-load tests
|
||||
Duration loadDelay = Duration.zero;
|
||||
Object? loadThrows;
|
||||
int loadCount = 0;
|
||||
Future<void>? _loadingFuture;
|
||||
|
||||
/// Queues consumed by [startChat] in order. Each entry is the event list
|
||||
/// returned for a single `send*` call.
|
||||
final List<List<LlmChatEvent>> chatScript = [];
|
||||
@@ -82,8 +92,29 @@ class MockLlmService implements LlmService {
|
||||
@override
|
||||
bool get isLoaded => _loaded;
|
||||
|
||||
/// #311 AC7: same concurrent-call guard as [GemmaLlmService]. Repeated
|
||||
/// in-flight `load()` calls share a single Future, so test assertions on
|
||||
/// `loadCount` reflect the number of native-init attempts (1), not the
|
||||
/// number of callers.
|
||||
@override
|
||||
Future<void> load() async {
|
||||
Future<void> load() {
  // Nothing to do once a previous load has finished successfully.
  if (_loaded) return Future.value();

  // Share the running load so `_doLoad` (and therefore `loadCount`) is
  // only entered once while a load is in flight.
  final inFlight = _loadingFuture;
  if (inFlight != null) return inFlight;

  // No load running: start one and remember it for concurrent callers.
  final started = _doLoad();
  _loadingFuture = started;

  // Reset the slot whether the load completed or threw, so a later call
  // starts a fresh `_doLoad` unless `_loaded` has been set.
  return started.whenComplete(() {
    _loadingFuture = null;
  });
}
|
||||
|
||||
Future<void> _doLoad() async {
  // Count every load attempt that actually starts, before any delay or
  // failure is applied.
  loadCount++;

  // Optional simulated latency; skipped entirely for a zero delay.
  if (loadDelay > Duration.zero) await Future<void>.delayed(loadDelay);

  // The configured failure is read only after the delay has elapsed. When
  // it fires, `_loaded` is left untouched.
  final failure = loadThrows;
  if (failure != null) throw failure;

  _loaded = true;
}
|
||||
|
||||
|
||||
@@ -94,6 +94,44 @@ class ModelLifecycle {
|
||||
return p.join(dir.path, config.filename);
|
||||
}
|
||||
|
||||
/// Lightweight ready estimate for warm-up gating (#311).
|
||||
///
|
||||
/// Skips the SHA-256 re-hash that [checkAvailability] performs — for a
|
||||
/// ~2.4GB model file the hash is wall-clock-noticeable on every screen
|
||||
/// mount. Returns `ready` iff:
|
||||
/// - opt_in is true
|
||||
/// - download_state is not in-progress
|
||||
/// - meta_kv has both ai_model_path and ai_model_sha256
|
||||
/// - the file exists on disk
|
||||
///
|
||||
/// Tampering/disk-corruption detection is left to [checkAvailability]'s
|
||||
/// cold path (SettingsScreen). The trade-off is documented in
|
||||
/// `docs/design/311-llm-warmup/README.md` §11 R4.
|
||||
Future<ModelAvailability> quickCheck() async {
  try {
    // Gate 1: anything other than the literal 'true' counts as not opted in.
    final optedIn = await meta.find(AiMetaKeys.optIn);
    if (optedIn != 'true') return ModelAvailability.missing;

    // Gate 2: a running or paused download is reported as `downloading`.
    final downloadState = await meta.find(AiMetaKeys.downloadState);
    final downloadInFlight =
        downloadState == 'downloading' || downloadState == 'paused';
    if (downloadInFlight) return ModelAvailability.downloading;

    // Gate 3: a recorded model path is required.
    final storedPath = await meta.find(AiMetaKeys.modelPath);
    if (storedPath == null) return ModelAvailability.missing;

    // Gate 4: a recorded checksum is required, but only its presence is
    // checked here; the file contents are not hashed.
    final storedSha = await meta.find(AiMetaKeys.modelSha);
    if (storedSha == null) return ModelAvailability.corrupt;

    // Gate 5: the file must exist on disk.
    return File(storedPath).existsSync()
        ? ModelAvailability.ready
        : ModelAvailability.missing;
  } catch (_) {
    // Any failure while reading metadata or probing the file is reported
    // as `corrupt` rather than propagated to the caller.
    return ModelAvailability.corrupt;
  }
}
|
||||
|
||||
Future<ModelAvailability> checkAvailability() async {
|
||||
try {
|
||||
final optIn = await meta.find(AiMetaKeys.optIn);
|
||||
|
||||
Reference in New Issue
Block a user