[03-Developer] #311 LLM warm-up + concurrent guard + quickCheck

ChatScreen 마운트 시 백그라운드 native init 으로 첫 send 시점에 native
load 지연을 안 보이게 한다. 12개 AC + UX-Reviewer 의 6개 권고 모두 코드
반영.

핵심 변경:
- `chat_warmup_provider.dart` — `ChatWarmupController` (Idle/Loading/Ready
  /Unavailable/Failed sealed state). fast path (`llm.isLoaded` → Ready),
  FileSystemException ↔ runtime kind 분기, _disposed race guard.
- `model_lifecycle.dart` — `quickCheck()`: 2.4GB SHA-256 hashing 없이
  meta_kv + 파일 존재만 보고 ready 추정 (R4 UX 권고).
- `gemma_llm_service.dart` + `llm_service.dart` — `_loadingFuture` 동시
  호출 가드. 두 caller 가 동시에 load() 해도 native init 은 1 회만.
- `chat_screen.dart` — initState postFrameCallback 에서 warmup.start().
  warmup 상태에 따라 hintText / spinner / 실패 banner 분기.

AC coverage (12개):
- AC1~AC8: ChatWarmupController unit (chat_warmup_test.dart 8 tests).
- AC9~AC12: UX-Reviewer 의 4개 권고 (입력 enabled / send auto-activate /
  fast path no-flicker / 명령형 메시지 금지) — controller 레벨에서 검증.

테스트: 167 passed (1 pre-existing skip). `flutter analyze` clean.

Refs #311

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 12:52:50 +09:00
parent 1fa4f24a8a
commit 5b4c05316a
8 changed files with 587 additions and 22 deletions

View File

@@ -37,13 +37,28 @@ class GemmaLlmService implements LlmService {
InferenceModel? _model;
bool _loaded = false;
Future<void>? _loadingFuture;
@override
bool get isLoaded => _loaded;
/// #311 AC7: concurrent-call guard. If a load is already in-flight (e.g.
/// `ChatScreen` warm-up + a racing `userTurn` lazy load), return the same
/// Future so native init runs at most once per process.
/// See `docs/design/311-llm-warmup/fn-concurrent_load_guard.md`.
@override
Future<void> load() async {
if (_loaded) return;
Future<void> load() {
// Already loaded: nothing to do, hand back a completed future.
if (_loaded) return Future.value();
// A load is already in flight: share its future instead of starting a
// second one. The field is copied to a local so the null check promotes.
final existing = _loadingFuture;
if (existing != null) return existing;
// First caller: start the real load and publish its future before
// returning, so any call made while it is pending takes the branch above.
final future = _doLoad();
_loadingFuture = future;
// Clear the slot whether the load succeeded or failed, so later calls are
// never handed a future that has already finished.
return future.whenComplete(() {
_loadingFuture = null;
});
}
Future<void> _doLoad() async {
if (!await File(modelPath).exists()) {
throw FileSystemException('model file missing', modelPath);
}

View File

@@ -73,6 +73,16 @@ class MockLlmService implements LlmService {
Map<String, dynamic>? lastSchema;
Duration responseDelay = Duration.zero;
/// #311 test helpers. Simulate cold-load latency / failure so the warm-up
/// controller can be exercised. Mirrors the Gemma path:
/// - `loadDelay > 0` → load completes after the delay
/// - `loadThrows` → load throws this error
/// - `loadCount` → observed by concurrent-load tests
Duration loadDelay = Duration.zero;
Object? loadThrows;
int loadCount = 0;
Future<void>? _loadingFuture;
/// Queues consumed by [startChat] in order. Each entry is the event list
/// returned for a single `send*` call.
final List<List<LlmChatEvent>> chatScript = [];
@@ -82,8 +92,29 @@ class MockLlmService implements LlmService {
@override
bool get isLoaded => _loaded;
/// #311 AC7: same concurrent-call guard as [GemmaLlmService]. Repeated
/// in-flight `load()` calls share a single Future, so test assertions on
/// `loadCount` reflect the number of native-init attempts (1), not the
/// number of callers.
@override
Future<void> load() async {
Future<void> load() {
// Already loaded: return a completed future without touching loadCount.
if (_loaded) return Future.value();
// A load is already pending: hand back the same future so concurrent
// callers do not trigger another _doLoad().
final existing = _loadingFuture;
if (existing != null) return existing;
// First caller: start the simulated load and record it as in-flight.
final future = _doLoad();
_loadingFuture = future;
// Reset the in-flight slot on both success and failure; after a failure
// _loaded is still false, so the next call starts a new attempt.
return future.whenComplete(() {
_loadingFuture = null;
});
}
/// Performs one simulated load attempt.
///
/// Counts the attempt in [loadCount], waits for [loadDelay] when it is
/// positive, then either throws [loadThrows] (leaving the service unloaded)
/// or marks the service as loaded.
Future<void> _doLoad() async {
  loadCount++;

  final delay = loadDelay;
  if (delay > Duration.zero) await Future<void>.delayed(delay);

  final failure = loadThrows;
  if (failure != null) throw failure;

  _loaded = true;
}

View File

@@ -94,6 +94,44 @@ class ModelLifecycle {
return p.join(dir.path, config.filename);
}
/// Cheap readiness probe used to gate warm-up (#311).
///
/// Unlike [checkAvailability], this never re-hashes the model file; the
/// stored SHA-256 is only checked for presence. Hashing a ~2.4GB file on
/// every screen mount is noticeable in wall-clock time.
///
/// Checks run in this order and the first match wins:
/// - opt-in flag is not `'true'` → [ModelAvailability.missing]
/// - download state is `'downloading'` or `'paused'` →
///   [ModelAvailability.downloading]
/// - no stored model path → [ModelAvailability.missing]
/// - no stored SHA-256 → [ModelAvailability.corrupt]
/// - no file at the stored path → [ModelAvailability.missing]
/// - otherwise → [ModelAvailability.ready]
///
/// Any exception raised along the way is reported as
/// [ModelAvailability.corrupt].
///
/// Tampering / disk-corruption detection stays with [checkAvailability]'s
/// cold path (SettingsScreen). The trade-off is documented in
/// `docs/design/311-llm-warmup/README.md` §11 R4.
Future<ModelAvailability> quickCheck() async {
  try {
    if (await meta.find(AiMetaKeys.optIn) != 'true') {
      return ModelAvailability.missing;
    }

    final downloadState = await meta.find(AiMetaKeys.downloadState);
    final inProgress =
        downloadState == 'downloading' || downloadState == 'paused';
    if (inProgress) return ModelAvailability.downloading;

    final storedPath = await meta.find(AiMetaKeys.modelPath);
    if (storedPath == null) return ModelAvailability.missing;

    // Presence check only: the hash itself is not recomputed here.
    final storedSha = await meta.find(AiMetaKeys.modelSha);
    if (storedSha == null) return ModelAvailability.corrupt;

    return File(storedPath).existsSync()
        ? ModelAvailability.ready
        : ModelAvailability.missing;
  } catch (_) {
    return ModelAvailability.corrupt;
  }
}
Future<ModelAvailability> checkAvailability() async {
try {
final optIn = await meta.find(AiMetaKeys.optIn);

View File

@@ -0,0 +1,135 @@
import 'dart:io';
import 'package:flutter_riverpod/flutter_riverpod.dart';
import '../data/ai/llm_service.dart';
import '../data/ai/model_lifecycle.dart';
import 'ai_providers.dart';
/// State machine for ChatScreen LLM warm-up (#311).
///
/// Base type of the warm-up states: [ChatWarmupIdle], [ChatWarmupLoading],
/// [ChatWarmupReady], [ChatWarmupUnavailable] and [ChatWarmupFailed]. The
/// class is sealed, so a `switch` over it can be checked for exhaustiveness.
///
/// See `docs/design/311-llm-warmup/README.md` §6 / fn-chat_warmup_controller.md.
sealed class ChatWarmupState {
const ChatWarmupState();
}
/// Initial state: no warm-up has produced a result yet.
///
/// [ChatWarmupController] starts in this state and returns to it at the
/// beginning of a retry.
final class ChatWarmupIdle extends ChatWarmupState {
const ChatWarmupIdle();
}
/// The model passed the quick availability check and `LlmService.load()` is
/// currently being awaited.
final class ChatWarmupLoading extends ChatWarmupState {
const ChatWarmupLoading();
}
/// The LLM service is loaded: either it already was when warm-up started,
/// or the warm-up load completed without throwing.
final class ChatWarmupReady extends ChatWarmupState {
const ChatWarmupReady();
}
/// Warm-up was skipped because [ModelLifecycle.quickCheck] returned
/// something other than `ready` (download incomplete, opt-out, corrupt).
///
/// No load is attempted in this state. The UI behaves as if warm-up didn't
/// exist; the first user send falls back to the existing lazy `userTurn`
/// path.
final class ChatWarmupUnavailable extends ChatWarmupState {
const ChatWarmupUnavailable();
}
/// Why a warm-up load failed; discriminates the retry copy.
///
/// - `fileMissing`: the load threw a `FileSystemException`; recovery is at
///   the settings level.
/// - `runtime`: any other error; treated as a transient failure that can be
///   retried.
enum ChatWarmupFailureKind { fileMissing, runtime }
/// Warm-up was attempted but `LlmService.load()` threw.
final class ChatWarmupFailed extends ChatWarmupState {
/// Status text describing the failure, chosen by the controller from [kind].
final String message;
/// Whether the failure was a missing model file or a runtime error.
final ChatWarmupFailureKind kind;
const ChatWarmupFailed(this.message, this.kind);
}
/// Drives `LlmService.load()` on ChatScreen mount so the first user send
/// doesn't pay native-init latency.
///
/// All twelve acceptance criteria (AC1-AC12) are handled at this level; the
/// UI in chat_screen.dart only watches the resulting [ChatWarmupState].
class ChatWarmupController extends StateNotifier<ChatWarmupState> {
  ChatWarmupController({
    required this.llm,
    required this.lifecycle,
  }) : super(const ChatWarmupIdle());

  /// Service whose `load()` is warmed up.
  final LlmService llm;

  /// Source of the quick availability check that gates the warm-up.
  final ModelLifecycle lifecycle;

  /// Set by [dispose]; no state may be written once this is true.
  bool _disposed = false;

  /// True for the whole duration of a warm-up run, including the awaited
  /// availability check that precedes the Loading state.
  bool _running = false;

  /// Starts a warm-up run unless one is already in progress.
  ///
  /// Idempotent: a duplicate call made while a run is in flight returns
  /// immediately and the running call sets the final state. The guard is a
  /// dedicated flag rather than the Loading state, because Loading is only
  /// entered after the availability check has been awaited; a state-based
  /// guard would let overlapping calls through during that window.
  ///
  /// Does nothing after [dispose].
  Future<void> start() async {
    if (_disposed || _running) return;
    _running = true;
    try {
      await _warmUp();
    } finally {
      _running = false;
    }
  }

  /// Resets to [ChatWarmupIdle] and starts a new warm-up run.
  ///
  /// Ignored after [dispose] and while a run is already in flight, so a
  /// repeated tap on the retry button cannot start overlapping runs.
  Future<void> retry() async {
    if (_disposed || _running) return;
    _safeSet(const ChatWarmupIdle());
    await start();
  }

  /// One warm-up run: fast path, availability gate, then the actual load.
  Future<void> _warmUp() async {
    // AC11 / UX R4: fast path. Skip Loading entirely if the underlying
    // service is already loaded. This branch runs before the first await,
    // so the state changes synchronously and ChatScreen re-entry shows no
    // one-frame label flicker.
    if (llm.isLoaded) {
      _safeSet(const ChatWarmupReady());
      return;
    }

    final availability = await lifecycle.quickCheck();
    if (_disposed) return;
    if (availability != ModelAvailability.ready) {
      _safeSet(const ChatWarmupUnavailable());
      return;
    }

    _safeSet(const ChatWarmupLoading());
    try {
      await llm.load();
    } catch (e) {
      // Deliberately broad: any failure of the load is surfaced as a Failed
      // state instead of escaping from a fire-and-forget call.
      final kind = e is FileSystemException
          ? ChatWarmupFailureKind.fileMissing
          : ChatWarmupFailureKind.runtime;
      _safeSet(ChatWarmupFailed(_messageFor(kind), kind));
      return;
    }
    _safeSet(const ChatWarmupReady());
  }

  /// AC6 / AC12: never write state on a disposed instance.
  ///
  /// The StateNotifier state setter throws once the notifier is disposed,
  /// so every write goes through this guard.
  void _safeSet(ChatWarmupState s) {
    if (_disposed) return;
    state = s;
  }

  /// UX R5 / AC12: the message describes the **status** only. Imperative
  /// wording such as "please try again" is left to the retry button and
  /// must not appear in this copy.
  String _messageFor(ChatWarmupFailureKind kind) => switch (kind) {
        ChatWarmupFailureKind.fileMissing => 'AI 모델 파일을 찾을 수 없어요.',
        ChatWarmupFailureKind.runtime => 'AI 를 시작하지 못했어요.',
      };

  @override
  void dispose() {
    _disposed = true;
    super.dispose();
  }
}
/// Provides the [ChatWarmupController] and its [ChatWarmupState].
///
/// autoDispose: when ChatScreen is popped the controller is disposed too,
/// which keeps the mount race safe.
final chatWarmupProvider =
    StateNotifierProvider.autoDispose<ChatWarmupController, ChatWarmupState>(
  (ref) => ChatWarmupController(
    llm: ref.watch(llmServiceProvider),
    lifecycle: ref.watch(modelLifecycleProvider),
  ),
);

View File

@@ -3,6 +3,7 @@ import 'package:flutter_riverpod/flutter_riverpod.dart';
import '../../ai/tools/tool_envelope.dart';
import '../../state/chat_providers.dart';
import '../../state/chat_warmup_provider.dart';
/// AI chat surface (#260). Multi-turn tool calling powered by Gemma 4 +
/// in-process tool runtime. ConfirmGate modals appear on destructive
@@ -18,6 +19,18 @@ class _ChatScreenState extends ConsumerState<ChatScreen> {
final _textCtrl = TextEditingController();
final _scrollCtrl = ScrollController();
@override
void initState() {
super.initState();
// #311 AC1: kick off the background warm-up when ChatScreen mounts. Until
// depsAsync.data resolves, toolDepsProvider is not ready either and sending
// is blocked anyway, so native init is completed during that window.
// The call is deferred to after the current frame; the `mounted` check
// skips it if this State was disposed before the callback ran.
WidgetsBinding.instance.addPostFrameCallback((_) {
if (!mounted) return;
// The returned future is not awaited here.
ref.read(chatWarmupProvider.notifier).start();
});
}
@override
void dispose() {
_textCtrl.dispose();
@@ -72,18 +85,33 @@ class _ChatScreenState extends ConsumerState<ChatScreen> {
Widget _buildBody(BuildContext context) {
final state = ref.watch(chatSessionControllerProvider);
final warmup = ref.watch(chatWarmupProvider);
_scrollToBottom();
// #311 AC3 / UX R3: warmup 중에는 hintText 만 교체. 입력창 자체는
// enabled (사용자가 미리 타이핑 가능 — AC9).
final isWarming = warmup is ChatWarmupLoading;
final hintText = isWarming
? 'AI 준비 중… 첫 시작은 몇 초 걸려요'
: '습관 추가, 기록, 카탈로그 질문…';
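// AC10: send is enabled whenever warm-up is not in the Loading state and no
// response is streaming (so also in Idle, Unavailable and Failed, not only
// Ready). Empty text is rejected by the early return in _send(), so no
// separate gating is needed here (avoids a rebuild race).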
final canSend = !state.isStreaming && !isWarming;
final theme = Theme.of(context);
return Column(
children: [
if (warmup is ChatWarmupFailed) _WarmupErrorBanner(warmup: warmup),
if (state.error != null)
Container(
width: double.infinity,
color: Theme.of(context).colorScheme.errorContainer,
color: theme.colorScheme.errorContainer,
padding: const EdgeInsets.all(12),
child: Text(
state.error!,
style: TextStyle(
color: Theme.of(context).colorScheme.onErrorContainer,
color: theme.colorScheme.onErrorContainer,
),
),
),
@@ -117,31 +145,32 @@ class _ChatScreenState extends ConsumerState<ChatScreen> {
child: TextField(
controller: _textCtrl,
enabled: !state.isStreaming,
decoration: const InputDecoration(
hintText: '습관 추가, 기록, 카탈로그 질문…',
border: OutlineInputBorder(),
decoration: InputDecoration(
hintText: hintText,
border: const OutlineInputBorder(),
isDense: true,
),
maxLines: 4,
minLines: 1,
textInputAction: TextInputAction.send,
onSubmitted: (_) => _send(),
onSubmitted: (_) => canSend ? _send() : null,
),
),
const SizedBox(width: 8),
state.isStreaming
? const Padding(
padding: EdgeInsets.all(8),
child: SizedBox(
width: 24,
height: 24,
child: CircularProgressIndicator(strokeWidth: 2),
),
)
: IconButton.filled(
onPressed: _send,
icon: const Icon(Icons.send),
),
if (state.isStreaming || isWarming)
const Padding(
padding: EdgeInsets.all(8),
child: SizedBox(
width: 24,
height: 24,
child: CircularProgressIndicator(strokeWidth: 2),
),
)
else
IconButton.filled(
onPressed: canSend ? _send : null,
icon: const Icon(Icons.send),
),
],
),
),
@@ -150,6 +179,41 @@ class _ChatScreenState extends ConsumerState<ChatScreen> {
}
}
/// Banner shown when warm-up has failed (#311 AC5 / UX R5+R6).
///
/// The text only states what happened; the action lives in the retry button,
/// which asks the warm-up controller to run again.
class _WarmupErrorBanner extends ConsumerWidget {
  const _WarmupErrorBanner({required this.warmup});

  final ChatWarmupFailed warmup;

  @override
  Widget build(BuildContext context, WidgetRef ref) {
    final scheme = Theme.of(context).colorScheme;

    final messageText = Text(
      warmup.message,
      style: TextStyle(color: scheme.onErrorContainer),
    );

    final retryButton = Align(
      alignment: Alignment.centerRight,
      child: OutlinedButton(
        // The notifier is looked up at press time, not at build time.
        onPressed: () => ref.read(chatWarmupProvider.notifier).retry(),
        child: const Text('다시 시도'),
      ),
    );

    return Container(
      width: double.infinity,
      color: scheme.errorContainer,
      padding: const EdgeInsets.symmetric(horizontal: 12, vertical: 10),
      child: Column(
        crossAxisAlignment: CrossAxisAlignment.stretch,
        children: [
          messageText,
          const SizedBox(height: 8),
          retryButton,
        ],
      ),
    );
  }
}
/// Human-friendly Korean labels for the 6 tools registered in
/// `ToolRegistry.defaults()`. Falls back to the raw tool name for any
/// future tool that hasn't been mapped yet — better to show the raw id