Files
life-helper/app/lib/data/ai/gemma_llm_service.dart
joungmin b1bed4d5ca [03-Developer] #260 in-app tool calling (Gemma 4 multi-turn)
ADR-0005 in-process tool runtime — 6 tools (catalog 2 + tracker 2 +
habit 2), ToolDispatcher with JSON-schema validation + modal ConfirmGate
for destructive ops, multi-turn LlmChatSession abstraction wired to
flutter_gemma 0.16.5 (ToolChoice.auto), ChatSessionController with
MAX_TURNS=4 safety + 8-turn history hint, ChatScreen entry behind AI
opt-in. R3/R7/R8 enforced inside handlers. 41 new tests (envelope,
catalog/tracker/habit tools, dispatcher, controller loop) — 151 total
passing.

Refs #260
2026-06-15 10:42:43 +09:00

256 lines
7.7 KiB
Dart

import 'dart:io';
import 'package:flutter/foundation.dart';
import 'package:flutter_gemma/flutter_gemma.dart';
import '../../ai/tools/tool_definition.dart' as tools;
import 'llm_service.dart';
/// HuggingFace access token, supplied at build time through
/// `--dart-define=HF_TOKEN=hf_xxx`.
///
/// Resolves to the empty string when the define is absent (the default of
/// [String.fromEnvironment]), which is permitted: flutter_gemma only needs
/// the token for the initial network download, and that step is handled
/// separately by `ModelLifecycle`. Activating a model from a local file
/// path generally does not require it.
const String _hfToken = String.fromEnvironment('HF_TOKEN');

/// Whether [FlutterGemma.initialize] has already been invoked in this
/// isolate.
///
/// Acts as a one-shot guard because the underlying plugin does not support
/// being initialized again.
bool _initialized = false;
/// Real on-device LLM backend using flutter_gemma 0.16.5 + Gemma 4 E2B.
///
/// Wired into the existing #215 pipeline: `ModelLifecycle` downloads &
/// SHA-verifies the .litertlm file, then [load] registers that file with
/// flutter_gemma as the active model.
///
/// Two inference entry points are provided:
/// - [generateStructured] opens a short-lived chat with a single [Tool]
///   (Gemma 4 native function calling) and returns the first matching
///   [FunctionCallResponse]'s args.
/// - [startChat] opens a multi-turn [LlmChatSession] that exposes every
///   supplied tool and lets the model decide whether to call one.
///
/// Function-calling design notes (see fn-gemma_llm_service.md §B v2):
/// - Gemma 4 SDK injects the tool declaration via its chat template, so
///   we pass [Tool] to `createChat(tools: ...)` rather than appending a
///   schema instruction to the prompt (double-wrap risk).
/// - `ToolChoice.required` forces the model to emit a function call.
class GemmaLlmService implements LlmService {
  GemmaLlmService({required this.modelPath});

  /// Location of the local model file handed to flutter_gemma by [load].
  final String modelPath;

  /// Active model handle; `null` until [load] succeeds and after [unload].
  InferenceModel? _model;

  bool _loaded = false;

  @override
  bool get isLoaded => _loaded;

  /// Registers the file at [modelPath] with flutter_gemma and activates it.
  ///
  /// Does nothing when the service is already loaded. Throws a
  /// [FileSystemException] when no file exists at [modelPath].
  @override
  Future<void> load() async {
    if (_loaded) return;
    if (!await File(modelPath).exists()) {
      throw FileSystemException('model file missing', modelPath);
    }
    // The plugin is initialized at most once per isolate; see [_initialized].
    if (!_initialized) {
      await FlutterGemma.initialize(huggingFaceToken: _hfToken);
      _initialized = true;
    }
    await FlutterGemma.installModel(
      modelType: ModelType.gemma4,
      fileType: ModelFileType.litertlm,
    ).fromFile(modelPath).install();
    _model = await FlutterGemma.getActiveModel(maxTokens: 2048);
    _loaded = true;
  }

  /// Marks the service as unloaded and closes the model handle, if any.
  ///
  /// State is reset before the handle is closed, and a failing close is
  /// ignored, so the service always ends up in the unloaded state.
  @override
  Future<void> unload() async {
    final model = _model;
    _model = null;
    _loaded = false;
    if (model == null) return;
    try {
      await model.close();
    } catch (_) {
      // Best-effort cleanup — runtime may already be torn down.
    }
  }

  /// Runs [prompt] through a one-shot chat that must call the function
  /// described by [schema], and returns that call's arguments.
  ///
  /// [schema] must contain a non-empty string `name` and a map
  /// `parameters`; `description` is optional but must be a string when
  /// present.
  ///
  /// Throws a [StateError] when the service is not loaded and an
  /// [ArgumentError] when [schema] is malformed. Failures while reading
  /// the model's response surface as the [FormatException]s thrown by
  /// [collectFunctionCall].
  @override
  Future<Map<String, dynamic>> generateStructured(
    String prompt,
    Map<String, dynamic> schema,
  ) async {
    final model = _requireModel();

    final fnName = schema['name'];
    if (fnName is! String || fnName.isEmpty) {
      throw ArgumentError('schema.name missing');
    }
    final fnParams = schema['parameters'];
    if (fnParams is! Map) {
      throw ArgumentError('schema.parameters missing');
    }
    // Validate instead of casting so a malformed description is reported
    // the same way as the other schema fields rather than as a cast error.
    final rawDescription = schema['description'];
    if (rawDescription is! String?) {
      throw ArgumentError('schema.description must be a string');
    }

    final tool = Tool(
      name: fnName,
      description: rawDescription ?? '',
      parameters: Map<String, dynamic>.from(fnParams),
    );
    final chat = await model.createChat(
      modelType: ModelType.gemma4,
      supportsFunctionCalls: true,
      toolChoice: ToolChoice.required,
      tools: [tool],
    );
    try {
      await chat.addQueryChunk(Message.text(text: prompt, isUser: true));
      final stream = chat.generateChatResponseAsync();
      return await collectFunctionCall(stream, fnName);
    } finally {
      // The chat is single-use; always release it, even on failure.
      try {
        await chat.close();
      } catch (_) {
        // Native session close failure is non-fatal — ignore and continue.
      }
    }
  }

  /// Opens a multi-turn chat session that exposes [tools] to the model.
  ///
  /// The caller owns the returned session and is responsible for closing
  /// it. Throws a [StateError] when the service is not loaded.
  @override
  Future<LlmChatSession> startChat({
    required List<tools.ToolDefinition> tools,
  }) async {
    final model = _requireModel();
    final gemmaTools = [
      for (final definition in tools)
        Tool(
          name: definition.name,
          description: definition.description,
          parameters: Map<String, dynamic>.from(definition.parametersSchema),
        ),
    ];
    final chat = await model.createChat(
      modelType: ModelType.gemma4,
      supportsFunctionCalls: true,
      // ToolChoice.auto = the model decides on its own (supports both
      // multi-tool and reply-only turns).
      toolChoice: ToolChoice.auto,
      tools: gemmaTools,
    );
    return _GemmaChatSession(chat);
  }

  /// Returns the active model, or throws a [StateError] when [load] has
  /// not completed (or [unload] has since run).
  InferenceModel _requireModel() {
    final model = _model;
    if (!_loaded || model == null) {
      throw StateError('LlmService not loaded');
    }
    return model;
  }
}
/// [LlmChatSession] adapter over a flutter_gemma chat object.
///
/// Each send appends one message to the chat and relays the model's
/// response as [LlmChatEvent]s. The stream of a turn ends as soon as the
/// model requests a function call, so the caller can execute the tool and
/// answer through [sendToolResult].
class _GemmaChatSession implements LlmChatSession {
  _GemmaChatSession(this._chat);

  /// Underlying chat. Held as `dynamic`, so calls on it are resolved at
  /// runtime rather than checked statically.
  final dynamic _chat;

  bool _closed = false;

  /// Sends a user message. Throws a [StateError] synchronously when the
  /// session has been closed.
  @override
  Stream<LlmChatEvent> sendUser(String text) {
    _ensureOpen();
    return _run(Message.text(text: text, isUser: true));
  }

  /// Feeds the [result] of tool [toolName] back to the model. Throws a
  /// [StateError] synchronously when the session has been closed.
  @override
  Stream<LlmChatEvent> sendToolResult({
    required String toolName,
    required Map<String, dynamic> result,
  }) {
    _ensureOpen();
    return _run(Message.toolResponse(toolName: toolName, response: result));
  }

  /// Guard shared by the send methods.
  void _ensureOpen() {
    if (_closed) {
      throw StateError('LlmChatSession is closed');
    }
  }

  /// Appends [msg] to the chat and translates the response stream.
  ///
  /// Text tokens are forwarded as [LlmTextChunk]s. The first function call
  /// is emitted as an [LlmFunctionCall] and terminates the stream. Every
  /// other event kind (thinking output, empty parallel calls) is dropped.
  Stream<LlmChatEvent> _run(Message msg) async* {
    await _chat.addQueryChunk(msg);
    final Stream<ModelResponse> responses = _chat.generateChatResponseAsync();
    await for (final response in responses) {
      if (response is TextResponse) {
        yield LlmTextChunk(response.token);
        continue;
      }

      LlmFunctionCall? call;
      if (response is FunctionCallResponse) {
        call = LlmFunctionCall(
          response.name,
          Map<String, dynamic>.from(response.args),
        );
      } else if (response is ParallelFunctionCallResponse &&
          response.calls.isNotEmpty) {
        // ADR-0005: parallel calls collapsed to first — sequential dispatch.
        final head = response.calls.first;
        call = LlmFunctionCall(
          head.name,
          Map<String, dynamic>.from(head.args),
        );
      }

      if (call != null) {
        yield call;
        return; // Hand control back to the caller for tool execution.
      }
      // ThinkingResponse / other: skip.
    }
  }

  /// Closes the underlying chat once; later calls are no-ops and a failing
  /// close is ignored.
  @override
  Future<void> close() async {
    if (_closed) return;
    _closed = true;
    try {
      await _chat.close();
    } catch (_) {
      // Best-effort cleanup.
    }
  }
}
/// Reads [stream] up to the model's first function call and returns that
/// call's arguments, provided it targets [expectedName].
///
/// `TextResponse` / `ThinkingResponse` events are skipped. A
/// `ParallelFunctionCallResponse` is judged by its first call only, and
/// one with no calls is skipped as well.
///
/// Throws a [FormatException] when:
/// - the stream, or copying the call's arguments, raises an error — the
///   original error is discarded on purpose (see the catch clause);
/// - the first function call names a function other than [expectedName];
/// - the stream ends without any function call.
///
/// Used by [GemmaLlmService]; kept top-level and `@visibleForTesting` so
/// unit tests can feed synthetic streams (see fn-spec §D, 8 test cases).
@visibleForTesting
Future<Map<String, dynamic>> collectFunctionCall(
  Stream<ModelResponse> stream,
  String expectedName,
) async {
  // Name of the first call seen, and its args if that name was expected.
  String? calledName;
  Map<String, dynamic>? calledArgs;

  try {
    await for (final response in stream) {
      if (response is FunctionCallResponse) {
        calledName = response.name;
        if (calledName == expectedName) {
          calledArgs = Map<String, dynamic>.from(response.args);
        }
        break;
      }
      if (response is ParallelFunctionCallResponse &&
          response.calls.isNotEmpty) {
        final head = response.calls.first;
        calledName = head.name;
        if (calledName == expectedName) {
          calledArgs = Map<String, dynamic>.from(head.args);
        }
        break;
      }
      // TextResponse / ThinkingResponse: skip.
    }
  } catch (_) {
    // Discard raw error to avoid leaking prompt content in logs/crash
    // reports — the caller surfaces a generic message.
    throw const FormatException('stream error');
  }

  if (calledName != null && calledName != expectedName) {
    throw FormatException('unexpected function: $calledName');
  }
  final args = calledArgs;
  if (args == null) {
    throw const FormatException('no function call emitted');
  }
  return args;
}