[Developer] #218 Real Gemma 4 E2B integration via flutter_gemma 0.16.5
Implements the OQ-1 follow-up to #215 v0.2.0: replace the placeholder GemmaLlmService stub with a real flutter_gemma 0.16.5 backend driving Gemma 4 E2B (litert-community/gemma-4-E2B-it-litert-lm, 2.41GB). Highlights: - GemmaLlmService.load → FlutterGemma.initialize + installModel.fromFile + getActiveModel; idempotent + FileSystemException on missing file. - generateStructured uses Gemma 4 native function calling via createChat(tools: [Tool(...)], toolChoice: required). Stream parsed by collectFunctionCall — first FCR wins, ParallelFCR first-call wins, TextResponse/ThinkingResponse skipped, errors sanitized to prevent prompt leakage. - main.dart wires _LazyLlmService adapter that resolves to GemmaLlmService when ModelLifecycle reports ready, MockLlmService otherwise. - ai_providers.dart pins real model URL + SHA-256 (181938...39a63c). - F2 hardening: ModelLifecycle.purge wraps each delete + meta remove in try/catch so a single OS-level flake cannot block opt-out. - Android: INTERNET / FOREGROUND_SERVICE / POST_NOTIFICATIONS permissions + R8 proguard-rules.pro keeping MediaPipe / LiteRT / TFLite / protobuf JNI entry points (release builds otherwise crash on first inference). Design-First: fn-gemma_llm_service.md updated to v2 — §C (_appendSchemaInstruction) deprecated after reading flutter_gemma 0.16.5 source (Gemma 4 SDK injects tool declarations via template; prompt-side append would double-wrap). Tests: - 10 new unit tests for collectFunctionCall covering all 8 fn-spec cases + 2 ParallelFunctionCallResponse paths. - All 81 existing tests still pass. - flutter analyze: 0 issues. Refs #218 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,21 +1,40 @@
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:flutter/foundation.dart';
|
||||
import 'package:flutter_gemma/flutter_gemma.dart';
|
||||
|
||||
import 'llm_service.dart';
|
||||
|
||||
/// Stub for the real `flutter_gemma` integration.
|
||||
/// HuggingFace access token injected at build time via
|
||||
/// `--dart-define=HF_TOKEN=hf_xxx`. Empty string is permitted —
|
||||
/// flutter_gemma will only need it for the initial network download,
|
||||
/// which our `ModelLifecycle` handles separately; activation from a
|
||||
/// local file path generally does not require the token.
|
||||
const String _hfToken = String.fromEnvironment('HF_TOKEN', defaultValue: '');
|
||||
|
||||
/// One-shot guard so [FlutterGemma.initialize] runs at most once per
|
||||
/// isolate. Re-init is unsupported by the underlying plugin.
|
||||
bool _initialized = false;
|
||||
|
||||
/// Real on-device LLM backend using flutter_gemma 0.16.5 + Gemma 4 E2B.
|
||||
///
|
||||
/// Wired up only after OQ-1 (exact model URL + SHA + flutter_gemma API
|
||||
/// surface) is confirmed in Developer phase. Today this throws
|
||||
/// `UnimplementedError` from every method — the rest of the system
|
||||
/// (suggestFrame, ModelLifecycle, Riverpod providers) is built against the
|
||||
/// `LlmService` abstract above and runs end-to-end with `MockLlmService`.
|
||||
/// Wired into the existing #215 pipeline: `ModelLifecycle` downloads &
|
||||
/// SHA-verifies the .litertlm file, then [load] registers that file with
|
||||
/// flutter_gemma as the active model. [generateStructured] opens a
|
||||
/// short-lived chat with a single [Tool] (Gemma 4 native function
|
||||
/// calling) and returns the first matching [FunctionCallResponse]'s args.
|
||||
///
|
||||
/// When the package is added, replace the bodies with calls into
|
||||
/// FlutterGemma.init / generateWithFunctionCalling per the package docs.
|
||||
/// Existing tests + UI hooks remain unchanged.
|
||||
/// Function-calling design notes (see fn-gemma_llm_service.md §B v2):
|
||||
/// - Gemma 4 SDK injects the tool declaration via its chat template, so
|
||||
/// we pass [Tool] to `createChat(tools: ...)` rather than appending a
|
||||
/// schema instruction to the prompt (double-wrap risk).
|
||||
/// - `ToolChoice.required` forces the model to emit a function call.
|
||||
class GemmaLlmService implements LlmService {
|
||||
final String modelPath;
|
||||
|
||||
GemmaLlmService({required this.modelPath});
|
||||
|
||||
InferenceModel? _model;
|
||||
bool _loaded = false;
|
||||
|
||||
@override
|
||||
@@ -23,14 +42,35 @@ class GemmaLlmService implements LlmService {
|
||||
|
||||
@override
|
||||
Future<void> load() async {
|
||||
throw UnimplementedError(
|
||||
'GemmaLlmService.load: pending OQ-1 (model URL + flutter_gemma).',
|
||||
);
|
||||
if (_loaded) return;
|
||||
if (!await File(modelPath).exists()) {
|
||||
throw FileSystemException('model file missing', modelPath);
|
||||
}
|
||||
if (!_initialized) {
|
||||
await FlutterGemma.initialize(huggingFaceToken: _hfToken);
|
||||
_initialized = true;
|
||||
}
|
||||
await FlutterGemma.installModel(
|
||||
modelType: ModelType.gemma4,
|
||||
fileType: ModelFileType.litertlm,
|
||||
).fromFile(modelPath).install();
|
||||
final model = await FlutterGemma.getActiveModel(maxTokens: 2048);
|
||||
_model = model;
|
||||
_loaded = true;
|
||||
}
|
||||
|
||||
@override
|
||||
/// Releases the active model, if any, and marks this service as not loaded.
///
/// The cached model reference and the loaded flag are reset *before* the
/// close call is awaited, so the service already reports "not loaded"
/// while the close is still in flight. Any error thrown by the close
/// call is ignored.
Future<void> unload() async {
  final active = _model;

  // Reset state first; nothing below can leave the service half-loaded.
  _loaded = false;
  _model = null;

  if (active == null) {
    return;
  }

  try {
    await active.close();
  } catch (_) {
    // Best-effort cleanup — runtime may already be torn down.
  }
}
|
||||
|
||||
@override
|
||||
@@ -38,8 +78,90 @@ class GemmaLlmService implements LlmService {
|
||||
String prompt,
|
||||
Map<String, dynamic> schema,
|
||||
) async {
|
||||
throw UnimplementedError(
|
||||
'GemmaLlmService.generateStructured: pending OQ-1.',
|
||||
if (!_loaded || _model == null) {
|
||||
throw StateError('LlmService not loaded');
|
||||
}
|
||||
final fnName = schema['name'];
|
||||
final fnParams = schema['parameters'];
|
||||
if (fnName is! String || fnName.isEmpty) {
|
||||
throw ArgumentError('schema.name missing');
|
||||
}
|
||||
if (fnParams is! Map) {
|
||||
throw ArgumentError('schema.parameters missing');
|
||||
}
|
||||
final fnDesc = (schema['description'] as String?) ?? '';
|
||||
final tool = Tool(
|
||||
name: fnName,
|
||||
description: fnDesc,
|
||||
parameters: Map<String, dynamic>.from(fnParams),
|
||||
);
|
||||
|
||||
final chat = await _model!.createChat(
|
||||
modelType: ModelType.gemma4,
|
||||
supportsFunctionCalls: true,
|
||||
toolChoice: ToolChoice.required,
|
||||
tools: [tool],
|
||||
);
|
||||
try {
|
||||
await chat.addQueryChunk(Message.text(text: prompt, isUser: true));
|
||||
final stream = chat.generateChatResponseAsync();
|
||||
return await collectFunctionCall(stream, fnName);
|
||||
} finally {
|
||||
try {
|
||||
await chat.close();
|
||||
} catch (_) {
|
||||
// Native session close failure is non-fatal — swallow it and continue.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the arguments of the first function call emitted on [stream].
///
/// The stream is consumed until the first [FunctionCallResponse], or the
/// first [ParallelFunctionCallResponse] with a non-empty `calls` list (in
/// which case only its first call is considered). Every other event —
/// e.g. text or thinking output, or an empty parallel response — is
/// ignored. Iteration stops as soon as a call has been seen.
///
/// Throws a [FormatException] when:
/// - iterating the stream (or copying the call's args) throws — the
///   original error is deliberately discarded and replaced with the fixed
///   message `'stream error'` so prompt content cannot leak through it;
/// - the first call's name differs from [expectedName];
/// - the stream ends without any function call.
///
/// Top-level and annotated `@visibleForTesting` so unit tests can feed
/// synthetic streams directly.
@visibleForTesting
Future<Map<String, dynamic>> collectFunctionCall(
  Stream<ModelResponse> stream,
  String expectedName,
) async {
  Map<String, dynamic>? matchedArgs;
  String? mismatchedName;

  try {
    await for (final response in stream) {
      if (response is FunctionCallResponse) {
        // A single call decides the outcome, whether or not it matches.
        if (response.name != expectedName) {
          mismatchedName = response.name;
        } else {
          matchedArgs = Map<String, dynamic>.from(response.args);
        }
        break;
      }

      if (response is ParallelFunctionCallResponse &&
          response.calls.isNotEmpty) {
        // Only the leading call of a parallel batch is considered.
        final leading = response.calls.first;
        if (leading.name != expectedName) {
          mismatchedName = leading.name;
        } else {
          matchedArgs = Map<String, dynamic>.from(leading.args);
        }
        break;
      }

      // Anything else (text, thinking, empty parallel batch): keep reading.
    }
  } catch (_) {
    // Discard raw error to avoid leaking prompt content in logs/crash
    // reports — the caller surfaces a generic message.
    throw const FormatException('stream error');
  }

  if (mismatchedName != null) {
    throw FormatException('unexpected function: $mismatchedName');
  }

  if (matchedArgs == null) {
    throw const FormatException('no function call emitted');
  }

  return matchedArgs;
}
|
||||
|
||||
@@ -233,21 +233,37 @@ class ModelLifecycle {
|
||||
/// opt-out: delete model file + clear all ai_* meta keys (except opt_in
|
||||
/// which the caller toggles). Returns freed bytes (0 if nothing existed).
|
||||
/// Idempotent.
|
||||
///
|
||||
/// F2 hardening (#218): per-file try/catch so a single OS-level delete
|
||||
/// failure (locked file, permission flake) does not abort the whole
|
||||
/// purge — meta keys still get cleared and the orphan file becomes a
|
||||
/// background storage concern rather than a stuck "opt-out failed"
|
||||
/// state. The freed-bytes count only reflects successful deletes.
|
||||
Future<int> purge() async {
|
||||
int freed = 0;
|
||||
final pathStr = await meta.find(AiMetaKeys.modelPath);
|
||||
if (pathStr != null) {
|
||||
final f = File(pathStr);
|
||||
if (f.existsSync()) {
|
||||
freed += await f.length();
|
||||
await f.delete();
|
||||
try {
|
||||
final f = File(pathStr);
|
||||
if (f.existsSync()) {
|
||||
final size = await f.length();
|
||||
await f.delete();
|
||||
freed += size;
|
||||
}
|
||||
} catch (_) {
|
||||
// Best-effort; leave orphan file, continue purging meta.
|
||||
}
|
||||
}
|
||||
final tempPath = '${await _modelPath()}.tmp';
|
||||
final temp = File(tempPath);
|
||||
if (temp.existsSync()) {
|
||||
freed += await temp.length();
|
||||
await temp.delete();
|
||||
try {
|
||||
final tempPath = '${await _modelPath()}.tmp';
|
||||
final temp = File(tempPath);
|
||||
if (temp.existsSync()) {
|
||||
final size = await temp.length();
|
||||
await temp.delete();
|
||||
freed += size;
|
||||
}
|
||||
} catch (_) {
|
||||
// Same as above — best-effort cleanup of the .tmp partial.
|
||||
}
|
||||
for (final k in [
|
||||
AiMetaKeys.modelPath,
|
||||
@@ -255,7 +271,12 @@ class ModelLifecycle {
|
||||
AiMetaKeys.downloadState,
|
||||
AiMetaKeys.downloadBytes,
|
||||
]) {
|
||||
await meta.remove(k);
|
||||
try {
|
||||
await meta.remove(k);
|
||||
} catch (_) {
|
||||
// Meta is a single sqlite table; failures here are rare.
|
||||
// Swallow so the loop completes even if one key errors.
|
||||
}
|
||||
}
|
||||
return freed;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user