import 'dart:io';
import 'package:flutter/foundation.dart';
import 'package:flutter_gemma/flutter_gemma.dart';
import '../../ai/tools/tool_definition.dart' as tools;
import 'llm_service.dart';

/// HuggingFace access token injected at build time via
/// `--dart-define=HF_TOKEN=hf_xxx`.
///
/// An empty string is permitted — flutter_gemma will only need it for the
/// initial network download, which our `ModelLifecycle` handles separately;
/// activation from a local file path generally does not require the token.
const String _hfToken = String.fromEnvironment('HF_TOKEN', defaultValue: '');

/// One-shot guard so [FlutterGemma.initialize] runs at most once per
/// isolate. Re-init is unsupported by the underlying plugin.
///
/// Library-level rather than per-instance, so every [GemmaLlmService]
/// created in this isolate shares it.
bool _initialized = false;

/// Real on-device LLM backend using flutter_gemma 0.16.5 + Gemma 4 E2B.
///
/// Wired into the existing #215 pipeline: `ModelLifecycle` downloads &
/// SHA-verifies the .litertlm file, then [load] registers that file with
/// flutter_gemma as the active model. [generateStructured] opens a
/// short-lived chat with a single [Tool] (Gemma 4 native function
/// calling) and returns the first matching [FunctionCallResponse]'s args.
///
/// Function-calling design notes (see fn-gemma_llm_service.md §B v2):
///  - Gemma 4 SDK injects the tool declaration via its chat template, so
///    we pass [Tool] to `createChat(tools: ...)` rather than appending a
///    schema instruction to the prompt (double-wrap risk).
///  - `ToolChoice.required` forces the model to emit a function call.
//
// NOTE(review): several declarations below carry no type arguments, and the
// return type of generateStructured reads `Future>`. This looks like generic
// arguments lost in transit — confirm against the file in version control.
class GemmaLlmService implements LlmService {
  /// Filesystem path of the model file that [load] installs.
  final String modelPath;

  GemmaLlmService({required this.modelPath});

  /// Active model handle; null until [load] succeeds and after [unload].
  InferenceModel? _model;

  /// Set by [load] once the model handle is stored; cleared by [unload].
  bool _loaded = false;

  /// Whether [load] has completed and [unload] has not been called since.
  @override
  bool get isLoaded => _loaded;

  /// Installs the file at [modelPath] and stores the active model handle.
  ///
  /// Returns immediately when already loaded. Throws a
  /// [FileSystemException] when [modelPath] does not exist. Errors from the
  /// flutter_gemma calls propagate unchanged, leaving the service unloaded.
  @override
  Future load() async {
    if (_loaded) return;
    if (!await File(modelPath).exists()) {
      throw FileSystemException('model file missing', modelPath);
    }
    if (!_initialized) {
      await FlutterGemma.initialize(huggingFaceToken: _hfToken);
      // Flipped only after initialize() returns, so a failed init is retried
      // by the next load() call.
      _initialized = true;
    }
    await FlutterGemma.installModel(
      modelType: ModelType.gemma4,
      fileType: ModelFileType.litertlm,
    ).fromFile(modelPath).install();
    final model = await FlutterGemma.getActiveModel(maxTokens: 2048);
    _model = model;
    _loaded = true;
  }

  /// Marks the service unloaded and closes the model handle, if any.
  ///
  /// State is cleared before the close is awaited, so [isLoaded] is already
  /// false while the close is in flight. Close errors are ignored.
  @override
  Future unload() async {
    final m = _model;
    _model = null;
    _loaded = false;
    if (m != null) {
      try {
        await m.close();
      } catch (_) {
        // Best-effort cleanup — runtime may already be torn down.
      }
    }
  }

  /// Runs [prompt] through a one-shot chat that exposes a single tool built
  /// from [schema], and returns the arguments of the resulting function call.
  ///
  /// [schema] must contain a non-empty string under `'name'` and a map under
  /// `'parameters'`; `'description'` is optional and defaults to `''`.
  ///
  /// Throws a [StateError] when the service is not loaded and an
  /// [ArgumentError] when `name` or `parameters` is missing or has the wrong
  /// type. Whatever [collectFunctionCall] throws propagates to the caller.
  @override
  Future> generateStructured(
    String prompt,
    Map schema,
  ) async {
    if (!_loaded || _model == null) {
      throw StateError('LlmService not loaded');
    }
    final fnName = schema['name'];
    final fnParams = schema['parameters'];
    if (fnName is! String || fnName.isEmpty) {
      throw ArgumentError('schema.name missing');
    }
    if (fnParams is! Map) {
      throw ArgumentError('schema.parameters missing');
    }
    // The cast means a non-null, non-String description fails here rather
    // than being silently replaced by the default.
    final fnDesc = (schema['description'] as String?) ?? '';
    final tool = Tool(
      name: fnName,
      description: fnDesc,
      parameters: Map.from(fnParams),
    );
    final chat = await _model!.createChat(
      modelType: ModelType.gemma4,
      supportsFunctionCalls: true,
      toolChoice: ToolChoice.required,
      tools: [tool],
    );
    try {
      await chat.addQueryChunk(Message.text(text: prompt, isUser: true));
      final stream = chat.generateChatResponseAsync();
      return await collectFunctionCall(stream, fnName);
    } finally {
      // The chat is closed on success and on failure alike.
      try {
        await chat.close();
      } catch (_) {
        // Native session close failure is non-fatal — it is ignored here
        // (nothing is logged) so it cannot mask the result or error above.
      }
    }
  }

  /// Opens a multi-turn chat that exposes [tools] to the model.
  ///
  /// Each entry is converted to a flutter_gemma [Tool] from its `name`,
  /// `description` and `parametersSchema`. Throws a [StateError] when the
  /// service is not loaded. The caller owns the returned session and is
  /// responsible for closing it.
  @override
  Future startChat({
    required List tools,
  }) async {
    if (!_loaded || _model == null) {
      throw StateError('LlmService not loaded');
    }
    final gemmaTools = tools
        .map((t) => Tool(
              name: t.name,
              description: t.description,
              parameters: Map.from(t.parametersSchema),
            ))
        .toList();
    final chat = await _model!.createChat(
      modelType: ModelType.gemma4,
      supportsFunctionCalls: true,
      // ToolChoice.auto = the model decides on its own (supports both
      // multi-tool and reply-only turns).
      toolChoice: ToolChoice.auto,
      tools: gemmaTools,
    );
    return _GemmaChatSession(chat);
  }
}

/// [LlmChatSession] backed by a flutter_gemma chat object.
///
/// Each send appends one message to the chat and streams the model's reply
/// until the reply ends or the model requests a tool call.
class _GemmaChatSession implements LlmChatSession {
  /// Underlying chat object; held as `dynamic`, so calls on it are resolved
  /// at runtime.
  final dynamic _chat;

  /// Set by [close]; later sends are rejected.
  bool _closed = false;

  _GemmaChatSession(this._chat);

  /// Sends [text] as a user message and streams the model's reply.
  ///
  /// Throws a [StateError] synchronously (before any stream is returned)
  /// when the session is closed.
  @override
  Stream sendUser(String text) {
    if (_closed) {
      throw StateError('LlmChatSession is closed');
    }
    return _run(Message.text(text: text, isUser: true));
  }

  /// Sends the [result] of executing [toolName] back to the model and
  /// streams its follow-up reply.
  ///
  /// Throws a [StateError] synchronously when the session is closed.
  @override
  Stream sendToolResult({
    required String toolName,
    required Map result,
  }) {
    if (_closed) {
      throw StateError('LlmChatSession is closed');
    }
    return _run(Message.toolResponse(toolName: toolName, response: result));
  }

  /// Appends [msg] to the chat and translates the response events.
  ///
  /// Text tokens become `LlmTextChunk`s. The first function call (or the
  /// first entry of a non-empty parallel call) becomes one `LlmFunctionCall`
  /// and ends the stream. All other events are dropped. Being `async*`,
  /// nothing runs until the returned stream is listened to.
  Stream _run(Message msg) async* {
    await _chat.addQueryChunk(msg);
    final Stream stream = _chat.generateChatResponseAsync();
    await for (final event in stream) {
      if (event is TextResponse) {
        yield LlmTextChunk(event.token);
      } else if (event is FunctionCallResponse) {
        yield LlmFunctionCall(
          event.name,
          Map.from(event.args),
        );
        return; // model hands control back to caller for tool exec
      } else if (event is ParallelFunctionCallResponse &&
          event.calls.isNotEmpty) {
        // ADR-0005: parallel calls collapsed to first — sequential dispatch.
        // Any further calls in the same event are discarded.
        final first = event.calls.first;
        yield LlmFunctionCall(
          first.name,
          Map.from(first.args),
        );
        return;
      }
      // ThinkingResponse / other: skip.
    }
  }

  /// Closes the underlying chat.
  ///
  /// Idempotent: only the first call does any work. Close errors are
  /// ignored.
  @override
  Future close() async {
    if (_closed) return;
    _closed = true;
    try {
      await _chat.close();
    } catch (_) {
      // Best-effort cleanup.
    }
  }
}

/// Extracts the first `FunctionCallResponse(name == expectedName)` from
/// a flutter_gemma response stream.
/// `TextResponse` / `ThinkingResponse` events are skipped. A mismatched name
/// throws fast.
///
/// Only the first function call on [stream] is considered: either a
/// [FunctionCallResponse], or the first entry of a non-empty
/// [ParallelFunctionCallResponse]. The stream is not read past that event.
/// Events of any other type are ignored.
///
/// Returns a copy of that call's arguments when its name equals
/// [expectedName].
///
/// Throws a [FormatException] with the message
///  - `'stream error'` when reading the stream (or copying the arguments)
///    fails,
///  - `'unexpected function: <name>'` when the first call has another name,
///  - `'no function call emitted'` when the stream ends without any call.
///
/// Conceptually a private helper of [GemmaLlmService]; kept top-level and
/// marked `@visibleForTesting` so unit tests can feed synthetic streams
/// (see fn-spec §D, 8 test cases).
//
// NOTE(review): the `Map<String, dynamic>` type arguments were reconstructed
// from how flutter_gemma types function-call args — confirm against the
// `LlmService.generateStructured` declaration.
@visibleForTesting
Future<Map<String, dynamic>> collectFunctionCall(
  Stream<dynamic> stream,
  String expectedName,
) async {
  // Name and arguments of the first call seen; the arguments are only copied
  // when the name matches.
  String? emittedName;
  Map<String, dynamic>? emittedArgs;

  try {
    await for (final event in stream) {
      if (event is FunctionCallResponse) {
        emittedName = event.name;
        if (event.name == expectedName) {
          emittedArgs = Map<String, dynamic>.from(event.args);
        }
        break;
      }
      if (event is ParallelFunctionCallResponse && event.calls.isNotEmpty) {
        // Parallel calls are collapsed to their first entry.
        final first = event.calls.first;
        emittedName = first.name;
        if (first.name == expectedName) {
          emittedArgs = Map<String, dynamic>.from(first.args);
        }
        break;
      }
      // TextResponse / ThinkingResponse: skip.
    }
  } catch (_) {
    // Discard raw error to avoid leaking prompt content in logs/crash
    // reports — the caller surfaces a generic message.
    throw const FormatException('stream error');
  }

  if (emittedName != null && emittedName != expectedName) {
    throw FormatException('unexpected function: $emittedName');
  }
  if (emittedArgs == null) {
    throw const FormatException('no function call emitted');
  }
  return emittedArgs;
}