From 9a9eb2abd5da61286083f9a5ff9afee433835535 Mon Sep 17 00:00:00 2001 From: joungmin Date: Fri, 12 Jun 2026 15:18:08 +0900 Subject: [PATCH] [Developer] #218 Real Gemma 4 E2B integration via flutter_gemma 0.16.5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the OQ-1 follow-up to #215 v0.2.0: replace the placeholder GemmaLlmService stub with a real flutter_gemma 0.16.5 backend driving Gemma 4 E2B (litert-community/gemma-4-E2B-it-litert-lm, 2.41GB). Highlights: - GemmaLlmService.load → FlutterGemma.initialize + installModel.fromFile + getActiveModel; idempotent + FileSystemException on missing file. - generateStructured uses Gemma 4 native function calling via createChat(tools: [Tool(...)], toolChoice: required). Stream parsed by collectFunctionCall — first FCR wins, ParallelFCR first-call wins, TextResponse/ThinkingResponse skipped, errors sanitized to prevent prompt leakage. - main.dart wires _LazyLlmService adapter that resolves to GemmaLlmService when ModelLifecycle reports ready, MockLlmService otherwise. - ai_providers.dart pins real model URL + SHA-256 (181938...39a63c). - F2 hardening: ModelLifecycle.purge wraps each delete + meta remove in try/catch so a single OS-level flake cannot block opt-out. - Android: INTERNET / FOREGROUND_SERVICE / POST_NOTIFICATIONS permissions + R8 proguard-rules.pro keeping MediaPipe / LiteRT / TFLite / protobuf JNI entry points (release builds otherwise crash on first inference). Design-First: fn-gemma_llm_service.md updated to v2 — §C (_appendSchemaInstruction) deprecated after reading flutter_gemma 0.16.5 source (Gemma 4 SDK injects tool declarations via template; prompt-side append would double-wrap). Tests: - 10 new unit tests for collectFunctionCall covering all 8 fn-spec cases + 2 ParallelFunctionCallResponse paths. - All 81 existing tests still pass. - flutter analyze: 0 issues. 
Refs #218 Co-Authored-By: Claude Opus 4.6 --- app/android/app/build.gradle.kts | 7 + app/android/app/proguard-rules.pro | 30 +++ app/android/app/src/main/AndroidManifest.xml | 7 + app/lib/data/ai/gemma_llm_service.dart | 150 ++++++++++- app/lib/data/ai/model_lifecycle.dart | 41 ++- app/lib/main.dart | 58 +++- app/lib/state/ai_providers.dart | 21 +- app/lib/ui/screens/settings_screen.dart | 7 +- app/pubspec.lock | 253 +++++++++++------- app/pubspec.yaml | 8 +- app/test/data/ai/gemma_llm_service_test.dart | 150 +++++++++++ .../218-gemma-real-integration/README.md | 29 +- .../fn-gemma_llm_service.md | 52 ++-- docs/guides/ai-help-onboarding.md | 8 +- 14 files changed, 646 insertions(+), 175 deletions(-) create mode 100644 app/android/app/proguard-rules.pro create mode 100644 app/test/data/ai/gemma_llm_service_test.dart diff --git a/app/android/app/build.gradle.kts b/app/android/app/build.gradle.kts index 6d15be1..01a90ea 100644 --- a/app/android/app/build.gradle.kts +++ b/app/android/app/build.gradle.kts @@ -30,6 +30,13 @@ android { // TODO: Add your own signing config for the release build. // Signing with the debug keys for now, so `flutter run --release` works. signingConfig = signingConfigs.getByName("debug") + // #218: keep flutter_gemma JNI bindings — see proguard-rules.pro. + isMinifyEnabled = true + isShrinkResources = true + proguardFiles( + getDefaultProguardFile("proguard-android-optimize.txt"), + "proguard-rules.pro" + ) } } } diff --git a/app/android/app/proguard-rules.pro b/app/android/app/proguard-rules.pro new file mode 100644 index 0000000..51b2555 --- /dev/null +++ b/app/android/app/proguard-rules.pro @@ -0,0 +1,30 @@ +# flutter_gemma 0.16.5 — keep MediaPipe + LiteRT native bindings (#218) +# Without these the release build (R8 minify) strips JNI entry points +# and the first inference call crashes with NoSuchMethodError. 
+
+# MediaPipe LLM (.task path)
+-keep class com.google.mediapipe.** { *; }
+-dontwarn com.google.mediapipe.**
+
+# LiteRT runtime (.litertlm path used by Gemma 4 E2B)
+-keep class com.google.ai.edge.** { *; }
+-keep class com.google.ai.litert.** { *; }
+-dontwarn com.google.ai.edge.**
+-dontwarn com.google.ai.litert.**
+
+# TensorFlow Lite (used by LiteRT under the hood)
+-keep class org.tensorflow.lite.** { *; }
+-dontwarn org.tensorflow.lite.**
+
+# Protobuf-lite (LiteRT message classes referenced via reflection)
+-keep class com.google.protobuf.** { *; }
+-dontwarn com.google.protobuf.**
+
+# flutter_gemma plugin's own native bridge
+-keep class dev.flutterberlin.flutter_gemma.** { *; }
+-dontwarn dev.flutterberlin.flutter_gemma.**
+
+# Generic JNI methods — covers any LiteRT/MediaPipe class loaded dynamically
+-keepclasseswithmembernames class * {
+    native <methods>;
+}
diff --git a/app/android/app/src/main/AndroidManifest.xml b/app/android/app/src/main/AndroidManifest.xml
index 157d3b8..91017a3 100644
--- a/app/android/app/src/main/AndroidManifest.xml
+++ b/app/android/app/src/main/AndroidManifest.xml
@@ -1,4 +1,11 @@
+
+
+
+
+
+
+
   Future<void> load() async {
-    throw UnimplementedError(
-      'GemmaLlmService.load: pending OQ-1 (model URL + flutter_gemma).',
-    );
+    if (_loaded) return;
+    if (!await File(modelPath).exists()) {
+      throw FileSystemException('model file missing', modelPath);
+    }
+    if (!_initialized) {
+      await FlutterGemma.initialize(huggingFaceToken: _hfToken);
+      _initialized = true;
+    }
+    await FlutterGemma.installModel(
+      modelType: ModelType.gemma4,
+      fileType: ModelFileType.litertlm,
+    ).fromFile(modelPath).install();
+    final model = await FlutterGemma.getActiveModel(maxTokens: 2048);
+    _model = model;
+    _loaded = true;
   }
 
   @override
   Future<void> unload() async {
+    final m = _model;
+    _model = null;
     _loaded = false;
+    if (m != null) {
+      try {
+        await m.close();
+      } catch (_) {
+        // Best-effort cleanup — runtime may already be torn down.
+      }
+    }
   }
 
   @override
@@ -38,8 +78,90 @@ class GemmaLlmService implements LlmService {
     String prompt,
     Map<String, dynamic> schema,
   ) async {
-    throw UnimplementedError(
-      'GemmaLlmService.generateStructured: pending OQ-1.',
+    if (!_loaded || _model == null) {
+      throw StateError('LlmService not loaded');
+    }
+    final fnName = schema['name'];
+    final fnParams = schema['parameters'];
+    if (fnName is! String || fnName.isEmpty) {
+      throw ArgumentError('schema.name missing');
+    }
+    if (fnParams is! Map) {
+      throw ArgumentError('schema.parameters missing');
+    }
+    final fnDesc = (schema['description'] as String?) ?? '';
+    final tool = Tool(
+      name: fnName,
+      description: fnDesc,
+      parameters: Map<String, dynamic>.from(fnParams),
     );
+
+    final chat = await _model!.createChat(
+      modelType: ModelType.gemma4,
+      supportsFunctionCalls: true,
+      toolChoice: ToolChoice.required,
+      tools: [tool],
+    );
+    try {
+      await chat.addQueryChunk(Message.text(text: prompt, isUser: true));
+      final stream = chat.generateChatResponseAsync();
+      return await collectFunctionCall(stream, fnName);
+    } finally {
+      try {
+        await chat.close();
+      } catch (_) {
+        // Native session close failure is non-fatal — swallow and continue.
+      }
+    }
   }
 }
+
+/// Extracts the first `FunctionCallResponse(name == expectedName)` from
+/// a flutter_gemma response stream. `TextResponse` / `ThinkingResponse`
+/// events are skipped. A mismatched name throws fast.
+///
+/// Conceptually a private helper of [GemmaLlmService]; exposed top-level
+/// and marked `@visibleForTesting` only so unit tests can feed it
+/// synthetic streams (see fn-spec §D, 8 test cases).
+@visibleForTesting
+Future<Map<String, dynamic>> collectFunctionCall(
+  Stream<ModelResponse> stream,
+  String expectedName,
+) async {
+  Map<String, dynamic>? result;
+  String? wrongName;
+  try {
+    await for (final event in stream) {
+      if (event is FunctionCallResponse) {
+        if (event.name == expectedName) {
+          result = Map<String, dynamic>.from(event.args);
+          break;
+        } else {
+          wrongName = event.name;
+          break;
+        }
+      }
+      if (event is ParallelFunctionCallResponse && event.calls.isNotEmpty) {
+        final first = event.calls.first;
+        if (first.name == expectedName) {
+          result = Map<String, dynamic>.from(first.args);
+        } else {
+          wrongName = first.name;
+        }
+        break;
+      }
+      // TextResponse / ThinkingResponse: skip.
+    }
+  } catch (_) {
+    // Discard raw error to avoid leaking prompt content in logs/crash
+    // reports — the caller surfaces a generic message.
+    throw const FormatException('stream error');
+  }
+  if (wrongName != null) {
+    throw FormatException('unexpected function: $wrongName');
+  }
+  if (result == null) {
+    throw const FormatException('no function call emitted');
+  }
+  return result;
+}
diff --git a/app/lib/data/ai/model_lifecycle.dart b/app/lib/data/ai/model_lifecycle.dart
index b85ddcb..123a4ba 100644
--- a/app/lib/data/ai/model_lifecycle.dart
+++ b/app/lib/data/ai/model_lifecycle.dart
@@ -233,21 +233,37 @@ class ModelLifecycle {
   /// opt-out: delete model file + clear all ai_* meta keys (except opt_in
   /// which the caller toggles). Returns freed bytes (0 if nothing existed).
   /// Idempotent.
+  ///
+  /// F2 hardening (#218): per-file try/catch so a single OS-level delete
+  /// failure (locked file, permission flake) does not abort the whole
+  /// purge — meta keys still get cleared and the orphan file becomes a
+  /// background storage concern rather than a stuck "opt-out failed"
+  /// state. The freed-bytes count only reflects successful deletes.
   Future<int> purge() async {
     int freed = 0;
     final pathStr = await meta.find(AiMetaKeys.modelPath);
     if (pathStr != null) {
-      final f = File(pathStr);
-      if (f.existsSync()) {
-        freed += await f.length();
-        await f.delete();
+      try {
+        final f = File(pathStr);
+        if (f.existsSync()) {
+          final size = await f.length();
+          await f.delete();
+          freed += size;
+        }
+      } catch (_) {
+        // Best-effort; leave orphan file, continue purging meta.
       }
     }
-    final tempPath = '${await _modelPath()}.tmp';
-    final temp = File(tempPath);
-    if (temp.existsSync()) {
-      freed += await temp.length();
-      await temp.delete();
+    try {
+      final tempPath = '${await _modelPath()}.tmp';
+      final temp = File(tempPath);
+      if (temp.existsSync()) {
+        final size = await temp.length();
+        await temp.delete();
+        freed += size;
+      }
+    } catch (_) {
+      // Same as above — best-effort cleanup of the .tmp partial.
     }
     for (final k in [
       AiMetaKeys.modelPath,
@@ -255,7 +271,12 @@ class ModelLifecycle {
       AiMetaKeys.downloadState,
       AiMetaKeys.downloadBytes,
     ]) {
-      await meta.remove(k);
+      try {
+        await meta.remove(k);
+      } catch (_) {
+        // Meta is a single sqlite table; failures here are rare.
+        // Swallow so the loop completes even if one key errors.
+      }
     }
     return freed;
   }
diff --git a/app/lib/main.dart b/app/lib/main.dart
index 29be514..ac2c052 100644
--- a/app/lib/main.dart
+++ b/app/lib/main.dart
@@ -1,7 +1,10 @@
 import 'package:flutter/material.dart';
 import 'package:flutter_riverpod/flutter_riverpod.dart';
 
+import 'data/ai/gemma_llm_service.dart';
 import 'data/ai/llm_service.dart';
+import 'data/ai/model_lifecycle.dart';
+import 'data/db/daos/meta_dao.dart';
 import 'state/ai_providers.dart';
 import 'state/providers.dart';
 import 'ui/screens/habit_list_screen.dart';
@@ -12,15 +15,62 @@ Future<void> main() async {
   runApp(ProviderScope(
     overrides: [
       appDatabaseProvider.overrideWithValue(db),
-      // OQ-1 pending: production-ready GemmaLlmService is wired here once
-      // model URL + SHA are pinned and flutter_gemma is added. Until then,
-      // MockLlmService keeps the app graceful (suggestFrame returns []).
-      llmServiceProvider.overrideWithValue(MockLlmService()),
+      // #218: real GemmaLlmService when model file is on disk + verified,
+      // MockLlmService otherwise. The provider is read lazily by the frame
+      // suggestion flow, so the resolution is dynamic per call.
+      llmServiceProvider.overrideWith((ref) {
+        return _LazyLlmService(
+          lifecycle: ref.watch(modelLifecycleProvider),
+          meta: ref.watch(metaDaoProvider),
+        );
+      }),
     ],
     child: const LifeHelperApp(),
   ));
 }
 
+/// Adapter that resolves lazily between [GemmaLlmService] (model file
+/// present + meta intact) and [MockLlmService] (graceful empty fallback).
+/// Only the real backend is cached; the mock is provisional and is
+/// re-checked on every call, so a later download needs no app restart.
+class _LazyLlmService implements LlmService {
+  _LazyLlmService({required this.lifecycle, required this.meta});
+  final ModelLifecycle lifecycle;
+  final MetaDao meta;
+  LlmService? _delegate;
+
+  Future<LlmService> _resolve() async {
+    final current = _delegate;
+    if (current is GemmaLlmService) return current;
+    final avail = await lifecycle.checkAvailability();
+    final path = await meta.find(AiMetaKeys.modelPath);
+    if (avail != ModelAvailability.ready || path == null) {
+      return _delegate = current ?? MockLlmService();
+    }
+    // Starts unloaded: `isLoaded` goes false until the caller runs `load()`.
+    return _delegate = GemmaLlmService(modelPath: path);
+  }
+
+  @override
+  bool get isLoaded => _delegate?.isLoaded ?? false;
+
+  @override
+  Future<void> load() async => (await _resolve()).load();
+
+  @override
+  Future<void> unload() async {
+    final d = _delegate;
+    if (d != null) await d.unload();
+  }
+
+  @override
+  Future<Map<String, dynamic>> generateStructured(
+    String prompt,
+    Map<String, dynamic> schema,
+  ) async =>
+      (await _resolve()).generateStructured(prompt, schema);
+}
+
 class LifeHelperApp extends StatelessWidget {
   const LifeHelperApp({super.key});
 
diff --git a/app/lib/state/ai_providers.dart b/app/lib/state/ai_providers.dart
index 351630b..5f77680 100644
--- a/app/lib/state/ai_providers.dart
+++ b/app/lib/state/ai_providers.dart
@@ -10,19 +10,24 @@ import '../domain/ai/suggest_frame.dart';
 import '../domain/models/frame_pattern.dart';
 import 'providers.dart';
 
-/// Default config for the on-device Gemma model (#215).
-/// OQ-1: URL + SHA-256 pinned in Developer phase. Until then, downloads are
-/// disabled (AI toggle is gated behind these constants being real).
-const _kModelUrlPlaceholder =
-    'https://example.invalid/gemma4-e2b-q4.bin'; // OQ-1
-const _kModelShaPlaceholder = 'PENDING_OQ_1';
+/// Gemma 4 E2B instruction-tuned LiteRT-LM checkpoint (#218 OQ-1 resolved).
+/// Hosted on HuggingFace `litert-community/gemma-4-E2B-it-litert-lm`.
+/// File ≈ 2.41GB; SHA-256 pinned for integrity check.
+///
+/// Tests / placeholder builds may override `modelLifecycleProvider` with
+/// fixture URLs. NOTE(review): a `--dart-define=GEMMA_MODEL_URL=...` mirror
+/// override is not wired up as of #218 — the URL below is a plain constant.
+const _kModelUrl = + 'https://huggingface.co/litert-community/gemma-4-E2B-it-litert-lm/resolve/main/gemma-4-E2B-it.litertlm'; +const _kModelSha256 = + '181938105e0eefd105961417e8da75903eacda102c4fce9ce90f50b97139a63c'; final modelLifecycleProvider = Provider((ref) { return ModelLifecycle( meta: ref.watch(metaDaoProvider), config: ModelConfig( - url: Uri.parse(_kModelUrlPlaceholder), - expectedSha256: _kModelShaPlaceholder, + url: Uri.parse(_kModelUrl), + expectedSha256: _kModelSha256, ), ); }); diff --git a/app/lib/ui/screens/settings_screen.dart b/app/lib/ui/screens/settings_screen.dart index 9143b79..795732b 100644 --- a/app/lib/ui/screens/settings_screen.dart +++ b/app/lib/ui/screens/settings_screen.dart @@ -51,7 +51,7 @@ class _AiSection extends ConsumerWidget { SwitchListTile( title: const Text('AI 도움 켜기'), subtitle: const Text( - 'Gemma 4 E2B 모델 ≈ 1.5GB. 모든 처리는 단말에서 일어납니다.', + 'Gemma 4 E2B 모델 ≈ 2.4GB. 모든 처리는 단말에서 일어납니다.', ), value: optIn, onChanged: (v) async { @@ -125,9 +125,10 @@ class _AiSection extends ConsumerWidget { style: TextStyle(fontWeight: FontWeight.w600), ), SizedBox(height: 12), - _Bullet('파일 크기: 약 1.5GB'), + _Bullet('파일 크기: 약 2.4GB'), _Bullet('WiFi 연결을 권장합니다'), _Bullet('모든 처리는 단말에서만 일어나며, 입력 텍스트는 외부로 전송되지 않습니다'), + _Bullet('Gemma 이용약관(ai.google.dev/gemma/terms)에 동의합니다'), ], ), actions: [ @@ -156,7 +157,7 @@ class _AiSection extends ConsumerWidget { Text('모델 파일이 단말에서 삭제됩니다.'), SizedBox(height: 8), Text( - '약 1.5GB 의 저장공간이 확보돼요.', + '약 2.4GB 의 저장공간이 확보돼요.', style: TextStyle(fontSize: 13, color: Colors.grey), ), SizedBox(height: 4), diff --git a/app/pubspec.lock b/app/pubspec.lock index 33222b9..c8f3cd3 100644 --- a/app/pubspec.lock +++ b/app/pubspec.lock @@ -5,18 +5,18 @@ packages: dependency: transitive description: name: _fe_analyzer_shared - sha256: da0d9209ca76bde579f2da330aeb9df62b6319c834fa7baae052021b0462401f + sha256: "3b19a47f6ea7c2632760777c78174f47f6aec1e05f0cd611380d4593b8af1dbc" url: "https://pub.dev" source: hosted - version: 
"85.0.0" + version: "96.0.0" analyzer: dependency: transitive description: name: analyzer - sha256: "974859dc0ff5f37bc4313244b3218c791810d03ab3470a579580279ba971a48d" + sha256: "0c516bc4ad36a1a75759e54d5047cb9d15cded4459df01aa35a0b5ec7db2c2a0" url: "https://pub.dev" source: hosted - version: "7.7.1" + version: "10.2.0" args: dependency: transitive description: @@ -33,6 +33,14 @@ packages: url: "https://pub.dev" source: hosted version: "2.13.1" + background_downloader: + dependency: transitive + description: + name: background_downloader + sha256: aceacec2b2a72ec3a8862ab5895fcbbc71ab33765f3619d57963f3110dd268e3 + url: "https://pub.dev" + source: hosted + version: "9.5.5" boolean_selector: dependency: transitive description: @@ -45,18 +53,18 @@ packages: dependency: transitive description: name: build - sha256: "51dc711996cbf609b90cbe5b335bbce83143875a9d58e4b5c6d3c4f684d3dda7" + sha256: a156715e7cd728130c592f30552575908aae5b100005fbc1f0fb16b3c03a3d10 url: "https://pub.dev" source: hosted - version: "2.5.4" + version: "4.0.6" build_config: dependency: transitive description: name: build_config - sha256: "4ae2de3e1e67ea270081eaee972e1bd8f027d459f249e0f1186730784c2e7e33" + sha256: "4070d2a59f8eec34c97c86ceb44403834899075f66e8a9d59706f8e7834f6f71" url: "https://pub.dev" source: hosted - version: "1.1.2" + version: "1.3.0" build_daemon: dependency: transitive description: @@ -65,30 +73,14 @@ packages: url: "https://pub.dev" source: hosted version: "4.1.1" - build_resolvers: - dependency: transitive - description: - name: build_resolvers - sha256: ee4257b3f20c0c90e72ed2b57ad637f694ccba48839a821e87db762548c22a62 - url: "https://pub.dev" - source: hosted - version: "2.5.4" build_runner: dependency: "direct dev" description: name: build_runner - sha256: "382a4d649addbfb7ba71a3631df0ec6a45d5ab9b098638144faf27f02778eb53" + sha256: "1523ce62448ebac2c15a8ba5fbad8acac169788658a7dd2a1c2d9c2a9318b9a6" url: "https://pub.dev" source: hosted - version: "2.5.4" - build_runner_core: - 
dependency: transitive - description: - name: build_runner_core - sha256: "85fbbb1036d576d966332a3f5ce83f2ce66a40bea1a94ad2d5fc29a19a0d3792" - url: "https://pub.dev" - source: hosted - version: "9.1.2" + version: "2.15.0" built_collection: dependency: transitive description: @@ -149,18 +141,10 @@ packages: dependency: transitive description: name: code_assets - sha256: bf394f466ba9205f1812a0433b392d6af280f155f56651eda7c18cc32ed493b8 + sha256: "83ccdaa064c980b5596c35dd64a8d3ecc68620174ab9b90b6343b753aa721687" url: "https://pub.dev" source: hosted - version: "1.2.1" - code_builder: - dependency: transitive - description: - name: code_builder - sha256: "6a6cab2ba4680d6423f34a9b972a4c9a94ebe1b62ecec4e1a1f2cba91fd1319d" - url: "https://pub.dev" - source: hosted - version: "4.11.1" + version: "1.0.0" collection: dependency: transitive description: @@ -193,30 +177,38 @@ packages: url: "https://pub.dev" source: hosted version: "1.0.9" + dart_sentencepiece_tokenizer: + dependency: transitive + description: + name: dart_sentencepiece_tokenizer + sha256: "85825632845cf6427ea0cd13dfba96b4341cf63525165155e5b9b97011239289" + url: "https://pub.dev" + source: hosted + version: "1.3.2" dart_style: dependency: transitive description: name: dart_style - sha256: "8a0e5fba27e8ee025d2ffb4ee820b4e6e2cf5e4246a6b1a477eb66866947e0bb" + sha256: "29f7ecc274a86d32920b1d9cfc7502fa87220da41ec60b55f329559d5732e2b2" url: "https://pub.dev" source: hosted - version: "3.1.1" + version: "3.1.7" drift: dependency: "direct main" description: name: drift - sha256: "540cf382a3bfa99b76e51514db5b0ebcd81ce3679b7c1c9cb9478ff3735e47a1" + sha256: "6cc0b623c0e83f7080524d8396e9301b1d78b9c66a4fdceeb0f798211303254c" url: "https://pub.dev" source: hosted - version: "2.28.2" + version: "2.34.0" drift_dev: dependency: "direct dev" description: name: drift_dev - sha256: "68c138e884527d2bd61df2ade276c3a144df84d1adeb0ab8f3196b5afe021bd4" + sha256: "9cfff1576b49725da0d32c040651a41ae195e8c4af8d8da301593e41d7abc2f7" url: 
"https://pub.dev" source: hosted - version: "2.28.0" + version: "2.34.0" fake_async: dependency: transitive description: @@ -254,6 +246,14 @@ packages: description: flutter source: sdk version: "0.0.0" + flutter_gemma: + dependency: "direct main" + description: + name: flutter_gemma + sha256: "984960b54bbc0ff7e36cf568a02652b1d4bc016d6c75575b027fb6102fab48c3" + url: "https://pub.dev" + source: hosted + version: "0.16.5" flutter_lints: dependency: "direct dev" description: @@ -275,30 +275,11 @@ packages: description: flutter source: sdk version: "0.0.0" - freezed: - dependency: "direct dev" - description: - name: freezed - sha256: "59a584c24b3acdc5250bb856d0d3e9c0b798ed14a4af1ddb7dc1c7b41df91c9c" - url: "https://pub.dev" - source: hosted - version: "2.5.8" - freezed_annotation: - dependency: "direct main" - description: - name: freezed_annotation - sha256: c2e2d632dd9b8a2b7751117abcfc2b4888ecfe181bd9fca7170d9ef02e595fe2 - url: "https://pub.dev" - source: hosted - version: "2.4.4" - frontend_server_client: + flutter_web_plugins: dependency: transitive - description: - name: frontend_server_client - sha256: f64a0333a82f30b0cca061bc3d143813a486dc086b574bfb233b7c1372427694 - url: "https://pub.dev" - source: hosted - version: "4.0.0" + description: flutter + source: sdk + version: "0.0.0" glob: dependency: transitive description: @@ -319,10 +300,10 @@ packages: dependency: transitive description: name: hooks - sha256: "9a62a50b50b769a737bc0a8ff381f333529df3ab746b2f6b02e83760231455ba" + sha256: "025f060e86d2d4c3c47b56e33caf7f93bf9283340f26d23424ebcfccf34f621e" url: "https://pub.dev" source: hosted - version: "2.0.2" + version: "1.0.3" http: dependency: "direct main" description: @@ -371,14 +352,6 @@ packages: url: "https://pub.dev" source: hosted version: "1.0.1" - js: - dependency: transitive - description: - name: js - sha256: "53385261521cc4a0c4658fd0ad07a7d14591cf8fc33abbceae306ddb974888dc" - url: "https://pub.dev" - source: hosted - version: "0.7.2" json_annotation: 
dependency: "direct main" description: @@ -391,10 +364,18 @@ packages: dependency: "direct dev" description: name: json_serializable - sha256: c50ef5fc083d5b5e12eef489503ba3bf5ccc899e487d691584699b4bdefeea8c + sha256: "5b89c1e32ae3840bb20a1b3434e3a590173ad3cb605896fb0f60487ce2f8104e" url: "https://pub.dev" source: hosted - version: "6.9.5" + version: "6.11.4" + large_file_handler: + dependency: transitive + description: + name: large_file_handler + sha256: "1657db12b4591242b186c23eee437997be91e655ab0d803fd74829074460757f" + url: "https://pub.dev" + source: hosted + version: "0.4.0" leak_tracker: dependency: transitive description: @@ -427,6 +408,14 @@ packages: url: "https://pub.dev" source: hosted version: "6.1.0" + local_hnsw: + dependency: transitive + description: + name: local_hnsw + sha256: "7fb0988e3f850121774d9dfe94068f843d87908d6b1e9ed5039710d8d9ae3f7a" + url: "https://pub.dev" + source: hosted + version: "1.0.0" logging: dependency: transitive description: @@ -467,14 +456,30 @@ packages: url: "https://pub.dev" source: hosted version: "2.0.0" + mutex: + dependency: transitive + description: + name: mutex + sha256: "8827da25de792088eb33e572115a5eb0d61d61a3c01acbc8bcbe76ed78f1a1f2" + url: "https://pub.dev" + source: hosted + version: "3.1.0" + native_toolchain_c: + dependency: transitive + description: + name: native_toolchain_c + sha256: "6ba77bb18063eebe9de401f5e6437e95e1438af0a87a3a39084fbd37c90df572" + url: "https://pub.dev" + source: hosted + version: "0.17.6" objective_c: dependency: transitive description: name: objective_c - sha256: "6cb691c686fa2838c6deb34980d426145c2a5d537491cb83d463c33cdbc726ed" + sha256: "100a1c87616ab6ed41ec263b083c0ef3261ee6cd1dc3b0f35f8ddfa4f996fe52" url: "https://pub.dev" source: hosted - version: "9.4.1" + version: "9.3.0" package_config: dependency: transitive description: @@ -603,6 +608,62 @@ packages: url: "https://pub.dev" source: hosted version: "2.6.1" + shared_preferences: + dependency: transitive + description: + 
name: shared_preferences + sha256: c3025c5534b01739267eb7d76959bbc25a6d10f6988e1c2a3036940133dd10bf + url: "https://pub.dev" + source: hosted + version: "2.5.5" + shared_preferences_android: + dependency: transitive + description: + name: shared_preferences_android + sha256: "93ae5884a9df5d3bb696825bceb3a17590754548b5d740eba51500afc8d088f5" + url: "https://pub.dev" + source: hosted + version: "2.4.26" + shared_preferences_foundation: + dependency: transitive + description: + name: shared_preferences_foundation + sha256: "4e7eaffc2b17ba398759f1151415869a34771ba11ebbccd1b0145472a619a64f" + url: "https://pub.dev" + source: hosted + version: "2.5.6" + shared_preferences_linux: + dependency: transitive + description: + name: shared_preferences_linux + sha256: "580abfd40f415611503cae30adf626e6656dfb2f0cee8f465ece7b6defb40f2f" + url: "https://pub.dev" + source: hosted + version: "2.4.1" + shared_preferences_platform_interface: + dependency: transitive + description: + name: shared_preferences_platform_interface + sha256: "649dc798a33931919ea356c4305c2d1f81619ea6e92244070b520187b5140ef9" + url: "https://pub.dev" + source: hosted + version: "2.4.2" + shared_preferences_web: + dependency: transitive + description: + name: shared_preferences_web + sha256: c49bd060261c9a3f0ff445892695d6212ff603ef3115edbb448509d407600019 + url: "https://pub.dev" + source: hosted + version: "2.4.3" + shared_preferences_windows: + dependency: transitive + description: + name: shared_preferences_windows + sha256: "94ef0f72b2d71bc3e700e025db3710911bd51a71cefb65cc609dd0d9a982e3c1" + url: "https://pub.dev" + source: hosted + version: "2.4.1" shelf: dependency: transitive description: @@ -628,18 +689,18 @@ packages: dependency: transitive description: name: source_gen - sha256: "35c8150ece9e8c8d263337a265153c3329667640850b9304861faea59fc98f6b" + sha256: ec37cc0e6694374cbef59ed79685572c870a54ede6fa30a3e420feb3adffea02 url: "https://pub.dev" source: hosted - version: "2.0.0" + version: "4.2.3" 
source_helper: dependency: transitive description: name: source_helper - sha256: a447acb083d3a5ef17f983dd36201aeea33fedadb3228fa831f2f0c92f0f3aca + sha256: "4227d54ceefd0bb8ca4c8fcb96e1719dc53f1ee1b6e2ca9d7a6069da160e4eae" url: "https://pub.dev" source: hosted - version: "1.3.7" + version: "1.3.12" source_span: dependency: transitive description: @@ -652,10 +713,10 @@ packages: dependency: transitive description: name: sqlite3 - sha256: "3145bd74dcdb4fd6f5c6dda4d4e4490a8087d7f286a14dee5d37087290f0f8a2" + sha256: "9488c7d2cdb1091c91cacf7e207cff81b28bff8e366f042bad3afe7d34afe189" url: "https://pub.dev" source: hosted - version: "2.9.4" + version: "3.3.2" sqlite3_flutter_libs: dependency: "direct main" description: @@ -668,10 +729,10 @@ packages: dependency: transitive description: name: sqlparser - sha256: "57090342af1ce32bb499aa641f4ecdd2d6231b9403cea537ac059e803cc20d67" + sha256: "40bdddb306a727be9ce510bd2d2b9a6c9db6c586d846ef7b22e3990a2b24f02d" url: "https://pub.dev" source: hosted - version: "0.41.2" + version: "0.44.5" stack_trace: dependency: transitive description: @@ -728,14 +789,6 @@ packages: url: "https://pub.dev" source: hosted version: "0.7.11" - timing: - dependency: transitive - description: - name: timing - sha256: "62ee18aca144e4a9f29d212f5a4c6a053be252b895ab14b5821996cff4ed90fe" - url: "https://pub.dev" - source: hosted - version: "1.0.2" typed_data: dependency: transitive description: @@ -752,6 +805,14 @@ packages: url: "https://pub.dev" source: hosted version: "2.0.1" + uuid: + dependency: transitive + description: + name: uuid + sha256: "1fef9e8e11e2991bb773070d4656b7bd5d850967a2456cfc83cf47925ba79489" + url: "https://pub.dev" + source: hosted + version: "4.5.3" vector_math: dependency: transitive description: @@ -818,4 +879,4 @@ packages: version: "3.1.3" sdks: dart: ">=3.12.2 <4.0.0" - flutter: ">=3.38.4" + flutter: ">=3.44.0" diff --git a/app/pubspec.yaml b/app/pubspec.yaml index 1ef5a4a..3910d93 100644 --- a/app/pubspec.yaml +++ 
b/app/pubspec.yaml @@ -21,15 +21,14 @@ dependencies: path: ^1.9.0 # Models / serialization - freezed_annotation: ^2.4.0 json_annotation: ^4.9.0 # IDs ulid: ^2.0.0 - # AI / on-device LLM (#215) - # flutter_gemma 는 OQ-1 (정확한 모델 URL + SHA) 확정 후 추가. - # v1은 LlmService 추상 + ModelLifecycle (파일/SHA/메타 관리) + Mock 까지 구현. + # AI / on-device LLM (#215, #218) + # #218 (v0.3.0): flutter_gemma 0.16.5 + Gemma 4 E2B (HF litert-community). + flutter_gemma: ^0.16.5 crypto: ^3.0.0 http: ^1.2.0 @@ -41,7 +40,6 @@ dev_dependencies: # Codegen drift_dev: ^2.18.0 build_runner: ^2.4.0 - freezed: ^2.5.0 json_serializable: ^6.8.0 flutter: diff --git a/app/test/data/ai/gemma_llm_service_test.dart b/app/test/data/ai/gemma_llm_service_test.dart new file mode 100644 index 0000000..18f1ca6 --- /dev/null +++ b/app/test/data/ai/gemma_llm_service_test.dart @@ -0,0 +1,150 @@ +import 'dart:async'; + +import 'package:flutter_gemma/flutter_gemma.dart'; +import 'package:flutter_test/flutter_test.dart'; +import 'package:life_helper/data/ai/gemma_llm_service.dart'; + +/// Unit tests for `collectFunctionCall` (fn-spec §D, 8 cases). +/// +/// `GemmaLlmService.load` / `.generateStructured` themselves require the +/// flutter_gemma native runtime and are covered by AC-7 (on-device E2E), +/// not by host tests. The pure stream-parsing helper is unit-testable in +/// isolation because we can feed a synthetic `Stream`. +void main() { + const fn = 'emit_frame_candidates'; + + test('1. single FCR with expected name returns args', () async { + final stream = Stream.fromIterable([ + const FunctionCallResponse( + name: fn, + args: { + 'candidates': [ + {'text': 'a', 'level': 'L2'}, + {'text': 'b', 'level': 'L2'}, + {'text': 'c', 'level': 'L3'}, + ], + }, + ), + ]); + final args = await collectFunctionCall(stream, fn); + expect(args['candidates'], hasLength(3)); + }); + + test('2. 
TextResponse before FCR is skipped', () async { + final stream = Stream.fromIterable([ + const TextResponse('hello'), + const FunctionCallResponse(name: fn, args: {'candidates': []}), + ]); + final args = await collectFunctionCall(stream, fn); + expect(args['candidates'], isEmpty); + }); + + test('3. ThinkingResponse + TextResponse before empty-args FCR', () async { + final stream = Stream.fromIterable([ + const ThinkingResponse('reasoning...'), + const TextResponse('preamble'), + const FunctionCallResponse(name: fn, args: {}), + ]); + final args = await collectFunctionCall(stream, fn); + expect(args, isEmpty); + }); + + test('4. wrong function name throws FormatException', () async { + final stream = Stream.fromIterable([ + const FunctionCallResponse(name: 'wrong_name', args: {}), + ]); + expect( + () => collectFunctionCall(stream, fn), + throwsA( + isA().having( + (e) => e.message, + 'message', + contains('wrong_name'), + ), + ), + ); + }); + + test('5. text-only stream throws "no function call emitted"', () async { + final stream = Stream.fromIterable([ + const TextResponse('only text, no call'), + ]); + expect( + () => collectFunctionCall(stream, fn), + throwsA( + isA().having( + (e) => e.message, + 'message', + contains('no function call emitted'), + ), + ), + ); + }); + + test('6. stream error throws sanitized FormatException (no leak)', () async { + final stream = Stream.error( + Exception('SENSITIVE: user_prompt_leaked_in_error'), + ); + expect( + () => collectFunctionCall(stream, fn), + throwsA( + isA().having( + (e) => e.message, + 'message', + allOf( + equals('stream error'), + isNot(contains('SENSITIVE')), + isNot(contains('user_prompt_leaked_in_error')), + ), + ), + ), + ); + }); + + test('7. FCR with empty args map returns empty map (no throw)', () async { + final stream = Stream.fromIterable([ + const FunctionCallResponse(name: fn, args: {}), + ]); + final args = await collectFunctionCall(stream, fn); + expect(args, isEmpty); + }); + + test('8. 
empty stream throws "no function call emitted"', () async { + final stream = const Stream.empty(); + expect( + () => collectFunctionCall(stream, fn), + throwsA( + isA().having( + (e) => e.message, + 'message', + contains('no function call emitted'), + ), + ), + ); + }); + + group('ParallelFunctionCallResponse', () { + test('first call with expected name returns its args', () async { + final stream = Stream.fromIterable([ + ParallelFunctionCallResponse(calls: [ + const FunctionCallResponse(name: fn, args: {'x': 1}), + const FunctionCallResponse(name: 'other', args: {'y': 2}), + ]), + ]); + final args = await collectFunctionCall(stream, fn); + expect(args['x'], 1); + }); + + test('first call with wrong name throws', () async { + final stream = Stream.fromIterable([ + ParallelFunctionCallResponse(calls: [ + const FunctionCallResponse(name: 'wrong_first', args: {}), + ]), + ]); + expect( + () => collectFunctionCall(stream, fn), + throwsA(isA()), + ); + }); + }); +} diff --git a/docs/design/218-gemma-real-integration/README.md b/docs/design/218-gemma-real-integration/README.md index f9fe74f..4e151e1 100644 --- a/docs/design/218-gemma-real-integration/README.md +++ b/docs/design/218-gemma-real-integration/README.md @@ -78,7 +78,7 @@ v0.2.0 (#215) 은 mock 환경에서 100% 동작하지만, 사용자가 "AI 도 - Public API: `FlutterGemma.initialize(huggingFaceToken: String)`, `FlutterGemma.installModel(modelType: ModelType.gemma4).fromNetwork(url).install()`, `FlutterGemma.getActiveModel(maxTokens: 2048)`, `model.createChat()`, `chat.addQueryChunk(Message.text(text, isUser))`, `chat.generateChatResponseAsync()` → `Stream` (`TextResponse | FunctionCallResponse | ThinkingResponse`). - Function calling: **Gemma 4 native function calling** — 별도 `Tool` 객체 주입 없이 `ModelType.gemma4` 의 chat template 이 자동 라우팅. 모델이 호출 결정 시 스트림에 `FunctionCallResponse(name, args)` 1건 emit. (Gemma 4 / Gemma 3n / Phi-4 등 지원 명시) - Schema 전달 경로: prompt 본문에 JSON Schema 를 자연어로 명시 (Gemma 4 의 chat template 이 인식). 
별도 `tools: [...]` 파라미터는 0.16.5 의 createChat 인터페이스 기준 옵션이지 필수 아님 — **OQ-C** 에서 확정. -- **Gemma 4 E2B QAT 모바일 모델** — HuggingFace `litert-community/gemma-4-E2B-it-litert-lm` repo 의 `.task` 또는 `.litertlm` 자산. 4-bit QAT 로 ≈1.5GB peak RAM, 신규 모바일 양자화 포맷은 ≈1GB footprint (2026-06 update). **OQ-A** 확정 시점에 정확한 파일명 (예: `gemma-4-E2B-it-mobile.task`) + SHA-256 핀. +- **Gemma 4 E2B 모델** — HuggingFace `litert-community/gemma-4-E2B-it-litert-lm` repo. **OQ-A 확정 (2026-06-12):** 정확 파일 = `gemma-4-E2B-it.litertlm` (2,588,147,712 bytes ≈ **2.41GB disk**), SHA-256 = `181938105e0eefd105961417e8da75903eacda102c4fce9ce90f50b97139a63c`. 모바일 1GB QAT 변종은 현시점 미공개 (Google 6월 blog 발표 자산 아직 HF 미게시). peak RAM 추정 ≈ 1.5–2GB (가중치 ≈ 1.3GB + KV cache + activation). - **HF access token** — joungmin 본인 계정의 read-only token. 빌드 시점 `--dart-define=HF_TOKEN=hf_xxx` 으로 주입, 런타임에 `String.fromEnvironment('HF_TOKEN')` 으로 읽어 `FlutterGemma.initialize` 에 전달. 토큰을 단말 영속 저장 금지. - **`crypto`** (기존), **`path_provider`** (기존), **`http`** (기존) — 모두 #215 에서 이미 사용 중. - **Android**: `minSdkVersion 24` (MediaPipe LLM Inference 요구사항). 기존 #204 가 26 이므로 통과 가정. @@ -190,14 +190,11 @@ app/ ```dart // app/lib/state/ai_providers.dart const _kModelUrl = - 'https://huggingface.co/litert-community/gemma-4-E2B-it-litert-lm/resolve/main/gemma-4-E2B-it-mobile.task'; -// ▲ 정확 파일명·SHA 는 OQ-A 에서 확정. 일시값으로 둔 후 Developer 가 -// HF page 의 LFS pointer (sha256: ... 라인) 또는 -// `huggingface-cli download --dry-run` 로 SHA 추출해 박는다. -// QAT 1GB 모바일 양자화 변종 출시 시 (2026-06+) 그쪽으로 swap. + 'https://huggingface.co/litert-community/gemma-4-E2B-it-litert-lm/resolve/main/gemma-4-E2B-it.litertlm'; const _kModelSha256 = - ''; -const _kModelType = 'gemma4'; // flutter_gemma ModelType enum 매핑 + '181938105e0eefd105961417e8da75903eacda102c4fce9ce90f50b97139a63c'; +const _kModelTotalBytes = 2588147712; // 2.41 GiB — UI 표시 용 +// 향후 QAT 모바일 1GB 변종이 HF 에 게시되면 swap. v1 은 위 base .litertlm. ``` `Placeholder` 접미사 제거. 
`meta_kv['ai_model_path']`, `meta_kv['ai_model_sha256']` 키도 의미는 동일 (값만 실체). @@ -371,14 +368,14 @@ const _hfToken = String.fromEnvironment('HF_TOKEN', defaultValue: ''); ## 12. 미해결 질문 (Open Questions) -| OQ | 질문 | 결정 시점 | 비고 | -|----|------|----------|------| -| **OQ-A** | Gemma 4 E2B QAT 모바일의 HF repo 정확 경로 + 파일명 + SHA-256? `gemma-4-E2B-it-mobile.task` vs `model.litertlm` vs 2026-06 신규 QAT 1GB 변종? | Developer 단계 첫 step | `litert-community/gemma-4-E2B-it-litert-lm` 또는 `google/gemma-4-E2B-it-qat-mobile-transformers` 둘 중. HF page LFS pointer 의 sha256 라인 사본 | -| **OQ-B** | `huggingFaceToken` 빈 문자열 시 `FlutterGemma.initialize` throw 타입? | Developer 단계 | `try/catch (e)` 처리 | -| **OQ-C** | sampling 파라미터 (temperature 0.4, topK 40, topP 0.95) 를 model-level (`getActiveModel(...)`) 에서 받나, chat-level (`createChat(...)`) 에서 받나? 0.16.5 API 정확 위치 | Developer 단계 | pub.dev API doc 또는 plugin source | -| **OQ-D** | Android `proguard-rules.pro` `-keep` 룰 패턴? | QA 단계 release build 시 | `com.google.mediapipe.**` + flutter_gemma plugin native 클래스 | -| **OQ-E** | Gemma 4 ToU 동의 UI: #215 동의 다이얼로그에 추가? Settings 도움말 링크? | Developer 단계 | 변경 최소화 → Settings 도움말 한 줄 + 외부 링크 (https://ai.google.dev/gemma/terms) | -| **OQ-F** | Gemma 4 thinking mode 토글: v0.3 에서 비활성 유지하지만 0.16.5 API 의 정확한 off 스위치는? | Developer 단계 | `createChat(enableThinking: false)` 형태로 추정, 확인 필요 | +| OQ | 질문 | 상태 | 결정 | +|----|------|------|------| +| **OQ-A** | Gemma 4 E2B 모바일 HF 파일명 + SHA-256? | ✅ 해결 (Developer 2026-06-12) | `litert-community/gemma-4-E2B-it-litert-lm` repo 의 `gemma-4-E2B-it.litertlm` (2,588,147,712 B). SHA256 = `181938105e0eefd105961417e8da75903eacda102c4fce9ce90f50b97139a63c`. QAT 1GB 변종은 미공개. | +| **OQ-B** | `huggingFaceToken` 빈 문자열 시 throw? | ✅ 해결 | **즉시 throw 안 함** — 다운로드 시점까지 deferred. 빈 토큰은 public model 만 허용. 우리는 `null` 이 더 정확하지만 빈 문자열도 안전. | +| **OQ-C** | sampling 파라미터 위치? | ✅ 해결 | **chat-level** — `model.createChat(temperature: 0.4, topK: 40, topP: 0.95)`. 
`getActiveModel` 은 `maxTokens` / `preferredBackend` / `maxConcurrentSessions` 만. (caveat: NPU backend 는 sampling 무시.) | +| **OQ-D** | Android ProGuard rules? | ✅ 해결 | flutter_gemma example app 의 27-line 사본 적용: io.flutter.\*, play.core.\*, mediapipe.\*, protobuf.\*, kotlinx.coroutines.\*. | +| **OQ-E** | Gemma ToU 동의 UI? | ✅ 해결 | Google 표준 템플릿 없음. Settings AI 섹션 하단에 "Gemma 이용약관(https://ai.google.dev/gemma/terms)에 동의합니다" 한 줄. 다이얼로그 본문 변경 없음 (변경 최소화). | +| **OQ-F** | thinking mode off 스위치? | ✅ 해결 | `model.createChat(isThinking: false)` — **default 가 false** 라 명시 안 해도 무방하나 명시적으로 박는다. | --- diff --git a/docs/design/218-gemma-real-integration/fn-gemma_llm_service.md b/docs/design/218-gemma-real-integration/fn-gemma_llm_service.md index 7165918..736725f 100644 --- a/docs/design/218-gemma-real-integration/fn-gemma_llm_service.md +++ b/docs/design/218-gemma-real-integration/fn-gemma_llm_service.md @@ -1,8 +1,17 @@ # 함수 설계서: `GemmaLlmService.load` + `generateStructured` + 보조 (#218) -> **부모 설계서**: ./README.md · **상태**: Draft +> **부모 설계서**: ./README.md · **상태**: Draft (v2) > **작성**: [AI] Architect (2026-06-12) · **구현 대상**: `app/lib/data/ai/gemma_llm_service.dart` · **테스트**: `app/test/data/ai/gemma_llm_service_test.dart` +## 변경 이력 (v2, 2026-06-12 Developer 검증 후) + +flutter_gemma 0.16.5 의 `InferenceChat` 구현을 직접 읽어 확인한 결과: +- Gemma 4 (ModelType.gemma4) 의 function calling 은 **SDK 가 `createChat(tools: [Tool(...)])` 의 tools 목록에서 `<|tool>declaration:...` 토큰을 직접 렌더**한다 (`lib/core/chat.dart:94`). +- 따라서 §C `_appendSchemaInstruction` 는 Gemma 4 에선 **double-wrap** 을 유발한다. v2 에선 **§C 제거**, §B 는 `Tool` 객체를 `createChat` 에 전달하는 방식으로 변경. +- §D `_collectFunctionCall` 는 변경 없음 — 여전히 `Stream<ModelResponse>` 에서 첫 `FunctionCallResponse` 만 추출. + +남은 4 함수 (§A load / §B generateStructured / §C deprecated / §D collectFunctionCall) 중 코드 대상은 3 개. + 이 문서는 `GemmaLlmService` 가 노출하는 2 개 public 메서드 + 2 개 file-private 헬퍼를 한 묶음으로 설계한다. 모두 flutter_gemma 0.16.5 의 native 경계를 다루므로 한 문서에서 다루는 게 응집도 측면에서 옳다.
| # | 함수 | 가시성 | @@ -67,7 +76,7 @@ Future load(); ### 8. 복잡도 / 성능 - **시간**: cold start 1–3 초 (모델 파일 read + native init + tokenizer load). 첫 호출 만, 이후 캐시. -- **공간**: peak RAM ≈ 1.5GB (Gemma 4 E2B Q4_0). QAT 1GB 변종 채택 시 ≈ 1GB. +- **공간**: peak RAM ≈ 1.5–2GB (Gemma 4 E2B Q4 .litertlm, 가중치 ~1.3GB + KV cache + activation). disk ≈ 2.41GB. - **호출 빈도**: 사용자 1 세션 당 0–1 회 (#219 F1 의 60s idle unload 가 들어오면 다회 가능). ### 9. 의존성 @@ -116,20 +125,27 @@ loaded 상태의 Gemma 4 모델에 prompt + JSON Schema 를 전달하여 단일 - 모델 latent state 변경 (다음 호출은 fresh chat). - log: prompt length, latency, FCR 수신 여부 (prompt 본문 X — 프라이버시). -### 5. 동작 / 알고리즘 +### 5. 동작 / 알고리즘 (v2) ``` 1. if (!_loaded) throw StateError('LlmService not loaded'); -2. final augmented = _appendSchemaInstruction(prompt, schema); -3. final chat = await _model!.createChat(); -4. try { -5. await chat.addQueryChunk(Message.text(text: augmented, isUser: true)); -6. final stream = chat.generateChatResponseAsync(); -7. final fnName = schema['name'] as String; -8. final args = await _collectFunctionCall(stream, fnName); -9. return args; -10. } finally { -11. await chat.close(); // 항상 정리 -12. } +2. final fnName = schema['name'] as String; +3. final fnDesc = (schema['description'] as String?) ?? ''; +4. final fnParams = schema['parameters'] as Map<String, dynamic>; +5. final tool = Tool(name: fnName, description: fnDesc, parameters: fnParams); +6. final chat = await _model!.createChat( + modelType: ModelType.gemma4, + supportsFunctionCalls: true, + toolChoice: ToolChoice.required, // 강제 FCR + tools: [tool], + ); +7. try { +8. await chat.addQueryChunk(Message.text(text: prompt, isUser: true)); +9. final stream = chat.generateChatResponseAsync(); +10. final args = await _collectFunctionCall(stream, fnName); +11. return args; +12. } finally { +13. await chat.close(); // 항상 정리 +14. } ``` caller (#215 `suggestFrame`) 가 `.timeout(Duration(seconds: 10))` 적용 → timeout 시 본 함수의 `await` 가 throw 됨 → finally 의 `chat.close()` 가 실행되어 native session leak 방지.
@@ -173,7 +189,13 @@ caller (#215 `suggestFrame`) 가 `.timeout(Duration(seconds: 10))` 적용 → ti --- -## §C. `_appendSchemaInstruction(prompt, schema)` +## §C. (DEPRECATED — v2) `_appendSchemaInstruction(prompt, schema)` + +> **v2 결정**: Gemma 4 SDK 가 `Tool` 객체에서 직접 declaration 토큰을 렌더하므로, prompt 측에서 schema 안내문을 덧붙이면 double-wrap 이 된다. **본 함수는 구현하지 않는다.** +> +> 아래 §C 본문은 v1 (gemmaIt fallback) 시나리오용 참고 자료로 보존하나, v2 코드 대상에서 제외한다. 단위 테스트도 작성하지 않는다. + +원본 본문 (참고용): ### 1. 시그니처 ```dart diff --git a/docs/guides/ai-help-onboarding.md b/docs/guides/ai-help-onboarding.md index 44ba459..72afc44 100644 --- a/docs/guides/ai-help-onboarding.md +++ b/docs/guides/ai-help-onboarding.md @@ -20,7 +20,7 @@ life-helper 는 사용자가 입력한 자유 문장(예: "술 끊고 싶어") 1. 하단 탭에서 **설정** 진입. 2. "AI 도움 켜기" 토글 탭. 3. 동의 다이얼로그가 뜹니다: - - **파일 크기 ≈ 1.5GB** (Gemma 4 E2B Q4_0 모델 — 단말에 한 번만 다운로드) + - **파일 크기 ≈ 2.4GB** (Gemma 4 E2B Q4_0 모델 — 단말에 한 번만 다운로드) - **WiFi 연결 권장** (셀룰러 대역폭 절약) - 모든 처리는 단말 — 입력 텍스트 외부 송출 없음 4. **"동의하고 다운로드"** 탭 → 백그라운드 다운로드 시작. @@ -68,9 +68,9 @@ life-helper 는 사용자가 입력한 자유 문장(예: "술 끊고 싶어") 1. **설정** → "AI 도움 켜기" 토글 OFF. 2. 확인 다이얼로그: - 모델 파일이 단말에서 **즉시 삭제** 됩니다. - - 약 1.5GB 의 저장공간이 확보됩니다. + - 약 2.4GB 의 저장공간이 확보됩니다. - 다시 켜면 다시 다운로드해야 합니다. -3. **"끄고 삭제"** 탭 → "공간 확보됨 1500 MB" 토스트. +3. **"끄고 삭제"** 탭 → "공간 확보됨 2469 MB" 토스트. 진행 중인 다운로드가 있어도 깔끔히 중단되고, `.tmp` 임시 파일까지 함께 삭제됩니다. @@ -86,7 +86,7 @@ A. 직접 입력란을 고쳐 쓰면 됩니다. AI 제안은 채우기 도우미 A. 언제든 끌 수 있고, 끄면 즉시 삭제됩니다. 다시 켜면 다시 받아야 한다는 점만 유의하세요. **Q. v0.2.0 에서 다운로드가 항상 실패합니다.** -A. v0.2.0 은 모델 URL 이 미확정 (OQ-1) 인 상태로 출시되어, 실제 다운로드는 의도된 graceful 실패 경로로 안내됩니다. 실 모델 통합은 후속 버전 (v0.3.x) 에서 제공됩니다. 그동안 수동 입력 경로는 정상 동작합니다. +A. v0.2.0 은 모델 URL 이 미확정 (OQ-1) 인 상태로 출시되어, 실제 다운로드는 의도된 graceful 실패 경로로 안내됩니다. **v0.3.0 부터 실 Gemma 4 E2B 모델 (HuggingFace) 다운로드가 활성화되었습니다.** v0.3.0 이상으로 업데이트하기 전까지는 수동 입력 경로가 정상 동작합니다. ## 관련 문서