[03-Developer] #260 in-app tool calling (Gemma 4 multi-turn)

ADR-0005 in-process tool runtime — 6 tools (catalog 2 + tracker 2 +
habit 2), ToolDispatcher with JSON-schema validation + modal ConfirmGate
for destructive ops, multi-turn LlmChatSession abstraction wired to
flutter_gemma 0.16.5 (ToolChoice.auto), ChatSessionController with
MAX_TURNS=4 safety + 8-turn history hint, ChatScreen entry behind AI
opt-in. R3/R7/R8 enforced inside handlers. 41 new tests (envelope,
catalog/tracker/habit tools, dispatcher, controller loop) — 151 total
passing.

Refs #260
This commit is contained in:
2026-06-15 10:42:43 +09:00
parent eca097aa2c
commit b1bed4d5ca
21 changed files with 2313 additions and 0 deletions

View File

@@ -0,0 +1,188 @@
import 'package:flutter/material.dart';
import 'package:flutter_test/flutter_test.dart';
import 'package:life_helper/ai/tools/tool_dispatcher.dart';
import 'package:life_helper/ai/tools/tool_envelope.dart';
import 'package:life_helper/ai/tools/tool_registry.dart';
import 'package:life_helper/data/ai/llm_service.dart';
import 'package:life_helper/state/chat_providers.dart';
import '../ai/tools/_tool_test_helpers.dart';
/// Bundles the objects a chat-controller test needs to drive and inspect.
class _Harness {
  _Harness(this.controller, this.mock, this.db);

  /// The controller under test.
  final ChatSessionController controller;

  /// The LLM double whose chat events the tests script in advance.
  final MockLlmService mock;

  /// Database handle backing the tool dependencies; tests close it in
  /// their teardown. Left as `dynamic`, so `close()` is a dynamic call.
  final dynamic db;
}
/// Builds a [ChatSessionController] wired to a loaded [MockLlmService] and
/// the default tool registry, and returns it together with the mock and
/// the database handle obtained from `bootstrapToolDeps`.
///
/// A single [ToolRegistry] instance is shared between the dispatcher and
/// the tool list handed to the controller, so the tools the controller
/// advertises are exactly the ones the dispatcher can execute.
///
/// The caller owns the returned objects and is responsible for disposing
/// the controller and closing the database.
// ignore: library_private_types_in_public_api
Future<_Harness> makeHarness() async {
  final ctx = await bootstrapToolDeps();

  final mock = MockLlmService();
  await mock.load();

  // Build the registry once; previously two independent `defaults()`
  // instances were created, one for dispatch and one for the tool list.
  final registry = ToolRegistry.defaults();

  final controller = ChatSessionController(
    llm: mock,
    dispatcher: ToolDispatcher(registry: registry),
    deps: ctx.deps,
    tools: registry.all.toList(),
  );

  return _Harness(controller, mock, ctx.db);
}
/// Mounts a minimal [MaterialApp] and returns the [BuildContext] seen by
/// its home [Builder].
///
/// The returned context is live (mounted) for as long as this widget tree
/// stays pumped; replacing the tree with another widget unmounts it, which
/// the destructive-cancel test relies on.
Future<BuildContext> mountContext(WidgetTester tester) async {
  late BuildContext homeContext;

  final app = MaterialApp(
    home: Builder(
      builder: (context) {
        // Capture the context handed to the builder during the first build.
        homeContext = context;
        return const SizedBox.shrink();
      },
    ),
  );

  await tester.pumpWidget(app);
  return homeContext;
}
/// Widget tests for the [ChatSessionController] turn loop, driven by a
/// scripted [MockLlmService]: plain text replies, tool calls, cancelled
/// destructive calls, the max-turn guard, empty input, `clear()`, and
/// forwarding of tool results back to the chat session.
void main() {
  // Creates a harness and registers its cleanup with the running test.
  //
  // The teardown is async and awaits `db.close()` so that, if closing is
  // asynchronous, the framework waits for it and surfaces any error
  // instead of dropping the future. Awaiting is harmless if `close()`
  // turns out to be synchronous.
  Future<_Harness> harnessForTest() async {
    final h = await makeHarness();
    addTearDown(() async {
      h.controller.dispose();
      await h.db.close();
    });
    return h;
  }

  // Text-only reply: the turn ends with a user message followed by a
  // model message.
  testWidgets('자연어 응답만 — model 메시지로 종료', (tester) async {
    final h = await harnessForTest();
    final ctx = await mountContext(tester);

    h.mock.enqueueChatEvents([const LlmTextChunk('안녕!')]);
    await h.controller.userTurn('hi', ctx);

    final messages = h.controller.state.messages;
    expect(h.controller.state.isStreaming, isFalse);
    expect(messages.length, 2);
    expect(messages.first, isA<UserChatMessage>());
    expect(messages.last, isA<ModelChatMessage>());
    expect((messages.last as ModelChatMessage).text, '안녕!');
  });

  // One tool call followed by a text reply yields three messages, with the
  // tool-call message in the middle carrying a successful result.
  testWidgets('1 tool call + 응답 — 3 메시지', (tester) async {
    final h = await harnessForTest();
    final ctx = await mountContext(tester);

    h.mock.enqueueChatEvents([
      const LlmFunctionCall('search_catalog', {}),
    ]);
    h.mock.enqueueChatEvents([
      const LlmTextChunk('카탈로그 결과를 확인했어요.'),
    ]);
    await h.controller.userTurn('카탈로그 보여줘', ctx);

    final messages = h.controller.state.messages;
    expect(messages.length, 3);
    expect(messages[1], isA<ToolCallChatMessage>());
    expect((messages[1] as ToolCallChatMessage).result, isA<ToolOk>());
    expect(h.controller.state.error, isNull);
  });

  // A destructive tool call (`add_habit`, per the test name) issued with an
  // unmounted context must be recorded as cancelled.
  testWidgets('destructive + unmounted context → ToolCancelled',
      (tester) async {
    final h = await harnessForTest();
    final ctx = await mountContext(tester);

    // Force the captured context to unmount by replacing the widget tree.
    await tester.pumpWidget(const SizedBox.shrink());
    expect(ctx.mounted, isFalse);

    h.mock.enqueueChatEvents([
      const LlmFunctionCall('add_habit', {
        'protocol_id': 'morning_sunlight',
        'frame_level': 'L2',
        'framed_text': '햇빛',
      }),
    ]);
    h.mock.enqueueChatEvents([const LlmTextChunk('취소했어요.')]);
    await h.controller.userTurn('습관 추가', ctx);

    final toolMsg =
        h.controller.state.messages.whereType<ToolCallChatMessage>().single;
    expect(toolMsg.result, isA<ToolCancelled>());
  });

  // Scripts one more tool call than `kChatMaxTurns`, so every model turn
  // is a tool call, and expects the controller to stop with an error.
  testWidgets('MAX_TURNS 초과 → error 세팅', (tester) async {
    final h = await harnessForTest();
    final ctx = await mountContext(tester);

    for (var i = 0; i < kChatMaxTurns + 1; i++) {
      h.mock.enqueueChatEvents([
        const LlmFunctionCall('search_catalog', {}),
      ]);
    }
    await h.controller.userTurn('무한루프', ctx);

    expect(h.controller.state.error, contains('루프'));
    expect(h.controller.state.isStreaming, isFalse);
  });

  // Whitespace-only input is ignored: no messages and no chat started.
  testWidgets('빈 입력 무시', (tester) async {
    final h = await harnessForTest();
    final ctx = await mountContext(tester);

    await h.controller.userTurn(' ', ctx);

    expect(h.controller.state.messages, isEmpty);
    expect(h.mock.chatStartCount, 0);
  });

  // `clear()` empties the message list and leaves no error behind.
  testWidgets('clear() 가 메시지 초기화', (tester) async {
    final h = await harnessForTest();
    final ctx = await mountContext(tester);

    h.mock.enqueueChatEvents([const LlmTextChunk('hi')]);
    await h.controller.userTurn('x', ctx);
    expect(h.controller.state.messages, isNotEmpty);

    h.controller.clear();

    expect(h.controller.state.messages, isEmpty);
    expect(h.controller.state.error, isNull);
  });

  // The result of a dispatched tool is handed back to the chat session,
  // tagged with the tool name and an `ok` status.
  testWidgets('tool result 가 다음 sendToolResult 로 전달', (tester) async {
    final h = await harnessForTest();
    final ctx = await mountContext(tester);

    h.mock.enqueueChatEvents([
      const LlmFunctionCall('list_active_habits', {}),
    ]);
    h.mock.enqueueChatEvents([
      const LlmTextChunk('현재 습관 0개.'),
    ]);
    await h.controller.userTurn('내 습관 알려줘', ctx);

    final chat = h.mock.lastChat!;
    expect(chat.userInputs, ['내 습관 알려줘']);
    expect(chat.toolResults.length, 1);
    expect(chat.toolResults.first.$1, 'list_active_habits');
    final submitted = chat.toolResults.first.$2;
    expect(submitted['status'], 'ok');
  });
}