From 5d8b0fcdb8550ec5587ac306559aebd233d29ef3 Mon Sep 17 00:00:00 2001 From: joungmin Date: Mon, 30 Mar 2026 06:37:07 +0000 Subject: [PATCH] Initial project setup with env template and gitignore - .gitignore: Java/Maven, Node.js, IDE, OS, credentials - .env.sample: backend + frontend environment variable template - README.md: project overview and getting started guide - CLAUDE.md: development rules and guidelines - docs/: SUNDOL spec and design patterns guide Co-Authored-By: Claude Opus 4.6 (1M context) --- .env.sample | 55 ++ .gitignore | 69 +++ CLAUDE.md | 70 +++ README.md | 59 ++ docs/DESIGN_PATTERNS_GUIDE.md | 104 ++++ docs/SUNDOL_SPEC.md | 1090 +++++++++++++++++++++++++++++++++ 6 files changed, 1447 insertions(+) create mode 100644 .env.sample create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 README.md create mode 100644 docs/DESIGN_PATTERNS_GUIDE.md create mode 100644 docs/SUNDOL_SPEC.md diff --git a/.env.sample b/.env.sample new file mode 100644 index 0000000..d6c41f5 --- /dev/null +++ b/.env.sample @@ -0,0 +1,55 @@ +# ============================================ +# SUNDOL Environment Variables +# Copy this file to .env and fill in values +# cp .env.sample .env +# ============================================ + +# --------------------- +# Database (Oracle 23ai) +# --------------------- +DB_HOST=localhost +DB_SERVICE=FREEPDB1 +DB_USER=sundol +DB_PASSWORD= + +# --------------------- +# JWT +# --------------------- +JWT_SECRET= + +# --------------------- +# Google OAuth +# --------------------- +GOOGLE_CLIENT_ID= +GOOGLE_CLIENT_SECRET= + +# --------------------- +# OCI GenAI +# --------------------- +OCI_COMPARTMENT_ID= +OCI_REGION=us-chicago-1 + +# --------------------- +# YouTube (optional) +# --------------------- +YOUTUBE_API_KEY= + +# --------------------- +# Redis +# --------------------- +REDIS_HOST=localhost +REDIS_PORT=6379 + +# --------------------- +# Frontend (Next.js) +# --------------------- 
+NEXT_PUBLIC_API_URL=http://localhost:8080 +NEXTAUTH_URL=http://localhost:3000 +NEXTAUTH_SECRET= +API_URL=http://localhost:8080 + +# --------------------- +# Gitea +# --------------------- +GITEA_USER=joungmin +GITEA_PASSWORD= diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fd08fbe --- /dev/null +++ b/.gitignore @@ -0,0 +1,69 @@ +# ======================== +# Environment / Secrets +# ======================== +.env +.env.local +.env.production +.env.*.local + +# ======================== +# Java / Maven (Backend) +# ======================== +target/ +*.class +*.jar +*.war +*.ear +*.log +hs_err_pid* + +# Maven wrapper +.mvn/wrapper/maven-wrapper.jar + +# ======================== +# Node.js (Frontend) +# ======================== +node_modules/ +.next/ +out/ +build/ +dist/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.pnpm-debug.log* + +# ======================== +# IDE +# ======================== +.idea/ +*.iml +*.iws +.vscode/ +*.swp +*.swo +*~ + +# ======================== +# OS +# ======================== +.DS_Store +Thumbs.db + +# ======================== +# OCI / Cloud credentials +# ======================== +.oci/ +Wallet_*/ +*.pem +*.key + +# ======================== +# Docker +# ======================== +oracle_data/ + +# ======================== +# Claude Code +# ======================== +.claude/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..607b973 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,70 @@ +# 작업 규칙 (절대 규칙) + +- 코드를 수정하기 전에 반드시 계획을 사용자에게 설명하고 컨펌을 받은 후 진행할 것. 컨펌 없이 코드를 절대 건드리지 말 것. +- 사용자가 질문하면 질문에만 답할 것. 답변 후 임의로 코드를 수정하지 말 것. +- 문제를 발견해도 직접 고치지 말고 보고만 할 것. +- 전략 로직(매수/매도 조건, LLM 프롬프트, 계층 구조)은 절대 자의적으로 변경 금지. +- 존댓말 사용. +- null 처리, exception 처리 시 무시/빈값 처리 하지 않도록 한다. (ignored, 빈 리스트 반환 등 금지) +- 변경이 단순하더라도 절차를 생략하지 말 것. "단순하니까 괜찮겠지"라는 임의 판단 금지. + +# 배포 전 필수 절차 (순서대로) + +1. 컴파일 확인 +2. PMD 정적 분석 실행 +3. 변경된 코드 전체를 다시 읽고 로직 리뷰 (재시도 시 데이터 중복 적재, null 참조, 카운터/리스트 누적 등 세는 로직 문제 확인) +4. 
점검 결과를 사용자에게 보고하고 승인받은 후 배포 진행 + +# 빌드/배포 + +- 빌드: `cd backend && export JAVA_HOME=/opt/homebrew/Cellar/openjdk/25.0.2/libexec/openjdk.jdk/Contents/Home && set -a && source ../.env && set +a && mvn package -q -DskipTests` +- 컴파일만: `cd backend && export JAVA_HOME=/opt/homebrew/Cellar/openjdk/25.0.2/libexec/openjdk.jdk/Contents/Home && set -a && source ../.env && set +a && mvn compile` +- 배포 시 반드시 `git push origin main` 포함 + +# DB 접속 (Oracle Autonomous DB - SQLcl) + +- 환경변수 파일: `/Users/joungminko/devkit/account_manager/.env` (admin 비밀번호는 이 파일의 `DB_ADMIN_PASSWORD`로 관리하고, 문서·저장소에 평문으로 적지 말 것) +- SQLcl 실행: +```bash +export JAVA_HOME=/opt/homebrew/Cellar/openjdk/25.0.2/libexec/openjdk.jdk/Contents/Home +export TNS_ADMIN=/Users/joungminko/devkit/db_conn/Wallet_WKW7PT1B3PIK6DTI +set -a && source /Users/joungminko/devkit/account_manager/.env && set +a && /opt/homebrew/Caskroom/sqlcl/25.4.2.044.1837/sqlcl/bin/sql "admin/${DB_ADMIN_PASSWORD}@wkw7pt1b3pik6dti_medium" +``` +- DDL 변경이 필요하면 SQLcl로 직접 ALTER TABLE 실행할 것 + +# 코드 설계 원칙 + +- 최대한 모듈화하여 재활용 가능하도록 설계할 것 +- 아래 디자인 패턴 가이드를 참조하여 적절한 패턴을 적용할 것 +- 과도한 패턴 사용은 지양하고, 문제 복잡도에 맞는 수준으로 적용할 것 + +# 소프트웨어 디자인 패턴 참조 가이드 + +디자인 패턴은 개발자들이 직면하는 반복적인 문제들을 해결하기 위한 검증된 방법론입니다. + +## 1. 생성 패턴 (Creational Patterns) + +- **싱글톤 (Singleton)**: 특정 클래스의 인스턴스가 오직 하나만 생성되도록 보장 (전역 설정, 로깅, 캐시 관리) +- **팩토리 (Factory)**: 객체 생성 로직을 캡슐화하여 클라이언트에서 직접 생성하지 않도록 함 (조건부 객체 생성) +- **빌더 (Builder)**: 복잡한 객체를 단계별로 구성하여 생성 (매개변수가 많거나 선택적일 때) +- **프로토타입 (Prototype)**: 기존 객체를 복제하여 기능 확장 + +## 2. 구조 패턴 (Structural Patterns) + +- **퍼사드 (Facade)**: 복잡한 내부 시스템을 단순한 인터페이스로 감싸기 (API 래퍼, 복잡한 라이브러리 단순화) +- **어댑터 (Adapter)**: 서로 호환되지 않는 인터페이스를 연결 +- **프록시 (Proxy)**: 실제 객체 대신 대리인 객체를 사용하여 접근 제어나 부가 기능 수행 (지연 로딩, 캐싱) + +## 3. 
행위 패턴 (Behavioral Patterns) + +- **옵저버 (Observer/Pub-Sub)**: 한 객체의 상태 변화를 여러 구독자들에게 알림 (이벤트 기반 시스템) +- **이터레이터 (Iterator)**: 컬렉션의 내부 구조를 노출하지 않고 요소들을 순회 +- **전략 (Strategy)**: 알고리즘을 외부에서 주입받아 사용하여 동작을 확장 (Open-Closed 원칙 준수) +- **메디에이터 (Mediator)**: 객체들이 직접 통신하지 않고 중재자를 거쳐 복잡한 관계 단순화 +- **스테이트 (State)**: 객체의 내부 상태에 따라 행동이 달라지게 함 (복잡한 조건문을 클래스로 대체) + +## 패턴 적용 주의사항 + +- 단순한 문제에 복잡한 패턴 적용 지양 +- 일부 패턴은 성능 오버헤드 발생 가능하므로 성능 고려 +- 팀원들이 이해할 수 있는 수준의 패턴 선택 diff --git a/README.md b/README.md new file mode 100644 index 0000000..3c0f32d --- /dev/null +++ b/README.md @@ -0,0 +1,59 @@ +# SUNDOL + +**Smart Unified Natural Dog-Operated Layer** + +Personal Knowledge House · AI Assistant · Productivity Hub + +## Features + +- **Knowledge Ingestion** — YouTube, blog, news, raw text 자동 수집 및 처리 +- **Semantic Search** — Oracle 23ai VECTOR 기반 의미 검색 +- **AI Chat (RAG)** — 지식 기반 대화, 출처 인용 +- **Study Cards (SRS)** — SM-2 간격 반복 학습 카드 +- **Todos** — 작업/하위작업 관리 +- **Habit Tracker** — 습관 추적, 스트릭 관리 + +## Tech Stack + +| Layer | Technology | +|-------|-----------| +| Backend | Spring Boot 3, Java 21 | +| Frontend | Next.js 14, TypeScript, Tailwind CSS | +| Database | Oracle 23ai (VECTOR support) | +| AI | OCI Generative AI (Cohere / Llama) | +| Auth | Google SSO + JWT | +| Cache | Redis | + +## Getting Started + +```bash +# 1. 환경변수 설정 +cp .env.sample .env +# .env 파일에 실제 값 입력 + +# 2. Docker Compose로 실행 +docker-compose up -d + +# 3. 개별 실행 (Backend) +cd sundol-backend +mvn spring-boot:run + +# 4. 개별 실행 (Frontend) +cd sundol-frontend +npm install && npm run dev +``` + +## Project Structure + +``` +sundol/ +├── sundol-backend/ # Spring Boot 3 +├── sundol-frontend/ # Next.js 14 +├── db/migration/ # Flyway SQL scripts +├── docs/ # Specifications +├── docker-compose.yml +├── .env.sample # Environment variable template +└── README.md +``` + +자세한 스펙은 [docs/SUNDOL_SPEC.md](docs/SUNDOL_SPEC.md) 참조. 
diff --git a/docs/DESIGN_PATTERNS_GUIDE.md b/docs/DESIGN_PATTERNS_GUIDE.md new file mode 100644 index 0000000..0eee3a2 --- /dev/null +++ b/docs/DESIGN_PATTERNS_GUIDE.md @@ -0,0 +1,104 @@ +# 소프트웨어 디자인 패턴 참조 가이드 + +디자인 패턴은 개발자들이 직면하는 반복적인 문제들을 해결하기 위한 검증된 방법론입니다. + +## 1. 생성 패턴 (Creational Patterns) + +객체 생성 방식과 관련된 패턴들 + +### 싱글톤 (Singleton) + +- **목적**: 특정 클래스의 인스턴스가 오직 하나만 생성되도록 보장 +- **사용 예**: 앱 전체의 설정 데이터, 전역 상태 관리, 공유 자원 접근 +- **특징**: 애플리케이션 전체에서 단일 인스턴스 보장 + +### 팩토리 (Factory) + +- **목적**: 객체 생성 로직을 캡슐화하여 클라이언트에서 직접 생성하지 않도록 함 +- **사용 예**: 플랫폼에 따라 다른 버튼 객체 생성, 버거 주문 시스템 +- **특징**: new 키워드 대신 함수나 메서드를 통한 객체 생성 + +### 빌더 (Builder) + +- **목적**: 복잡한 객체를 단계별로 구성하여 생성 +- **사용 예**: 많은 파라미터가 필요한 객체 생성 +- **특징**: 메서드 체이닝을 통한 단계별 구성 후 build() 메서드로 완성 + +### 프로토타입 (Prototype) + +- **목적**: 클래스 상속 대신 기존 객체를 복제하여 기능 확장 +- **사용 예**: 자바스크립트의 프로토타입 상속 +- **특징**: 객체 복제(Clone)를 통한 기능 확장 + +## 2. 구조 패턴 (Structural Patterns) + +객체 간의 관계와 조합에 관련된 패턴들 + +### 퍼사드 (Facade) + +- **목적**: 복잡한 내부 시스템을 단순한 인터페이스로 감싸기 +- **사용 예**: API 래퍼 클래스, 복잡한 라이브러리의 간단한 인터페이스 +- **특징**: 복잡한 세부사항을 숨기고 상위 수준의 인터페이스 제공 + +### 어댑터 (Adapter) + +- **목적**: 서로 호환되지 않는 인터페이스를 연결 +- **사용 예**: 마이크로 USB를 USB 포트에 연결하는 어댑터 +- **특징**: 기존 클래스를 수정하지 않고 다른 인터페이스와 호환 + +### 프록시 (Proxy) + +- **목적**: 실제 객체 대신 대리인 객체를 사용하여 접근 제어나 부가 기능 수행 +- **사용 예**: Vue.js의 반응형 시스템, 지연 로딩, 캐싱 +- **특징**: 실제 객체에 대한 접근을 제어하거나 추가 기능 제공 + +## 3. 
행위 패턴 (Behavioral Patterns) + +객체 간의 통신과 상호작용에 관련된 패턴들 + +### 옵저버 (Observer/Pub-Sub) + +- **목적**: 한 객체의 상태 변화를 여러 구독자들에게 알림 +- **사용 예**: Firebase의 실시간 데이터 업데이트, 유튜브 구독 알림 시스템 +- **특징**: 1:N 관계의 실시간 알림 시스템 + +### 이터레이터 (Iterator) + +- **목적**: 컬렉션의 내부 구조를 노출하지 않고 요소들을 순회 +- **사용 예**: 배열, 연결 리스트, 트리 등의 순회 +- **특징**: 복잡한 자료구조 내부를 몰라도 표준화된 방법으로 순회 가능 + +### 전략 (Strategy) + +- **목적**: 알고리즘을 외부에서 주입받아 사용하여 동작을 확장 +- **사용 예**: 정렬 알고리즘 선택, 결제 방식 선택 +- **특징**: Open-Closed 원칙 준수, 기존 코드 수정 없이 동작 확장 + +### 메디에이터 (Mediator) + +- **목적**: 객체들이 직접 통신하지 않고 중재자를 거쳐 복잡한 관계 단순화 +- **사용 예**: Express.js의 미들웨어, 채팅방 시스템 +- **특징**: 객체 간 결합도 감소, 중앙 집중식 통신 관리 + +### 스테이트 (State) + +- **목적**: 객체의 내부 상태에 따라 행동이 완전히 달라지게 함 +- **사용 예**: 게임 캐릭터 상태, UI 컴포넌트 상태 +- **특징**: 복잡한 조건문(if/switch)을 클래스로 대체하여 관리 + +## 패턴 적용 가이드 + +### 언제 사용할까? + +- **싱글톤**: 전역 설정, 로깅, 캐시 관리 +- **팩토리**: 객체 생성이 복잡하거나 조건부일 때 +- **빌더**: 생성자 매개변수가 많거나 선택적일 때 +- **옵저버**: 이벤트 기반 시스템, 상태 변화 알림 +- **전략**: 알고리즘 교체가 필요한 경우 +- **퍼사드**: 복잡한 API를 단순화할 때 + +### 주의사항 + +- **과도한 패턴 사용 금지**: 단순한 문제에 복잡한 패턴 적용 지양 +- **성능 고려**: 일부 패턴은 성능 오버헤드 발생 가능 +- **팀 이해도**: 팀원들이 이해할 수 있는 수준의 패턴 선택 diff --git a/docs/SUNDOL_SPEC.md b/docs/SUNDOL_SPEC.md new file mode 100644 index 0000000..58087cb --- /dev/null +++ b/docs/SUNDOL_SPEC.md @@ -0,0 +1,1090 @@ +# S.U.N.D.O.L — System Specification +**Smart Unified Natural Dog-Operated Layer** +Personal Knowledge House · AI Assistant · Productivity Hub + +Intended for: Claude Code autonomous implementation +Version: 1.0.0 | Date: 2026-03-30 + +--- + +## Table of Contents + +1. Product Vision & Features +2. System Architecture +3. Database Schema — Oracle 23ai +4. Backend — Spring Boot 3 +5. AI Layer — OCI Generative AI +6. Frontend — Next.js 14 +7. Security — Google SSO + JWT +8. REST API Reference +9. Key Algorithms +10. Environment Variables +11. Project Structure + +--- + +## 1. Product Vision & Features + +SUNDOL is a personal intelligence layer. 
It ingests anything valuable (YouTube videos, blog posts, news articles, raw text), extracts and understands the content, and makes it instantly searchable and conversational through AI. It also manages tasks and habits — a single hub for knowledge, productivity, and self-improvement. + +### Feature Areas + +| Feature | What it does | Key details | +|---------|-------------|-------------| +| Knowledge Ingestion | Add YouTube, blog, news, raw text, or any URL | Auto-extracts transcript (YT), crawls web pages (Jsoup), stores raw text | +| Semantic Search | Find knowledge by meaning, not keywords | Oracle 23ai VECTOR_DISTANCE cosine similarity + keyword hybrid | +| AI Chat (RAG) | Ask questions answered from your knowledge base | Retrieval-Augmented Generation with cited source chunks, chat history | +| Study Cards (SRS) | Auto-generate flashcards from any knowledge item | SM-2 spaced repetition scheduler, ease factor, review intervals | +| Todos | Task and subtask management | Priority, due date, status, nested subtasks | +| Habit Tracker | Build good habits, break bad ones | Daily check-ins, streak tracking (current + best), BUILD/STOP habit types | +| Tagging | Organise all knowledge with custom tags | Many-to-many tag–item, color-coded | + +--- + +## 2. 
System Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ CLIENT TIER │ +│ Next.js 14 (App Router, TypeScript) │ +│ Google OAuth 2.0 initiation · JWT in httpOnly cookies │ +└────────────────────┬────────────────────────────────────┘ + │ HTTPS · JWT Bearer token +┌────────────────────▼────────────────────────────────────┐ +│ SECURITY LAYER │ +│ Spring Security 6 · JwtAuthFilter · Google ID verify │ +└────────────────────┬────────────────────────────────────┘ + │ +┌────────────────────▼────────────────────────────────────┐ +│ SPRING BOOT 3 — SERVICE LAYER │ +│ Auth · Ingest · Search · Chat · Todo · Habit │ +│ StudyCard · Tag · Background @Async workers │ +└──────────┬──────────────────────────┬───────────────────┘ + │ │ +┌──────────▼──────────┐ ┌───────────▼───────────────────┐ +│ AI / LLM LAYER │ │ DATA LAYER │ +│ OCI GenAI Service │ │ Oracle DB 23ai │ +│ · Cohere / Llama │ │ · Relational tables │ +│ · Embedding model │ │ · VECTOR column (chunks) │ +│ YouTube Transcript │ │ · HNSW vector index │ +│ Jsoup web crawler │ │ Flyway migrations │ +└─────────────────────┘ └───────────────────────────────┘ +``` + +### Communication Patterns + +- **Client → API**: HTTPS REST, `Authorization: Bearer ` on all authenticated endpoints +- **Google SSO**: Client → Google consent → Google ID token → Spring verifies → issues own JWT pair (access 15min + refresh 30d) +- **Ingest pipeline**: `POST /api/ingest` returns `202 Accepted`. `@Async` worker extracts text → chunks → embeds via OCI GenAI → stores VECTOR in Oracle +- **RAG query**: user query → embed (OCI) → VECTOR_DISTANCE search → top-K chunks → build prompt → OCI GenAI completion → response with citations +- **Habit streaks**: nightly `@Scheduled` job recomputes streaks, marks missed if no log entry for that day + +--- + +## 3. Database Schema — Oracle 23ai + +> **Oracle 23ai VECTOR**: Use `VECTOR(1024, FLOAT32)` to match OCI GenAI embed-multilingual-v3 (1024 dimensions). 
Similarity search uses `VECTOR_DISTANCE(embedding, :queryVec, COSINE)` with an HNSW index. No separate vector DB needed. + +### USERS + +```sql +CREATE TABLE users ( + id RAW(16) DEFAULT SYS_GUID() PRIMARY KEY, + email VARCHAR2(320) NOT NULL UNIQUE, + display_name VARCHAR2(255), + avatar_url VARCHAR2(1000), + google_sub VARCHAR2(255) UNIQUE, -- Google subject ID + refresh_token VARCHAR2(1000), -- hashed rotating refresh token + created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL, + updated_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL +); +``` + +### KNOWLEDGE_ITEMS + +```sql +-- type: YOUTUBE | BLOG | NEWS | TEXT | URL +-- status: PENDING | EXTRACTING | CHUNKING | EMBEDDING | READY | FAILED +CREATE TABLE knowledge_items ( + id RAW(16) DEFAULT SYS_GUID() PRIMARY KEY, + user_id RAW(16) NOT NULL REFERENCES users(id) ON DELETE CASCADE, + type VARCHAR2(20) NOT NULL, + title VARCHAR2(1000), + raw_content CLOB, -- original URL or raw text input + extracted_text CLOB, -- cleaned text after extraction + source_url VARCHAR2(2000), + status VARCHAR2(20) DEFAULT 'PENDING', + metadata CLOB CHECK (metadata IS JSON), -- e.g. 
{"duration":3600,"author":"..."} + created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL, + updated_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL +); +CREATE INDEX ki_user_idx ON knowledge_items(user_id); +CREATE INDEX ki_status_idx ON knowledge_items(status); +CREATE INDEX ki_type_idx ON knowledge_items(user_id, type); +``` + +### CHUNKS + +```sql +-- embedding dimensions match OCI GenAI embed-multilingual-v3 (1024) +CREATE TABLE chunks ( + id RAW(16) DEFAULT SYS_GUID() PRIMARY KEY, + knowledge_item_id RAW(16) NOT NULL REFERENCES knowledge_items(id) ON DELETE CASCADE, + chunk_index NUMBER NOT NULL, + content CLOB NOT NULL, + embedding VECTOR(1024, FLOAT32), + token_count NUMBER, + created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL +); +CREATE INDEX chunk_ki_idx ON chunks(knowledge_item_id); + +-- HNSW approximate nearest-neighbour vector index +CREATE VECTOR INDEX chunk_vec_idx ON chunks(embedding) + ORGANIZATION INMEMORY NEIGHBOR GRAPH + DISTANCE COSINE + WITH TARGET ACCURACY 95; +``` + +### STUDY_CARDS + +```sql +CREATE TABLE study_cards ( + id RAW(16) DEFAULT SYS_GUID() PRIMARY KEY, + knowledge_item_id RAW(16) NOT NULL REFERENCES knowledge_items(id) ON DELETE CASCADE, + user_id RAW(16) NOT NULL REFERENCES users(id) ON DELETE CASCADE, + question VARCHAR2(2000) NOT NULL, + answer CLOB NOT NULL, + ease_factor NUMBER(4,2) DEFAULT 2.5, -- SM-2 algorithm + interval_days NUMBER DEFAULT 1, + next_review_at TIMESTAMP DEFAULT SYSTIMESTAMP, + review_count NUMBER DEFAULT 0, + created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL +); +CREATE INDEX sc_user_review_idx ON study_cards(user_id, next_review_at); +``` + +### CHAT_SESSIONS & MESSAGES + +```sql +CREATE TABLE chat_sessions ( + id RAW(16) DEFAULT SYS_GUID() PRIMARY KEY, + user_id RAW(16) NOT NULL REFERENCES users(id) ON DELETE CASCADE, + title VARCHAR2(500), + created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL, + last_message_at TIMESTAMP DEFAULT SYSTIMESTAMP +); + +-- role: USER | ASSISTANT | SYSTEM +CREATE TABLE messages ( + 
id RAW(16) DEFAULT SYS_GUID() PRIMARY KEY, + session_id RAW(16) NOT NULL REFERENCES chat_sessions(id) ON DELETE CASCADE, + role VARCHAR2(20) NOT NULL, + content CLOB NOT NULL, + source_chunks CLOB CHECK (source_chunks IS JSON), -- [{chunk_id, score, snippet}] + tokens_used NUMBER, + created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL +); +CREATE INDEX msg_session_idx ON messages(session_id, created_at); +``` + +### TODOS + +```sql +-- status: TODO | IN_PROGRESS | DONE | CANCELLED +-- priority: LOW | MEDIUM | HIGH | URGENT +CREATE TABLE todos ( + id RAW(16) DEFAULT SYS_GUID() PRIMARY KEY, + user_id RAW(16) NOT NULL REFERENCES users(id) ON DELETE CASCADE, + parent_id RAW(16) REFERENCES todos(id) ON DELETE CASCADE, -- null = root task + title VARCHAR2(1000) NOT NULL, + description CLOB, + status VARCHAR2(20) DEFAULT 'TODO', + priority VARCHAR2(10) DEFAULT 'MEDIUM', + due_at TIMESTAMP, + completed_at TIMESTAMP, + created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL +); +CREATE INDEX todo_user_status_idx ON todos(user_id, status); +CREATE INDEX todo_parent_idx ON todos(parent_id); +``` + +### HABITS & HABIT_LOGS + +```sql +-- habit_type: BUILD | STOP +-- frequency: DAILY | WEEKLY +-- target_days: "DAILY" or comma-separated "MON,WED,FRI" +CREATE TABLE habits ( + id RAW(16) DEFAULT SYS_GUID() PRIMARY KEY, + user_id RAW(16) NOT NULL REFERENCES users(id) ON DELETE CASCADE, + title VARCHAR2(500) NOT NULL, + habit_type VARCHAR2(10) DEFAULT 'BUILD', + frequency VARCHAR2(10) DEFAULT 'DAILY', + target_days VARCHAR2(100) DEFAULT 'DAILY', + streak_current NUMBER DEFAULT 0, + streak_best NUMBER DEFAULT 0, + is_active NUMBER(1) DEFAULT 1, + created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL +); + +CREATE TABLE habit_logs ( + id RAW(16) DEFAULT SYS_GUID() PRIMARY KEY, + habit_id RAW(16) NOT NULL REFERENCES habits(id) ON DELETE CASCADE, + log_date DATE NOT NULL, + checked_in NUMBER(1) DEFAULT 1, + note VARCHAR2(1000), + created_at TIMESTAMP DEFAULT SYSTIMESTAMP NOT NULL, + CONSTRAINT 
habit_log_unique UNIQUE (habit_id, log_date) +); +CREATE INDEX hl_habit_date_idx ON habit_logs(habit_id, log_date); +``` + +### TAGS & KNOWLEDGE_TAGS + +```sql +CREATE TABLE tags ( + id RAW(16) DEFAULT SYS_GUID() PRIMARY KEY, + user_id RAW(16) NOT NULL REFERENCES users(id) ON DELETE CASCADE, + name VARCHAR2(100) NOT NULL, + color VARCHAR2(7) DEFAULT '#6366F1', + CONSTRAINT tag_user_name_unique UNIQUE (user_id, name) +); + +CREATE TABLE knowledge_tags ( + knowledge_item_id RAW(16) NOT NULL REFERENCES knowledge_items(id) ON DELETE CASCADE, + tag_id RAW(16) NOT NULL REFERENCES tags(id) ON DELETE CASCADE, + PRIMARY KEY (knowledge_item_id, tag_id) +); +``` + +--- + +## 4. Backend — Spring Boot 3 + +### Tech Stack + +| Dependency | Purpose | +|-----------|---------| +| spring-boot-starter-web | REST controllers | +| spring-boot-starter-security | JWT filter chain | +| spring-boot-starter-data-jpa | JPA / Hibernate ORM | +| ojdbc11 + ucp | Oracle JDBC driver + connection pool | +| flyway-core | Schema migrations | +| jjwt-api + jjwt-impl | JWT issue & validation | +| google-api-client | Google ID token verification | +| jsoup | HTML web crawler / text extraction | +| oci-java-sdk-generativeaiinference | OCI GenAI embeddings + chat | +| spring-boot-starter-data-redis | Session cache, rate limiting | +| spring-boot-starter-validation | Request validation | + +### Project Structure + +``` +sundol-backend/ +├── src/main/java/com/sundol/ +│ ├── config/ +│ │ ├── SecurityConfig.java # Spring Security + JWT filter chain +│ │ ├── OciGenAiConfig.java # OCI SDK client bean +│ │ └── AsyncConfig.java # @Async thread pool config +│ ├── auth/ +│ │ ├── AuthController.java # POST /api/auth/google, /refresh, /logout +│ │ ├── AuthService.java +│ │ ├── JwtService.java # issue / validate JWT +│ │ ├── JwtAuthFilter.java # OncePerRequestFilter +│ │ └── GoogleTokenVerifier.java # verify Google ID token via Google API +│ ├── knowledge/ +│ │ ├── KnowledgeController.java # CRUD /api/knowledge +│ │ 
├── KnowledgeService.java +│ │ ├── IngestService.java # async orchestrator +│ │ ├── extractor/ +│ │ │ ├── YoutubeExtractor.java # fetch transcript via YouTube Data API v3 +│ │ │ └── WebCrawler.java # Jsoup: URL → clean text +│ │ └── ChunkingService.java # sliding window text chunker +│ ├── embedding/ +│ │ └── OciEmbeddingService.java # call OCI GenAI embed endpoint +│ ├── search/ +│ │ └── SearchService.java # VECTOR_DISTANCE query + keyword fallback +│ ├── chat/ +│ │ ├── ChatController.java # /api/chat/sessions, /messages +│ │ ├── ChatService.java # RAG orchestration +│ │ └── OciChatService.java # OCI GenAI chat completion +│ ├── studycard/ +│ │ ├── StudyCardController.java # /api/study-cards +│ │ └── StudyCardService.java # SM-2 scheduler + AI generation +│ ├── todo/ +│ │ ├── TodoController.java # /api/todos +│ │ └── TodoService.java +│ ├── habit/ +│ │ ├── HabitController.java # /api/habits +│ │ ├── HabitService.java +│ │ └── HabitStreakScheduler.java # @Scheduled nightly streak job +│ └── tag/ +│ ├── TagController.java # /api/tags +│ └── TagService.java +└── src/main/resources/ + ├── application.yml + └── db/migration/ + ├── V1__create_users.sql + ├── V2__create_knowledge.sql + ├── V3__create_chunks_vector.sql + ├── V4__create_chat.sql + ├── V5__create_todos.sql + ├── V6__create_habits.sql + └── V7__create_tags.sql +``` + +### application.yml + +```yaml +spring: + datasource: + url: jdbc:oracle:thin:@${DB_HOST}:1521/${DB_SERVICE} + username: ${DB_USER} + password: ${DB_PASSWORD} + driver-class-name: oracle.jdbc.OracleDriver + jpa: + hibernate: + ddl-auto: validate # Flyway owns the schema, Hibernate only validates + show-sql: false + properties: + hibernate.dialect: org.hibernate.dialect.OracleDialect + flyway: + enabled: true + locations: classpath:db/migration + data: + redis: + host: ${REDIS_HOST:localhost} + port: ${REDIS_PORT:6379} + +app: + jwt: + secret: ${JWT_SECRET} # min 256-bit hex string + access-expiry-minutes: 15 + refresh-expiry-days: 30 + google: 
+ client-id: ${GOOGLE_CLIENT_ID} + oci: + compartment-id: ${OCI_COMPARTMENT_ID} + region: ${OCI_REGION} # e.g. us-chicago-1 + embedding-model-id: cohere.embed-multilingual-v3 # 1024 dims + chat-model-id: cohere.command-r-plus # or meta.llama-3-70b-instruct + chunking: + chunk-size-tokens: 512 + chunk-overlap-tokens: 64 + rag: + top-k: 5 # chunks to retrieve per query + min-score: 0.70 # cosine similarity threshold +``` + +### SecurityConfig — JWT Filter Chain + +```java +@Configuration +@EnableWebSecurity +public class SecurityConfig { + + @Bean + public SecurityFilterChain filterChain(HttpSecurity http, JwtAuthFilter jwtAuthFilter) throws Exception { + http + .csrf(AbstractHttpConfigurer::disable) + .sessionManagement(s -> s.sessionCreationPolicy(STATELESS)) + .authorizeHttpRequests(auth -> auth + .requestMatchers("/api/auth/**", "/actuator/health").permitAll() + .anyRequest().authenticated() + ) + .addFilterBefore(jwtAuthFilter, UsernamePasswordAuthenticationFilter.class); + return http.build(); + } +} +``` + +### JwtAuthFilter skeleton + +```java +@Component +public class JwtAuthFilter extends OncePerRequestFilter { + @Override + protected void doFilterInternal(HttpServletRequest req, HttpServletResponse res, + FilterChain chain) throws ServletException, IOException { + String header = req.getHeader("Authorization"); + if (header != null && header.startsWith("Bearer ")) { + String token = header.substring(7); + if (jwtService.isValid(token)) { + String userId = jwtService.extractUserId(token); + // load UserDetails, set SecurityContextHolder authentication + } + } + chain.doFilter(req, res); + } +} +``` + +### IngestService — Async Pipeline + +```java +@Service +public class IngestService { + + @Async + public void ingest(KnowledgeItem item) { + try { + // 1. 
Mark EXTRACTING + updateStatus(item, "EXTRACTING"); + String text = switch (item.getType()) { + case "YOUTUBE" -> youtubeExtractor.extract(item.getSourceUrl()); + case "BLOG", "NEWS", "URL" -> webCrawler.crawl(item.getSourceUrl()); + case "TEXT" -> item.getRawContent(); + default -> throw new IllegalArgumentException("Unknown type: " + item.getType()); + }; + item.setExtractedText(text); + + // 2. Mark CHUNKING + updateStatus(item, "CHUNKING"); + List chunks = chunkingService.chunk(text); + + // 3. Mark EMBEDDING + updateStatus(item, "EMBEDDING"); + for (int i = 0; i < chunks.size(); i++) { + float[] embedding = ociEmbeddingService.embed(chunks.get(i)); + chunkRepository.save(new Chunk(item.getId(), i, chunks.get(i), embedding)); + } + + // 4. Optionally auto-generate study cards via OCI GenAI + studyCardService.generateFromItem(item); + + updateStatus(item, "READY"); + } catch (Exception e) { + updateStatus(item, "FAILED"); + } + } +} +``` + +### SearchService — Vector Query + +```java +@Repository +public interface ChunkRepository extends JpaRepository { + + // Native Oracle VECTOR_DISTANCE query + @Query(value = """ + SELECT c.* FROM chunks c + JOIN knowledge_items ki ON c.knowledge_item_id = ki.id + WHERE ki.user_id = :userId + AND VECTOR_DISTANCE(c.embedding, :queryVec, COSINE) < :threshold + ORDER BY VECTOR_DISTANCE(c.embedding, :queryVec, COSINE) + FETCH FIRST :topK ROWS ONLY + """, nativeQuery = true) + List findSimilar( + @Param("userId") byte[] userId, + @Param("queryVec") float[] queryVec, + @Param("threshold") double threshold, + @Param("topK") int topK + ); +} +``` + +### HabitStreakScheduler + +```java +@Component +public class HabitStreakScheduler { + + // Runs every night at 00:05 + @Scheduled(cron = "0 5 0 * * *") + public void recalculateStreaks() { + List activeHabits = habitRepository.findByIsActive(true); + LocalDate yesterday = LocalDate.now().minusDays(1); + for (Habit habit : activeHabits) { + boolean checkedIn = habitLogRepository + 
.existsByHabitIdAndLogDate(habit.getId(), yesterday); + if (checkedIn) { + habit.setStreakCurrent(habit.getStreakCurrent() + 1); + habit.setStreakBest(Math.max(habit.getStreakBest(), habit.getStreakCurrent())); + } else { + habit.setStreakCurrent(0); // streak broken + } + habitRepository.save(habit); + } + } +} +``` + +--- + +## 5. AI Layer — OCI Generative AI + +### OCI GenAI Services Used + +| Purpose | Model | Notes | +|---------|-------|-------| +| Text embeddings | cohere.embed-multilingual-v3 | 1024-dim, multilingual, batch up to 96 texts | +| Chat completion | cohere.command-r-plus | Strong RAG / grounding support | +| Chat completion (alt) | meta.llama-3-70b-instruct | Alternative if Cohere unavailable | + +### OciEmbeddingService + +```java +@Service +public class OciEmbeddingService { + + private final GenerativeAiInferenceClient client; + + public float[] embed(String text) { + EmbedTextDetails details = EmbedTextDetails.builder() + .inputs(List.of(text)) + .servingMode(OnDemandServingMode.builder() + .modelId(embeddingModelId) + .build()) + .compartmentId(compartmentId) + .inputType(EmbedTextDetails.InputType.SearchDocument) + .build(); + + EmbedTextResponse response = client.embedText(EmbedTextRequest.builder() + .embedTextDetails(details) + .build()); + + List vec = response.getEmbedTextResult().getEmbeddings().get(0); + float[] result = new float[vec.size()]; + for (int i = 0; i < vec.size(); i++) result[i] = vec.get(i).floatValue(); + return result; + } +} +``` + +### RAG Prompt Template + +``` +You are Sundol, your personal secretary dog. Answer the user's question using ONLY +the context provided below. If the context does not contain enough information, +say so clearly. Always cite the source by referencing [Source N]. + +CONTEXT: +[Source 1] (from: {title_1}) +{chunk_content_1} + +[Source 2] (from: {title_2}) +{chunk_content_2} + +... 
(up to top-K chunks) + +CHAT HISTORY (last 5 turns): +{chat_history} + +USER QUESTION: +{user_query} + +ANSWER: +``` + +### YouTube Transcript Extraction + +Use the YouTube Data API v3 captions endpoint, or as a fallback use the youtube-transcript-api compatible approach: + +```java +// Option A: youtube-transcript-api (Python microservice or direct HTTP) +// GET https://www.youtube.com/watch?v={videoId} +// Parse timedtext XML from: https://www.youtube.com/api/timedtext?lang=en&v={videoId} + +// Option B: YouTube Data API v3 captions.list +// Requires OAuth if captions are private; works for auto-generated captions +String transcriptUrl = "https://www.googleapis.com/youtube/v3/captions" + + "?part=snippet&videoId=" + videoId + + "&key=" + youtubeApiKey; +``` + +### Web Crawler (Jsoup) + +```java +@Service +public class WebCrawler { + public String crawl(String url) throws IOException { + Document doc = Jsoup.connect(url) + .userAgent("Mozilla/5.0 (compatible; SUNDOL-bot/1.0)") + .timeout(10_000) + .get(); + // Remove nav, footer, ads + doc.select("nav, footer, header, script, style, .ad, #cookie-banner").remove(); + // Prefer article body + Element article = doc.selectFirst("article, main, .post-content, .article-body"); + return (article != null ? article : doc.body()).text(); + } +} +``` + +### Text Chunker — Sliding Window + +```java +@Service +public class ChunkingService { + + // Simple word-count approximation (1 token ≈ 0.75 words) + public List chunk(String text) { + String[] words = text.split("\\s+"); + int chunkWords = (int)(chunkSizeTokens * 0.75); + int overlapWords = (int)(chunkOverlapTokens * 0.75); + List chunks = new ArrayList<>(); + int i = 0; + while (i < words.length) { + int end = Math.min(i + chunkWords, words.length); + chunks.add(String.join(" ", Arrays.copyOfRange(words, i, end))); + i += chunkWords - overlapWords; + } + return chunks; + } +} +``` + +--- + +## 6. 
Frontend — Next.js 14 + +### Tech Stack + +| Package | Purpose | +|---------|---------| +| next 14 (App Router) | Framework, SSR/CSR | +| typescript | Type safety | +| tailwindcss | Styling | +| next-auth v5 | Google OAuth + session management | +| @tanstack/react-query | Server state, caching, mutations | +| axios | HTTP client with JWT interceptor | +| zustand | Lightweight client state | +| react-markdown | Render AI responses with markdown | +| @radix-ui/react-* | Accessible UI primitives | + +### Project Structure + +``` +sundol-frontend/ +├── app/ +│ ├── (auth)/ +│ │ └── login/page.tsx # Google sign-in button +│ ├── (app)/ +│ │ ├── layout.tsx # Sidebar + auth guard +│ │ ├── dashboard/page.tsx # Overview: recent items, due cards, todos +│ │ ├── knowledge/ +│ │ │ ├── page.tsx # Knowledge list with search +│ │ │ ├── [id]/page.tsx # Item detail + chunks viewer +│ │ │ └── add/page.tsx # Add form: URL / text / YouTube +│ │ ├── chat/ +│ │ │ ├── page.tsx # Session list +│ │ │ └── [sessionId]/page.tsx # Chat interface with citations +│ │ ├── study/page.tsx # SRS flashcard review +│ │ ├── todos/page.tsx # Todo list with subtasks +│ │ ├── habits/page.tsx # Habit grid + check-in +│ │ └── settings/page.tsx +│ ├── api/auth/[...nextauth]/route.ts # next-auth handler +│ └── layout.tsx +├── components/ +│ ├── knowledge/ +│ │ ├── AddKnowledgeForm.tsx # Detect URL type, show appropriate fields +│ │ ├── KnowledgeCard.tsx +│ │ └── StatusBadge.tsx # PENDING → READY pipeline status +│ ├── chat/ +│ │ ├── ChatWindow.tsx +│ │ ├── MessageBubble.tsx # Renders markdown, shows citations +│ │ └── SourceCitation.tsx +│ ├── habits/ +│ │ ├── HabitCard.tsx +│ │ └── StreakBadge.tsx +│ ├── study/ +│ │ └── FlashCard.tsx # Flip animation, SM-2 rating buttons +│ └── ui/ # Shared: Button, Input, Modal, etc. 
 +├── lib/ +│ ├── api.ts # Axios instance with JWT interceptor +│ ├── auth.ts # next-auth config +│ └── utils.ts +└── types/ + └── index.ts # Shared TypeScript interfaces +``` + +### Axios JWT Interceptor + +```typescript +// lib/api.ts +import axios from 'axios'; +import { getSession } from 'next-auth/react'; + +const api = axios.create({ baseURL: process.env.NEXT_PUBLIC_API_URL }); + +api.interceptors.request.use(async (config) => { + const session = await getSession(); + if (session?.accessToken) { + config.headers.Authorization = `Bearer ${session.accessToken}`; + } + return config; +}); + +// On 401, redirect to sign-in (this interceptor does not refresh the token itself) +api.interceptors.response.use( + (res) => res, + async (error) => { + if (error.response?.status === 401) { + // Access token was rejected: send the user through the next-auth sign-in page + window.location.href = '/api/auth/signin'; + } + return Promise.reject(error); + } +); + +export default api; +``` + +### Knowledge Ingest Flow (Frontend) + +1. User pastes URL or text in `AddKnowledgeForm` +2. Frontend detects type (YouTube URL regex, plain URL, raw text) +3. `POST /api/knowledge/ingest` → receives `202 Accepted` with `item.id` +4. Frontend polls `GET /api/knowledge/{id}` every 3s, shows status badge: `PENDING → EXTRACTING → CHUNKING → EMBEDDING → READY` +5. On `READY`, navigates to item detail page + +--- + +## 7. Security — Google SSO + JWT + +### Flow + +``` +1. User clicks "Sign in with Google" +2. Next.js (next-auth) redirects → Google OAuth consent +3. Google returns authorization code → next-auth exchanges for ID token +4. next-auth calls POST /api/auth/google with { idToken } +5. Spring: + a. Verifies ID token via Google API (audience = GOOGLE_CLIENT_ID) + b. Extracts { sub, email, name, picture } + c. Upserts user in DB (insert if new, update avatar if existing) + d. Issues: accessToken (JWT, 15min) + refreshToken (JWT, 30d) + e. Stores hashed refreshToken in users.refresh_token +6. next-auth stores tokens in encrypted session cookie (httpOnly) +7. 
All subsequent API calls send Authorization: Bearer <accessToken> +8. On expiry, next-auth calls POST /api/auth/refresh with { refreshToken } +9. Spring validates refresh token hash, issues new token pair (rotation) +``` + +### JWT Claims + +```json +{ + "sub": "user-uuid", + "email": "user@example.com", + "type": "ACCESS", + "iat": 1711800000, + "exp": 1711800900 +} +``` + +### next-auth Config Sketch + +```typescript +// lib/auth.ts +export const authOptions: NextAuthOptions = { + providers: [ + GoogleProvider({ + clientId: process.env.GOOGLE_CLIENT_ID!, + clientSecret: process.env.GOOGLE_CLIENT_SECRET!, + }), + ], + callbacks: { + async signIn({ account }) { + // Exchange Google ID token for our own JWT pair + const res = await fetch(`${process.env.API_URL}/api/auth/google`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ idToken: account?.id_token }), + }); + const data = await res.json(); + account!.accessToken = data.accessToken; + account!.refreshToken = data.refreshToken; + return res.ok; + }, + async jwt({ token, account }) { + if (account) { + token.accessToken = account.accessToken; + token.refreshToken = account.refreshToken; + } + return token; + }, + async session({ session, token }) { + session.accessToken = token.accessToken as string; + return session; + }, + }, +}; +``` + +--- + +## 8. REST API Reference + +All endpoints require `Authorization: Bearer <accessToken>` unless marked **public**. 
+ +### Auth + +| Method | Path | Auth | Description | +|--------|------|------|-------------| +| POST | /api/auth/google | public | Exchange Google ID token → JWT pair | +| POST | /api/auth/refresh | public | Refresh access token | +| POST | /api/auth/logout | yes | Invalidate refresh token | + +### Knowledge + +| Method | Path | Description | +|--------|------|-------------| +| GET | /api/knowledge | List items (filter: type, status, tag, search) | +| POST | /api/knowledge/ingest | Submit new item for ingestion → 202 | +| GET | /api/knowledge/{id} | Get item detail + status | +| PATCH | /api/knowledge/{id} | Update title / tags | +| DELETE | /api/knowledge/{id} | Delete item + all chunks | +| GET | /api/knowledge/{id}/chunks | List chunks for an item | + +### Search + +| Method | Path | Description | +|--------|------|-------------| +| GET | /api/search?q={query}&topK=5 | Semantic search across user's chunks | + +### Chat + +| Method | Path | Description | +|--------|------|-------------| +| GET | /api/chat/sessions | List chat sessions | +| POST | /api/chat/sessions | Create new session | +| GET | /api/chat/sessions/{id}/messages | Get message history | +| POST | /api/chat/sessions/{id}/messages | Send message → RAG response | +| DELETE | /api/chat/sessions/{id} | Delete session | + +### Study Cards + +| Method | Path | Description | +|--------|------|-------------| +| GET | /api/study-cards/due | Cards due for review today | +| GET | /api/study-cards?knowledgeItemId={id} | Cards for a specific item | +| POST | /api/study-cards/generate/{knowledgeItemId} | Trigger AI generation | +| POST | /api/study-cards/{id}/review | Submit review result {rating: 1-5} | + +### Todos + +| Method | Path | Description | +|--------|------|-------------| +| GET | /api/todos | List (filter: status, priority, dueDate) | +| POST | /api/todos | Create todo | +| PATCH | /api/todos/{id} | Update fields | +| DELETE | /api/todos/{id} | Delete (cascades subtasks) | +| GET | 
/api/todos/{id}/subtasks | List subtasks | + +### Habits + +| Method | Path | Description | +|--------|------|-------------| +| GET | /api/habits | List habits | +| POST | /api/habits | Create habit | +| PATCH | /api/habits/{id} | Update habit | +| DELETE | /api/habits/{id} | Delete habit | +| POST | /api/habits/{id}/checkin | Check in for today {note?: string} | +| GET | /api/habits/{id}/logs | Log history (filter: from, to dates) | + +### Tags + +| Method | Path | Description | +|--------|------|-------------| +| GET | /api/tags | List user's tags | +| POST | /api/tags | Create tag | +| PATCH | /api/tags/{id} | Rename / recolor | +| DELETE | /api/tags/{id} | Delete tag (removes from items) | + +--- + +## 9. Key Algorithms + +### SM-2 Spaced Repetition (Study Cards) + +``` +Input: ease_factor (EF), interval, rating (1–5 from user) + +if rating >= 3 (pass): + if interval == 0: new_interval = 1 + elif interval == 1: new_interval = 6 + else: new_interval = round(interval * EF) + new_EF = EF + (0.1 - (5 - rating) * (0.08 + (5 - rating) * 0.02)) + new_EF = max(1.3, new_EF) +else (fail, rating < 3): + new_interval = 1 # reset + new_EF = max(1.3, EF - 0.2) + +next_review_at = now + new_interval days +``` + +### RAG Pipeline (ChatService) + +``` +1. embed(user_query) → queryVector [1024 floats] +2. searchService.findSimilar(userId, queryVector, topK=5, minScore=0.70) +3. Build prompt: + - System: SUNDOL persona + citation instructions + - Context: top-K chunks with source titles + - History: last 5 messages from session + - User: current query +4. ociChatService.complete(prompt) → response text +5. Parse citations from response +6. Save message to DB with source_chunks JSON +7. 
Return { content, sourceCitations[], tokensUsed } +``` + +### Habit Streak Logic (Nightly Job) + +``` +for each active habit: + yesterday = today - 1 day + if habit.target_days == "DAILY" or yesterday.dayOfWeek in habit.target_days: + if habit_log exists for yesterday AND checked_in = true: + streak_current += 1 + streak_best = max(streak_best, streak_current) + else: + streak_current = 0 // broken + // days not in target_days are ignored (streak preserved) +``` + +--- + +## 10. Environment Variables + +### Backend (.env / application.yml) + +``` +# Database +DB_HOST=localhost +DB_SERVICE=FREEPDB1 +DB_USER=sundol +DB_PASSWORD=yourpassword + +# JWT +JWT_SECRET=your-256-bit-hex-secret-at-least-32-chars + +# Google OAuth +GOOGLE_CLIENT_ID=xxx.apps.googleusercontent.com + +# OCI GenAI +OCI_COMPARTMENT_ID=ocid1.compartment.oc1..xxx +OCI_REGION=us-chicago-1 +# OCI credentials via ~/.oci/config or Instance Principal + +# YouTube (optional, for private caption access) +YOUTUBE_API_KEY=AIza... + +# Redis +REDIS_HOST=localhost +REDIS_PORT=6379 +``` + +### Frontend (.env.local) + +``` +NEXT_PUBLIC_API_URL=http://localhost:8080 +NEXTAUTH_URL=http://localhost:3000 +NEXTAUTH_SECRET=your-nextauth-secret + +GOOGLE_CLIENT_ID=xxx.apps.googleusercontent.com +GOOGLE_CLIENT_SECRET=your-google-secret + +API_URL=http://localhost:8080 # server-side fetch +``` + +--- + +## 11. 
Project Structure (Monorepo) + +``` +sundol/ +├── sundol-backend/ # Spring Boot 3 (Java 21) +│ ├── pom.xml +│ └── src/ +├── sundol-frontend/ # Next.js 14 (TypeScript) +│ ├── package.json +│ └── app/ +├── db/ +│ └── migration/ # Flyway SQL scripts (V1–V7) +├── docs/ +│ └── SUNDOL_SPEC.md # ← this file +├── docker-compose.yml # Oracle 23ai + Redis + backend + frontend +└── README.md +``` + +### docker-compose.yml (development) + +```yaml +version: '3.9' +services: + oracle: + image: gvenzl/oracle-free:23-slim + environment: + ORACLE_PASSWORD: sundolpass + APP_USER: sundol + APP_USER_PASSWORD: sundol123 + ports: + - "1521:1521" + volumes: + - oracle_data:/opt/oracle/oradata + + redis: + image: redis:7-alpine + ports: + - "6379:6379" + + backend: + build: ./sundol-backend + ports: + - "8080:8080" + environment: + DB_HOST: oracle + DB_SERVICE: FREEPDB1 + DB_USER: sundol + DB_PASSWORD: sundol123 + JWT_SECRET: ${JWT_SECRET} + GOOGLE_CLIENT_ID: ${GOOGLE_CLIENT_ID} + OCI_COMPARTMENT_ID: ${OCI_COMPARTMENT_ID} + OCI_REGION: ${OCI_REGION} + REDIS_HOST: redis + volumes: + - ~/.oci:/root/.oci:ro # OCI credentials + depends_on: + - oracle + - redis + + frontend: + build: ./sundol-frontend + ports: + - "3000:3000" + environment: + NEXT_PUBLIC_API_URL: http://localhost:8080 + API_URL: http://backend:8080 + NEXTAUTH_URL: http://localhost:3000 + NEXTAUTH_SECRET: ${NEXTAUTH_SECRET} + GOOGLE_CLIENT_ID: ${GOOGLE_CLIENT_ID} + GOOGLE_CLIENT_SECRET: ${GOOGLE_CLIENT_SECRET} + depends_on: + - backend + +volumes: + oracle_data: +``` + +--- + +## Implementation Order (Recommended for Claude Code) + +1. **DB migrations** — run all V1–V7 Flyway scripts, verify Oracle VECTOR index created +2. **Backend: Auth** — Google SSO flow, JWT issue/validate, filter chain +3. **Backend: Knowledge CRUD** — ingest endpoint (202), status polling +4. **Backend: Ingest pipeline** — YoutubeExtractor, WebCrawler, ChunkingService +5. **Backend: OCI Embedding** — embed chunks, store VECTOR in Oracle +6. 
**Backend: Search** — VECTOR_DISTANCE query, SearchService +7. **Backend: Chat / RAG** — ChatService, OciChatService, prompt builder +8. **Backend: Todo + Habit + Tag** — straightforward CRUD + streak scheduler +9. **Backend: Study Cards** — AI generation + SM-2 review endpoint +10. **Frontend: Auth** — next-auth Google provider, session, axios interceptor +11. **Frontend: Knowledge** — add form, status polling, list view +12. **Frontend: Chat** — session list, chat window, citation rendering +13. **Frontend: Todos + Habits** — list views, check-in, streak display +14. **Frontend: Study Cards** — flip card UI, SM-2 rating buttons +15. **Docker Compose** — wire all services, test end-to-end