feat: initial knowledge-inbox pipeline implementation

- Oracle ADB queue table (sql/schema.sql)
- Queue CRUD: core/queue_db.py
- YouTube transcript: core/youtube.py
- Web page fetch: core/web.py
- LLM enrichment via OCI GenAI Gemini Flash: core/enricher.py
- Text chunker: core/chunker.py
- Obsidian note writer: core/obsidian.py
- Oracle vector store insertion: core/vector.py
- Polling daemon: daemon/worker.py
- Telegram bot: bot/telegram_bot.py
- Main runner: main.py
This commit is contained in:
joungmin
2026-02-28 08:16:11 +09:00
commit 86a4104ae3
18 changed files with 926 additions and 0 deletions

28
core/chunker.py Normal file
View File

@@ -0,0 +1,28 @@
"""Simple sliding-window text chunking."""
def chunk_text(text: str, size: int = 2000, overlap: int = 200) -> list[str]:
    """Split text into overlapping, fixed-size character windows.

    Consecutive chunks start ``size - overlap`` characters apart, so each
    chunk repeats the last ``overlap`` characters of the previous one. The
    final chunk may be shorter than ``size``.

    Args:
        text: The full text to split.
        size: Maximum characters per chunk.
        overlap: Characters of overlap between consecutive chunks. Must be
            smaller than ``size`` whenever the text needs splitting.

    Returns:
        List of text chunks. Text no longer than ``size`` (including the
        empty string) is returned unchanged as a single-item list.

    Raises:
        ValueError: If the text has to be split and ``size`` is not
            positive or ``overlap`` is not smaller than ``size``; the
            window would never advance in that case.
    """
    total = len(text)
    if total <= size:
        return [text]

    # Validated only on the multi-chunk path so that calls with short text
    # keep returning [text] regardless of the size/overlap combination.
    if size <= 0:
        raise ValueError(f"size must be positive, got {size}")
    step = size - overlap
    if step <= 0:
        # A zero or negative step would make the window stall or move
        # backwards, looping forever while accumulating chunks.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than size ({size})"
        )

    chunks: list[str] = []
    for start in range(0, total, step):
        end = start + size
        chunks.append(text[start:end])
        if end >= total:
            # This window already reaches the end of the text; a further
            # chunk would only repeat part of the overlap region.
            break
    return chunks