feat: initial knowledge-inbox pipeline implementation
- Oracle ADB queue table (sql/schema.sql)
- Queue CRUD: core/queue_db.py
- YouTube transcript: core/youtube.py
- Web page fetch: core/web.py
- LLM enrichment via OCI GenAI Gemini Flash: core/enricher.py
- Text chunker: core/chunker.py
- Obsidian note writer: core/obsidian.py
- Oracle vector store insertion: core/vector.py
- Polling daemon: daemon/worker.py
- Telegram bot: bot/telegram_bot.py
- Main runner: main.py
This commit is contained in:
86
core/obsidian.py
Normal file
86
core/obsidian.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""Save processed knowledge items as Obsidian markdown notes."""
|
||||
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _slugify(text: str, max_len: int = 50) -> str:
|
||||
"""Convert text to a filesystem-safe slug."""
|
||||
text = re.sub(r"[^\w\s-]", "", text, flags=re.UNICODE)
|
||||
text = re.sub(r"[\s_]+", "-", text).strip("-")
|
||||
return text[:max_len].lower()
|
||||
|
||||
|
||||
def save_note(
    content_type: str,
    title: str,
    summary: str,
    body: str,
    tags: list[str],
    source_url: str = "",
    author: str = "",
    date: str = "",
) -> Path:
    """Save a processed knowledge item as an Obsidian markdown file.

    The note is written to ``<vault>/<subfolder>/<YYYY-MM-DD>-<slug>.md``,
    where the vault root comes from the ``OBSIDIAN_VAULT`` environment
    variable (with a built-in default) and the subfolder depends on
    ``content_type``. Missing directories are created.

    Args:
        content_type: One of 'youtube', 'url', 'text'. Any other value is
            filed under the same folder as 'text'.
        title: The note title.
        summary: LLM-generated summary.
        body: Full content text.
        tags: List of topic tags.
        source_url: Original URL (empty for plain text).
        author: Author name (may be empty).
        date: Publication date in ISO 8601 format (may be empty).

    Returns:
        Path of the created markdown file.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    # Local import: a JSON string literal is also a valid YAML double-quoted
    # scalar, which gives us correct escaping without a YAML dependency.
    import json

    def _yaml_str(value: str) -> str:
        """Render a value as a double-quoted YAML scalar."""
        return json.dumps("" if value is None else str(value), ensure_ascii=False)

    vault = os.environ.get("OBSIDIAN_VAULT", "/Users/joungmin/Documents/Obsidian Vault")
    # Take a single timestamp so the file-name date, `created` and `Saved`
    # cannot disagree when the call straddles midnight.
    now = datetime.now()
    today = now.strftime("%Y-%m-%d")
    now_str = now.strftime("%Y-%m-%d %H:%M:%S")
    slug = _slugify(title) or "untitled"

    # Determine subfolder by content type
    subfolder_map = {
        "youtube": "20 Sources/YouTube",
        "url": "20 Sources/Web",
        "text": "20 Sources/Notes",
    }
    subfolder = subfolder_map.get(content_type, "20 Sources/Notes")
    note_dir = Path(vault) / subfolder
    note_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): two items with the same title saved on the same day map
    # to the same file and the later one overwrites the earlier one. Left
    # as-is in case callers rely on this for retries — confirm.
    filename = f"{today}-{slug}.md"
    note_path = note_dir / filename

    # Build YAML frontmatter values. Free-text fields are quoted so that
    # characters such as ':', '#', '"', ',' or ']' cannot break the
    # frontmatter or the inline tag list.
    tags_yaml = ", ".join(_yaml_str(tag) for tag in tags) if tags else ""
    title_yaml = _yaml_str(title)
    url_yaml = _yaml_str(source_url)
    author_yaml = _yaml_str(author)
    # Plain ISO-8601-looking dates (and the empty string) stay unquoted, as
    # before; anything else is quoted to keep the frontmatter parseable.
    date_text = "" if date is None else str(date)
    date_yaml = date_text if re.fullmatch(r"[\dTZ:+.\-]*", date_text) else _yaml_str(date_text)

    content = f"""---
title: {title_yaml}
source_type: {content_type}
url: {url_yaml}
author: {author_yaml}
date: {date_yaml}
tags: [{tags_yaml}]
created: {today}
---

# {title}

## 요약
{summary}

## 원문
{body}

---
*Source: {source_url}*
*Saved: {now_str}*
"""

    note_path.write_text(content, encoding="utf-8")
    return note_path
|
||||
Reference in New Issue
Block a user