feat: add English vocab extraction and Anki card registration

- core/vocab.py: extract B1-B2 level vocabulary from English content via Gemini Flash
- core/anki.py: register vocab cards via AnkiConnect (English::Vocabulary deck)
- core/enricher.py: add language detection field + summary_ko (Korean summary)
- core/obsidian.py: render Korean + English summary in note
- daemon/worker.py: call vocab extraction and Anki registration for English content
This commit is contained in:
joungmin
2026-02-28 08:39:58 +09:00
parent 86a4104ae3
commit a9db6a8771
5 changed files with 208 additions and 8 deletions

View File

@@ -17,11 +17,13 @@ from oci.generative_ai_inference.models import (
_PROMPT = """\
You are a knowledge extraction assistant. Analyze the content below and return ONLY a valid JSON object with these fields:
- "title": concise descriptive title for this content (string)
- "summary": 3-5 sentence summary capturing key insights (string)
- "summary": 3-5 sentence summary capturing key insights, written in English (string)
- "summary_ko": the same summary translated into Korean (string)
- "tags": list of 3-7 relevant keywords or topics (string[])
- "author": author or creator name, or null if not found (string | null)
- "date": publication date in ISO 8601 format (YYYY-MM-DD), or null if not found (string | null)
- "content_type": one of "youtube", "article", "documentation", "news", "forum", "code", "other" (string)
- "language": primary language of the content, ISO 639-1 code, e.g. "en", "ko", "ja" (string)
Content type: {content_type}
Source URL: {url}
@@ -33,10 +35,10 @@ Return only the JSON object, no markdown, no explanation."""
def _get_client() -> GenerativeAiInferenceClient:
config = oci.config.from_file()
return GenerativeAiInferenceClient(
config,
service_endpoint=os.environ["OCI_GENAI_ENDPOINT"],
)
# Gemini models live in us-ashburn-1; prefer OCI_CHAT_ENDPOINT when it is set
# to a non-empty value, otherwise fall back to OCI_GENAI_ENDPOINT.
# The fallback lookup raises KeyError if OCI_GENAI_ENDPOINT is not defined.
endpoint = os.environ.get("OCI_CHAT_ENDPOINT") or os.environ["OCI_GENAI_ENDPOINT"]
return GenerativeAiInferenceClient(config, service_endpoint=endpoint)
def enrich(content_type: str, title: str, url: str, text: str) -> dict:
@@ -92,5 +94,6 @@ def enrich(content_type: str, title: str, url: str, text: str) -> dict:
metadata.setdefault("author", None)
metadata.setdefault("date", None)
metadata.setdefault("content_type", content_type)
metadata.setdefault("language", "en")
return metadata