"""Polling daemon that processes knowledge_queue items.""" import logging import os import time from core.chunker import chunk_text from core.enricher import enrich from core.obsidian import save_note from core.queue_db import fetch_pending, set_done, set_error, set_processing from core.vector import save_to_vector from core.web import fetch_page_text from core.youtube import get_transcript logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", ) logger = logging.getLogger(__name__) def process_item(item: dict) -> None: """Process a single queue item end-to-end. Args: item: Dict from fetch_pending() with keys: id, input_type, content. """ row_id = item["id"] input_type = item["input_type"] content = item["content"] set_processing(row_id) logger.info("Processing %s [%s]", row_id[:8], input_type) try: url = "" yt_title = "" if input_type == "youtube": result = get_transcript(content) text = result["text"] url = content yt_title = result["title"] elif input_type == "url": text = fetch_page_text(content) url = content else: # text text = content if not text: raise ValueError("No text content extracted") meta = enrich(input_type, yt_title, url, text) title = meta.get("title") or yt_title or url or row_id[:8] note_path = save_note( content_type=input_type, title=title, summary=meta.get("summary", ""), body=text, tags=meta.get("tags", []), source_url=url, author=meta.get("author") or "", date=meta.get("date") or "", ) logger.info("Obsidian note saved: %s", note_path) chunks = chunk_text(text) doc_id = f"{input_type}:{row_id[:8]}" inserted = save_to_vector(doc_id, chunks) logger.info("Vector store: inserted %d chunks for doc_id=%s", len(inserted), doc_id) set_done(row_id, title, meta) logger.info("Done: %s → %s", row_id[:8], title[:60]) except Exception as exc: logger.error("Error processing %s: %s", row_id[:8], exc, exc_info=True) set_error(row_id, str(exc)) def run_loop(interval: int = 30) -> None: """Poll for pending items indefinitely. Args: interval: Seconds to sleep between polling cycles. """ interval = int(os.environ.get("DAEMON_INTERVAL", interval)) logger.info("Daemon started (interval=%ds)", interval) while True: try: items = fetch_pending(limit=5) if items: logger.info("Found %d pending item(s)", len(items)) for item in items: process_item(item) else: logger.debug("No pending items") except Exception as exc: logger.error("Polling error: %s", exc, exc_info=True) time.sleep(interval) if __name__ == "__main__": from dotenv import load_dotenv load_dotenv() run_loop()