204 lines
5.8 KiB
Python
Executable File
204 lines
5.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Oracle RAG CLI - Ultra lightweight RAG query tool
|
|
Usage: python rag_cli.py "your question here"
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
|
|
# The Oracle driver is optional: without it the module still imports, and
# ORACLE_AVAILABLE records that no database connection can be opened.
try:
    import oracledb
except ImportError:
    ORACLE_AVAILABLE = False
    print("⚠️ oracledb not installed. Run: pip install oracledb")
else:
    ORACLE_AVAILABLE = True
|
|
|
|
# Oracle connection config (for when oracledb is available).
#
# Every value can be overridden through the environment (ORACLE_USER,
# ORACLE_PASSWORD, ORACLE_DSN, ORACLE_WALLET_LOCATION) so the script can run
# on another machine without editing this file. The literals are fallbacks
# that keep existing invocations working unchanged.
#
# SECURITY NOTE(review): a database password is committed here in plain text.
# Rotate it, supply it via ORACLE_PASSWORD, and then delete the fallback.
ORACLE_CONFIG = {
    "user": os.environ.get("ORACLE_USER", "admin"),
    "password": os.environ.get("ORACLE_PASSWORD", "Carter55@26@1"),
    "dsn": os.environ.get("ORACLE_DSN", "h8i4i0g8cxtd2lpf_high"),
    "wallet_location": os.environ.get(
        "ORACLE_WALLET_LOCATION",
        "/Users/joungmin/devkit/db_conn/Wallet_H8I4I0G8CXTD2LPF",
    ),
}
|
|
|
|
|
|
def get_connection():
    """Open a connection to the database described by ORACLE_CONFIG.

    Returns:
        An open ``oracledb`` connection, or ``None`` when the driver is not
        installed or the connection attempt fails. A failure is reported on
        stdout rather than raised, so callers only need a truthiness check.
    """
    if not ORACLE_AVAILABLE:
        return None

    wallet_dir = ORACLE_CONFIG['wallet_location']
    try:
        # Kept for Oracle Client (thick mode) setups that locate
        # tnsnames.ora and the wallet through TNS_ADMIN.
        os.environ['TNS_ADMIN'] = wallet_dir
        return oracledb.connect(
            user=ORACLE_CONFIG['user'],
            password=ORACLE_CONFIG['password'],
            dsn=ORACLE_CONFIG['dsn'],
            # Pass the directory explicitly as well: TNS_ADMIN is only set
            # after oracledb was imported, so the driver may not see it when
            # resolving the dsn alias. NOTE(review): confirm against the
            # python-oracledb version in use.
            config_dir=wallet_dir,
            wallet_location=wallet_dir,
        )
    except Exception as e:
        # Best-effort by design: any driver/network error becomes "no connection".
        print(f"❌ Oracle connection failed: {e}")
        return None
|
|
|
|
|
|
def check_rag_procedures(cursor):
    """List schema objects whose name contains RAG or EMBED.

    Args:
        cursor: An open database cursor; only ``execute`` and ``fetchall``
            are used.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the catalog query: rows of
        ``(object_name, object_type)`` ordered by object name.
    """
    catalog_sql = """
        SELECT object_name, object_type
        FROM user_objects
        WHERE object_name LIKE '%RAG%' OR object_name LIKE '%EMBED%'
        ORDER BY object_name
    """
    cursor.execute(catalog_sql)
    return cursor.fetchall()
|
|
|
|
|
|
def rag_query(question: str, top_k: int = 5) -> str:
    """Answer *question* using the RAG objects found in the current schema.

    Flow:
      1. Open a connection; return an error string if none is available.
      2. If a schema object named ``RAG_ASK`` exists, call
         ``rag_ask(question, top_k)`` and return its result.
      3. Otherwise look for document/chunk tables and delegate to
         ``vector_search_fallback``.

    Args:
        question: The user's question, bound as the first SQL parameter.
        top_k: Bound as the second parameter of ``rag_ask``.

    Returns:
        The answer text, or a human-readable status/error message. Database
        errors are caught and returned as a ``"❌ Query failed: ..."`` string.
    """
    conn = get_connection()
    if not conn:
        return "❌ No Oracle connection available"

    cursor = None
    try:
        # Created inside the try so the connection is still closed if
        # cursor creation itself fails.
        cursor = conn.cursor()

        procedures = check_rag_procedures(cursor)
        proc_names = [row[0] for row in procedures]

        if proc_names:
            print(f"📦 Found RAG procedures: {', '.join(proc_names)}")
        else:
            print("📦 No RAG procedures found. Checking doc_chunks table...")

        if any(name.upper() == 'RAG_ASK' for name in proc_names):
            cursor.execute("SELECT rag_ask(:1, :2) FROM DUAL", [question, top_k])
            row = cursor.fetchone()
            answer = row[0] if row else None
            # If rag_ask returns a CLOB the driver may hand back a LOB object
            # instead of text; read it so the caller really gets a str.
            if hasattr(answer, "read"):
                answer = answer.read()
            if answer:
                return answer
            return "⚠️ RAG query returned no results"

        # No callable RAG_ASK. The catalog filter also matches unrelated
        # objects (anything named like %RAG% / %EMBED%), so always fall
        # through to the table check instead of reporting "no results"
        # for a query that was never run.
        cursor.execute("""
            SELECT table_name FROM user_tables
            WHERE table_name LIKE '%CHUNK%' OR table_name LIKE '%DOC%'
        """)
        tables = cursor.fetchall()
        if tables:
            print(f"📦 Found tables: {', '.join([t[0] for t in tables])}")
            return vector_search_fallback(question, cursor, top_k)
        return "❌ No document tables found. Please run your ingestion pipeline first."

    except Exception as e:
        return f"❌ Query failed: {e}"
    finally:
        if cursor is not None:
            cursor.close()
        conn.close()
|
|
|
|
|
|
def vector_search_fallback(question: str, cursor, top_k: int = 5) -> str:
    """Report the state of the ``doc_chunks`` table when no RAG procedure ran.

    No similarity search is performed yet: the function only checks that
    ``DOC_CHUNKS.EMBED_VECTOR`` exists and counts the rows. ``question`` and
    ``top_k`` are accepted but currently unused.

    Args:
        question: The user's question (unused).
        cursor: An open database cursor.
        top_k: Requested number of results (unused).

    Returns:
        A status message, or ``"❌ Vector search failed: ..."`` if any
        database call raises.
    """
    try:
        # Is there an EMBED_VECTOR column on DOC_CHUNKS at all?
        cursor.execute("""
            SELECT column_name
            FROM user_tab_columns
            WHERE table_name = 'DOC_CHUNKS' AND column_name = 'EMBED_VECTOR'
        """)
        has_vector_column = cursor.fetchone()
        if not has_vector_column:
            return "⚠️ doc_chunks exists but no EMBED_VECTOR column found."

        # Is there anything to search?
        cursor.execute("SELECT COUNT(*) FROM doc_chunks")
        (count, *_) = cursor.fetchone()
        if count == 0:
            return "⚠️ doc_chunks is empty (0 rows). Ingest documents first."

        # For now, just show status.
        status_lines = [
            "📊 doc_chunks status:",
            f"- Total chunks: {count}",
            "- Vector search: Available (VECTOR column exists)",
            "- RAG procedure: Not yet created",
            "",
            "To enable RAG:",
            "1. Create RAG procedures (see Oracle RAG Lightweight.md)",
            "2. Or ingest documents via your pipeline",
        ]
        return "\n".join(status_lines)

    except Exception as e:
        return f"❌ Vector search failed: {e}"
|
|
|
|
|
|
def embed_text(text: str) -> str:
    """Embed *text* via the MiniMax endpoint using the OpenAI client.

    Args:
        text: The text to embed.

    Returns:
        The embedding serialized as ``"[v1,v2,...]"``. Returns ``None`` when
        ``MINIMAX_API_KEY`` is not set, or when anything fails (including the
        ``openai`` package being missing), in which case a warning is printed.
    """
    try:
        # Imported lazily so the rest of the CLI works without this package.
        from openai import OpenAI

        key = os.environ.get("MINIMAX_API_KEY")
        if not key:
            return None

        minimax = OpenAI(api_key=key, base_url="https://api.minimax.chat/v1")
        reply = minimax.embeddings.create(model="embo-01", input=text)
        vector = reply.data[0].embedding

        return f"[{','.join(map(str, vector))}]"

    except Exception as e:
        print(f"⚠️ MiniMax embedding failed: {e}")
        return None
|
|
|
|
|
|
def main():
    """Command-line entry point: parse ``sys.argv``, run the query, print it.

    The banner is always printed. With no arguments the process exits with
    status 0. ``-h``/``--help`` prints the module docstring and exits 0.
    ``-k``/``--top-k`` must be followed by a positive integer; a missing,
    non-numeric or non-positive value is reported and exits with status 1
    (previously a trailing ``-k`` was silently folded into the question and a
    non-numeric value crashed with a traceback). All remaining arguments are
    joined with spaces to form the question.
    """
    print("""
🔮 Oracle RAG CLI v1.0

Usage: python rag_cli.py "your question here"

Options:
-k, --top-k N Number of results (default: 5)
-h, --help Show this help
""")

    cli_args = sys.argv[1:]
    if not cli_args:
        sys.exit(0)

    # Parse arguments
    words = []
    top_k = 5
    pending = iter(cli_args)
    for arg in pending:
        if arg in ("-h", "--help"):
            print(__doc__)
            sys.exit(0)

        if arg in ("-k", "--top-k"):
            # Consume the option's value from the same iterator so it is
            # never mistaken for part of the question.
            raw_value = next(pending, None)
            if raw_value is None:
                print(f"❌ {arg} requires a number")
                sys.exit(1)
            try:
                top_k = int(raw_value)
            except ValueError:
                print(f"❌ {arg} expects an integer, got: {raw_value}")
                sys.exit(1)
            if top_k < 1:
                print(f"❌ {arg} must be at least 1, got: {top_k}")
                sys.exit(1)
            continue

        words.append(arg)

    question = " ".join(words).strip()
    if not question:
        print("❌ Please provide a question")
        sys.exit(1)

    # Echo at most the first 50 characters so long questions stay on one line.
    preview = question[:50] + ('...' if len(question) > 50 else '')
    print(f"\n🔍 Querying Oracle RAG: \"{preview}\"\n")

    result = rag_query(question, top_k)
    print(result)


if __name__ == "__main__":
    main()
|