Fix TTS: switch to 1.7B with ref_audio, speakable text on all lines
- Use 1.7B model (0.6B had tensor mismatch with cached prompts)
- Speak endpoint uses ref_audio directly (not cached pkl) as fallback
- Cache voice clone prompts in memory on startup
- Add SpeakableText component: 🔊 icon on each p and li element
- Remove old TTSReader sequential approach
- Add global exception handler to TTS server
- Fix profile localStorage caching
- inference_mode + bf16 optimization

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,7 @@ import AuthGuard from "@/components/auth-guard";
|
||||
import NavBar from "@/components/nav-bar";
|
||||
import { useApi } from "@/lib/use-api";
|
||||
import ReactMarkdown from "react-markdown";
|
||||
import SpeakableText from "@/components/speakable-text";
|
||||
|
||||
interface Category {
|
||||
ID: string;
|
||||
@@ -314,10 +315,18 @@ export default function KnowledgeDetailPage() {
|
||||
h1: ({children}) => <h1 className="text-xl font-bold mt-6 mb-3">{children}</h1>,
|
||||
h2: ({children}) => <h2 className="text-lg font-bold mt-5 mb-2">{children}</h2>,
|
||||
h3: ({children}) => <h3 className="text-base font-bold mt-4 mb-2">{children}</h3>,
|
||||
p: ({children}) => <p className="mb-3">{children}</p>,
|
||||
p: ({children, node}) => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const txt = node?.children?.map((c: any) => c.type === 'text' ? c.value : '').join('') || '';
|
||||
return <p className="mb-3"><SpeakableText text={txt}>{children}</SpeakableText></p>;
|
||||
},
|
||||
ul: ({children}) => <ul className="list-disc ml-5 mb-3 space-y-1">{children}</ul>,
|
||||
ol: ({children}) => <ol className="list-decimal ml-5 mb-3 space-y-1">{children}</ol>,
|
||||
li: ({children}) => <li className="leading-relaxed">{children}</li>,
|
||||
li: ({children, node}) => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const txt = node?.children?.map((c: any) => c.type === 'text' ? c.value : '').join('') || '';
|
||||
return <li className="leading-relaxed"><SpeakableText text={txt}>{children}</SpeakableText></li>;
|
||||
},
|
||||
strong: ({children}) => <strong className="font-bold">{children}</strong>,
|
||||
blockquote: ({children}) => <blockquote className="border-l-2 border-[var(--color-primary)] pl-4 my-3 italic text-[var(--color-text-muted)]">{children}</blockquote>,
|
||||
code: ({children}) => <code className="bg-[var(--color-bg-hover)] px-1.5 py-0.5 rounded text-xs">{children}</code>,
|
||||
|
||||
@@ -6,6 +6,7 @@ import AuthGuard from "@/components/auth-guard";
|
||||
import NavBar from "@/components/nav-bar";
|
||||
import { useApi } from "@/lib/use-api";
|
||||
import ReactMarkdown from "react-markdown";
|
||||
import SpeakableText from "@/components/speakable-text";
|
||||
|
||||
interface NoteDetail {
|
||||
ID: string;
|
||||
@@ -183,10 +184,18 @@ export default function NoteDetailPage() {
|
||||
h1: ({children}) => <h1 className="text-xl font-bold mt-6 mb-3">{children}</h1>,
|
||||
h2: ({children}) => <h2 className="text-lg font-bold mt-5 mb-2">{children}</h2>,
|
||||
h3: ({children}) => <h3 className="text-base font-bold mt-4 mb-2">{children}</h3>,
|
||||
p: ({children}) => <p className="mb-3">{children}</p>,
|
||||
p: ({children, node}) => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const txt = node?.children?.map((c: any) => c.type === 'text' ? c.value : '').join('') || '';
|
||||
return <p className="mb-3"><SpeakableText text={txt}>{children}</SpeakableText></p>;
|
||||
},
|
||||
ul: ({children}) => <ul className="list-disc ml-5 mb-3 space-y-1">{children}</ul>,
|
||||
ol: ({children}) => <ol className="list-decimal ml-5 mb-3 space-y-1">{children}</ol>,
|
||||
li: ({children}) => <li className="leading-relaxed">{children}</li>,
|
||||
li: ({children, node}) => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const txt = node?.children?.map((c: any) => c.type === 'text' ? c.value : '').join('') || '';
|
||||
return <li className="leading-relaxed"><SpeakableText text={txt}>{children}</SpeakableText></li>;
|
||||
},
|
||||
strong: ({children}) => <strong className="font-bold">{children}</strong>,
|
||||
blockquote: ({children}) => <blockquote className="border-l-2 border-[var(--color-primary)] pl-4 my-3 italic text-[var(--color-text-muted)]">{children}</blockquote>,
|
||||
}}
|
||||
|
||||
@@ -45,8 +45,21 @@ export default function TTSPage() {
|
||||
}, []);
|
||||
|
||||
const fetchProfiles = () => {
|
||||
// 캐시 먼저
|
||||
const cached = localStorage.getItem("tts_profiles");
|
||||
if (cached) {
|
||||
try {
|
||||
const data = JSON.parse(cached);
|
||||
setProfiles(data);
|
||||
if (data.length > 0 && !selectedProfile) setSelectedProfile(data[0].id);
|
||||
} catch {}
|
||||
}
|
||||
fetch("/api/tts/profiles").then(r => r.json())
|
||||
.then(setProfiles).catch(() => {});
|
||||
.then(data => {
|
||||
setProfiles(data);
|
||||
localStorage.setItem("tts_profiles", JSON.stringify(data));
|
||||
if (data.length > 0 && !selectedProfile) setSelectedProfile(data[0].id);
|
||||
}).catch(() => {});
|
||||
};
|
||||
|
||||
const startRecording = async () => {
|
||||
@@ -97,6 +110,7 @@ export default function TTSPage() {
|
||||
setRecordedUrl(null);
|
||||
setUploadedFile(null);
|
||||
fetchProfiles();
|
||||
localStorage.removeItem("tts_profiles"); // 캐시 강제 갱신
|
||||
setSelectedProfile(result.id);
|
||||
setTab("generate");
|
||||
} catch (err) {
|
||||
@@ -125,9 +139,10 @@ export default function TTSPage() {
|
||||
fd.append("text", text);
|
||||
fd.append("profile_id", selectedProfile);
|
||||
fd.append("language", language);
|
||||
const res = await fetch("/api/tts/generate", { method: "POST", body: fd });
|
||||
const res = await fetch("/api/tts/speak", { method: "POST", body: fd });
|
||||
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
||||
const blob = await res.blob();
|
||||
if (blob.size < 100) throw new Error("Empty audio");
|
||||
setOutputUrl(URL.createObjectURL(blob));
|
||||
} catch (err) {
|
||||
setError("생성 실패: " + (err instanceof Error ? err.message : ""));
|
||||
|
||||
85
sundol-frontend/src/components/speakable-text.tsx
Normal file
85
sundol-frontend/src/components/speakable-text.tsx
Normal file
@@ -0,0 +1,85 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useRef, useEffect } from "react";
|
||||
|
||||
/** Props accepted by {@link SpeakableText}. */
interface SpeakableProps {
  /** Content rendered as-is; the speaker button is appended after it. */
  children: React.ReactNode;
  /** Plain text posted to the TTS endpoint when the button is clicked. */
  text: string;
}

/** Texts shorter than this never get a speaker button and are never sent. */
const SPEAKABLE_MIN_TEXT_LENGTH = 5;

/** Responses smaller than this many bytes are treated as "no audio". */
const SPEAKABLE_MIN_AUDIO_BYTES = 200;

// Module-level cache shared by every SpeakableText instance so that the
// "tts_profiles" localStorage entry is not re-parsed for each paragraph once a
// profile is known.
let cachedProfileId: string | null = null;
// Only set once a profile was actually found: a "no profile" result is NOT
// cached, so a profile created later in the same session is picked up by the
// next instance that mounts.
let profileChecked = false;

/**
 * Reads the id of the first voice profile stored under "tts_profiles".
 *
 * @returns The id as a string, or `null` when the entry is missing, is not a
 *   non-empty array, has no usable `id`, or localStorage/JSON parsing throws.
 */
function readFirstProfileId(): string | null {
  try {
    const parsed: unknown = JSON.parse(localStorage.getItem("tts_profiles") || "[]");
    if (!Array.isArray(parsed) || parsed.length === 0) return null;
    const first: unknown = parsed[0];
    if (typeof first !== "object" || first === null) return null;
    const id = (first as { id?: unknown }).id;
    if (typeof id === "string" && id.length > 0) return id;
    if (typeof id === "number") return String(id);
    return null;
  } catch {
    // Best effort: a corrupt or inaccessible cache simply hides the button.
    return null;
  }
}

/**
 * Stops the audio element held in `audioRef` (if any), detaches its handlers
 * and revokes the blob URL held in `urlRef`, leaving both refs `null`.
 */
function disposePlayback(
  audioRef: { current: HTMLAudioElement | null },
  urlRef: { current: string | null },
): void {
  const audio = audioRef.current;
  if (audio) {
    audio.onended = null;
    audio.onerror = null;
    audio.pause();
  }
  audioRef.current = null;
  if (urlRef.current !== null) {
    URL.revokeObjectURL(urlRef.current);
    urlRef.current = null;
  }
}

/**
 * Wraps inline content and appends a small speaker button that reads `text`
 * aloud through `POST /api/tts/speak`, using the first profile found in the
 * "tts_profiles" localStorage entry.
 *
 * Renders only `children` when no profile is available or `text` is shorter
 * than {@link SPEAKABLE_MIN_TEXT_LENGTH}. Clicking while audio is playing
 * stops it. Failed requests, tiny responses and playback errors are ignored
 * silently and reset the button to its idle state.
 */
export default function SpeakableText({ children, text }: SpeakableProps) {
  const [playing, setPlaying] = useState(false);
  const [loading, setLoading] = useState(false);
  const [hasProfile, setHasProfile] = useState(false);
  const audioRef = useRef<HTMLAudioElement | null>(null);
  // Blob URL backing the current audio element; revoked when playback ends.
  const urlRef = useRef<string | null>(null);
  // Guards state updates and playback start after the component unmounted.
  const mountedRef = useRef(true);

  useEffect(() => {
    mountedRef.current = true;
    if (!profileChecked) {
      cachedProfileId = readFirstProfileId();
      profileChecked = cachedProfileId !== null;
    }
    setHasProfile(cachedProfileId !== null);

    return () => {
      // Do not keep talking (or hold the blob) after the text left the page.
      mountedRef.current = false;
      disposePlayback(audioRef, urlRef);
    };
  }, []);

  const handleSpeak = async (e: React.MouseEvent) => {
    // The button may sit inside links or other clickable containers.
    e.preventDefault();
    e.stopPropagation();

    if (playing) {
      disposePlayback(audioRef, urlRef);
      setPlaying(false);
      return;
    }

    const profileId = cachedProfileId;
    if (!profileId || text.length < SPEAKABLE_MIN_TEXT_LENGTH) return;

    setLoading(true);
    try {
      const fd = new FormData();
      fd.append("text", text);
      fd.append("profile_id", profileId);
      fd.append("language", "Korean");

      const res = await fetch("/api/tts/speak", { method: "POST", body: fd });
      if (!res.ok) return;
      const blob = await res.blob();
      if (blob.size < SPEAKABLE_MIN_AUDIO_BYTES) return;
      if (!mountedRef.current) return;

      disposePlayback(audioRef, urlRef);
      const url = URL.createObjectURL(blob);
      const audio = new Audio(url);
      audioRef.current = audio;
      urlRef.current = url;

      const finish = () => {
        // Ignore events from an element that has already been replaced.
        if (audioRef.current !== audio) return;
        disposePlayback(audioRef, urlRef);
        if (mountedRef.current) setPlaying(false);
      };
      audio.onended = finish;
      audio.onerror = finish;

      // play() returns a promise that rejects (autoplay policy, decode
      // failure); only report "playing" once playback actually started.
      await audio.play();
      if (mountedRef.current && audioRef.current === audio) setPlaying(true);
    } catch {
      disposePlayback(audioRef, urlRef);
      if (mountedRef.current) setPlaying(false);
    } finally {
      if (mountedRef.current) setLoading(false);
    }
  };

  if (!hasProfile || text.length < SPEAKABLE_MIN_TEXT_LENGTH) return <>{children}</>;

  const label = playing ? "중지" : "읽어주기";

  return (
    <>
      {children}
      <button
        type="button"
        onClick={(e) => void handleSpeak(e)}
        disabled={loading}
        className="inline-flex items-center ml-1 text-[var(--color-text-muted)] hover:text-[var(--color-primary)] disabled:opacity-30 align-middle"
        title={label}
        aria-label={label}
        style={{ fontSize: "0.85em", verticalAlign: "middle", cursor: "pointer" }}
      >
        {loading ? "⏳" : playing ? "⏹" : "🔊"}
      </button>
    </>
  );
}
|
||||
159
sundol-frontend/src/components/tts-reader.tsx
Normal file
159
sundol-frontend/src/components/tts-reader.tsx
Normal file
@@ -0,0 +1,159 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect, useRef } from "react";
|
||||
|
||||
/** Props accepted by {@link TTSReader}. */
interface TTSReaderProps {
  /** Markdown source to be read aloud line by line. */
  text: string;
}

/** Voice profile entry as listed by `/api/tts/profiles`. */
interface VoiceProfile {
  id: string;
  name: string;
}

/** Lines shorter than this (after markdown stripping) are not spoken. */
const READER_MIN_SENTENCE_LENGTH = 10;

/** Responses of this size or smaller (bytes) are treated as "no audio". */
const READER_MIN_AUDIO_BYTES = 100;

/**
 * Strips the markdown constructs handled below and splits the rest into
 * trimmed lines, dropping lines shorter than
 * {@link READER_MIN_SENTENCE_LENGTH}. Heading lines are removed entirely.
 */
function toSentences(md: string): string[] {
  return md
    .replace(/^#+\s+.*$/gm, "") // heading lines
    .replace(/\*\*/g, "") // bold markers
    .replace(/^[-*]\s+/gm, "") // bullet prefixes
    .replace(/^>\s+/gm, "") // blockquote prefixes
    .replace(/---+/g, "") // horizontal rules
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // links -> link text
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.length >= READER_MIN_SENTENCE_LENGTH);
}

/**
 * Narrows untrusted JSON (localStorage cache or API response) to a list of
 * voice profiles. Entries without a usable `id` are dropped; a missing
 * `name` falls back to the id so the option still renders.
 */
function toVoiceProfiles(data: unknown): VoiceProfile[] {
  if (!Array.isArray(data)) return [];
  const result: VoiceProfile[] = [];
  for (const item of data as unknown[]) {
    if (typeof item !== "object" || item === null) continue;
    const { id, name } = item as { id?: unknown; name?: unknown };
    if (typeof id !== "string" && typeof id !== "number") continue;
    const profileId = String(id);
    if (profileId.length === 0) continue;
    result.push({ id: profileId, name: typeof name === "string" ? name : profileId });
  }
  return result;
}

/**
 * Toolbar that reads `text` aloud: every speakable line is synthesised via
 * `POST /api/tts/speak` one after another, and playback starts as soon as the
 * first clip is ready while later clips are still being generated.
 *
 * Renders nothing until at least one voice profile is known. Profiles come
 * from the "tts_profiles" localStorage cache first and are then refreshed
 * from `/api/tts/profiles`.
 */
export default function TTSReader({ text }: TTSReaderProps) {
  const [profiles, setProfiles] = useState<VoiceProfile[]>([]);
  const [selectedProfile, setSelectedProfile] = useState("");
  const [generating, setGenerating] = useState(false);
  const [playing, setPlaying] = useState(false);
  const [progress, setProgress] = useState("");
  const audioRef = useRef<HTMLAudioElement | null>(null);
  // Blob URLs of the clips produced by the latest generate run (for replay).
  const audioUrlsRef = useRef<string[]>([]);
  // Session token: bumped by generate / replay / stop / unmount. Async
  // continuations compare their captured value against it, so a stopped or
  // superseded run can never touch state or the clip list of a newer one.
  const sessionRef = useRef(0);

  useEffect(() => {
    let cancelled = false;

    const applyProfiles = (list: VoiceProfile[]) => {
      setProfiles(list);
      // Functional update: keep whatever the user picked in the meantime as
      // long as that profile still exists, otherwise fall back to the first.
      setSelectedProfile((prev) =>
        list.some((p) => p.id === prev) ? prev : list[0]?.id ?? "",
      );
    };

    // Cached list first so the toolbar can render without waiting.
    try {
      const cached = localStorage.getItem("tts_profiles");
      if (cached) applyProfiles(toVoiceProfiles(JSON.parse(cached)));
    } catch {
      // Corrupt or inaccessible cache: ignore, the refresh below follows.
    }

    // Background refresh (does not block rendering).
    fetch("/api/tts/profiles")
      .then((r) => {
        if (!r.ok) throw new Error(`HTTP ${r.status}`);
        return r.json();
      })
      .then((data: unknown) => {
        // Never overwrite the shared cache with an error payload.
        if (!Array.isArray(data)) return;
        localStorage.setItem("tts_profiles", JSON.stringify(data));
        if (!cancelled) applyProfiles(toVoiceProfiles(data));
      })
      .catch(() => {
        // Best effort: keep showing the cached profiles.
      });

    return () => {
      cancelled = true;
      // Invalidate running loops, silence audio and free the blobs.
      sessionRef.current++;
      audioRef.current?.pause();
      audioRef.current = null;
      for (const url of audioUrlsRef.current) URL.revokeObjectURL(url);
      audioUrlsRef.current = [];
    };
  }, []);

  /**
   * Synthesises one chunk synchronously on the server.
   *
   * @returns A blob URL for the returned audio, or `null` on a network error,
   *   a non-2xx status, or a response of at most
   *   {@link READER_MIN_AUDIO_BYTES} bytes.
   */
  const speak = async (chunk: string): Promise<string | null> => {
    try {
      const fd = new FormData();
      fd.append("text", chunk);
      fd.append("profile_id", selectedProfile);
      fd.append("language", "Korean");
      const res = await fetch("/api/tts/speak", { method: "POST", body: fd });
      if (!res.ok) return null;
      const blob = await res.blob();
      return blob.size > READER_MIN_AUDIO_BYTES ? URL.createObjectURL(blob) : null;
    } catch {
      // A failed chunk is skipped instead of aborting the whole run.
      return null;
    }
  };

  const handleGenerate = async () => {
    if (!selectedProfile || !text.trim()) return;

    const session = ++sessionRef.current;
    const isCurrent = () => sessionRef.current === session;

    // Clips of the previous run are no longer reachable: free them.
    for (const url of audioUrlsRef.current) URL.revokeObjectURL(url);
    audioUrlsRef.current = [];

    setGenerating(true);
    setPlaying(true);

    const sentences = toSentences(text);
    let isAudioPlaying = false;
    let playIdx = 0;
    let generationDone = false;

    const playNext = () => {
      if (!isCurrent()) return;
      const src = audioUrlsRef.current[playIdx];
      if (src === undefined) {
        // Queue drained. While generating, the loop restarts playback when
        // the next clip arrives; after generation this is the real end.
        isAudioPlaying = false;
        if (generationDone) setPlaying(false);
        return;
      }
      playIdx++;
      isAudioPlaying = true;
      const clip = new Audio(src);
      audioRef.current = clip;
      // onerror and a rejected play() can both report the same failure;
      // advance the queue only once per clip.
      let advanced = false;
      const advance = () => {
        if (advanced) return;
        advanced = true;
        playNext();
      };
      clip.onended = advance;
      clip.onerror = advance;
      clip.play().catch(advance);
    };

    for (const [i, sentence] of sentences.entries()) {
      if (!isCurrent()) break;
      setProgress(`${i + 1}/${sentences.length}`);
      const url = await speak(sentence);
      if (!url) continue;
      if (!isCurrent()) {
        // Stopped or superseded while the request was in flight.
        URL.revokeObjectURL(url);
        break;
      }
      audioUrlsRef.current.push(url);
      if (!isAudioPlaying) playNext();
    }

    generationDone = true;
    // A stopped/superseded run must not reset state owned by someone else.
    if (!isCurrent()) return;
    setGenerating(false);
    setProgress("");
    if (!isAudioPlaying) setPlaying(false);
  };

  const handleStop = () => {
    sessionRef.current++;
    audioRef.current?.pause();
    setPlaying(false);
    setGenerating(false);
    setProgress("");
  };

  /** Plays the clips of the last generate run again, in order. */
  const handleReplay = () => {
    const urls = audioUrlsRef.current;
    if (urls.length === 0) return;

    const session = ++sessionRef.current;
    setPlaying(true);

    let idx = 0;
    const play = () => {
      if (sessionRef.current !== session) return;
      const src = urls[idx];
      if (src === undefined) {
        setPlaying(false);
        return;
      }
      idx++;
      const clip = new Audio(src);
      audioRef.current = clip;
      let advanced = false;
      const advance = () => {
        if (advanced) return;
        advanced = true;
        play();
      };
      clip.onended = advance;
      clip.onerror = advance;
      clip.play().catch(advance);
    };
    play();
  };

  if (profiles.length === 0) return null;

  return (
    <div className="flex items-center gap-2 flex-wrap">
      <select
        value={selectedProfile}
        onChange={(e) => setSelectedProfile(e.target.value)}
        className="text-xs px-2 py-1 rounded bg-[var(--color-bg-hover)] border border-[var(--color-border)]"
      >
        {profiles.map((p) => (
          <option key={p.id} value={p.id}>
            {p.name}
          </option>
        ))}
      </select>

      {playing || generating ? (
        <button
          onClick={handleStop}
          className="text-xs px-3 py-1 bg-red-500/20 text-red-400 rounded hover:bg-red-500/30"
        >
          {progress || "중지"}
        </button>
      ) : (
        <button
          onClick={() => void handleGenerate()}
          disabled={!selectedProfile}
          className="text-xs px-3 py-1 bg-[var(--color-primary)]/20 text-[var(--color-primary)] rounded hover:bg-[var(--color-primary)]/30 disabled:opacity-40"
        >
          읽어주기
        </button>
      )}

      {audioUrlsRef.current.length > 0 && !playing && !generating && (
        <button
          onClick={handleReplay}
          className="text-xs px-3 py-1 bg-[var(--color-bg-hover)] border border-[var(--color-border)] rounded"
        >
          다시 재생
        </button>
      )}
    </div>
  );
}
|
||||
Reference in New Issue
Block a user