Add Notes, Voice Clone TTS, fix auth persistence and maxTokens

Notes:
- notes table with TEXT/AUDIO types, category support
- Audio upload → OpenRouter Gemini STT → OCI GenAI polish/summary
- Raw STT saved separately in raw_content column
- Polish/summary button for manual re-processing
- Async processing with real-time polling

Voice Clone TTS:
- Qwen3-TTS 1.7B model on A10 GPU via FastAPI server
- Voice profile registration (record/upload → save embedding)
- Profile-based TTS generation API
- TTS web page with recording, profile management, generation

Auth fixes:
- Store both access + refresh tokens in localStorage
- Initialize state from localStorage synchronously (no flash)
- Request interceptor reads token from localStorage every request
- Refresh via body (not just cookie)

Other fixes:
- maxTokens 4096 → 65536 (OCI GenAI Gemini supports up to 65536)
- Fix broken Korean chars in source files
- OpenRouter config for STT
- ffmpeg installed for audio conversion
- Ollama + Gemma 4 E4B installed (STT fallback)
- nginx proxy for TTS server (/api/tts/)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-13 07:34:18 +00:00
parent 6c2129d42e
commit 1088b23790
14 changed files with 1863 additions and 120 deletions

3
.gitignore vendored
View File

@@ -68,3 +68,6 @@ oracle_data/
# ========================
.claude/
cookies.txt
audio-uploads/
voice-profiles/
*.wav

View File

@@ -45,11 +45,18 @@ public class AuthController {
}
@PostMapping("/refresh")
public Mono<ResponseEntity<LoginResponse>> refresh(ServerHttpRequest request, ServerHttpResponse response) {
HttpCookie cookie = request.getCookies().getFirst("refreshToken");
String refreshToken = cookie != null ? cookie.getValue() : null;
public Mono<ResponseEntity<LoginResponse>> refresh(
ServerHttpRequest request, ServerHttpResponse response,
@RequestBody(required = false) Map<String, String> body) {
// 1차: body에서 refreshToken
String refreshToken = (body != null) ? body.get("refreshToken") : null;
// 2차: cookie에서 refreshToken
if (refreshToken == null || refreshToken.isBlank()) {
HttpCookie cookie = request.getCookies().getFirst("refreshToken");
refreshToken = cookie != null ? cookie.getValue() : null;
}
if (refreshToken == null) {
if (refreshToken == null || refreshToken.isBlank()) {
return Mono.just(ResponseEntity.status(401).build());
}

View File

@@ -0,0 +1,561 @@
package com.sundol.controller;
import com.sundol.repository.CategoryRepository;
import com.sundol.repository.NoteRepository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.ResponseEntity;
import org.springframework.http.codec.multipart.FilePart;
import org.springframework.security.core.annotation.AuthenticationPrincipal;
import org.springframework.web.bind.annotation.*;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Base64;
import java.util.List;
import java.util.Map;
@RestController
@RequestMapping("/api/notes")
public class NoteController {
// Logger for this controller.
private static final Logger log = LoggerFactory.getLogger(NoteController.class);
// Directory for uploaded audio: "audio-uploads" under the JVM working directory (user.dir).
private static final Path AUDIO_DIR = Path.of(System.getProperty("user.dir"), "audio-uploads");
// Shared HTTP client used for the OpenRouter and local Ollama (localhost:11434) calls below.
private static final HttpClient httpClient = HttpClient.newHttpClient();
// OpenRouter API key; defaults to empty, in which case transcribeAsync skips the OpenRouter STT step.
@Value("${openrouter.api-key:}")
private String openRouterApiKey;
// OpenRouter model id sent in the STT request; defaults to google/gemini-2.5-flash.
@Value("${openrouter.model:google/gemini-2.5-flash}")
private String openRouterModel;
// Persistence for the notes table.
private final NoteRepository noteRepository;
// Injected but not referenced by any method visible in this class.
private final CategoryRepository categoryRepository;
// LLM service used for polishing, summarising and titling transcriptions.
private final com.sundol.service.OciGenAiService genAiService;
/**
 * Wires the collaborators and makes sure the audio upload directory exists.
 *
 * <p>Directory creation stays best-effort (start-up is not aborted), but a failure is now
 * logged instead of being silently swallowed, so that later upload errors can be traced
 * back to their cause.
 *
 * @param noteRepository     persistence for notes
 * @param categoryRepository persistence for categories
 * @param genAiService       LLM service used for polish / summary / title generation
 */
public NoteController(NoteRepository noteRepository, CategoryRepository categoryRepository,
        com.sundol.service.OciGenAiService genAiService) {
    this.noteRepository = noteRepository;
    this.categoryRepository = categoryRepository;
    this.genAiService = genAiService;
    try {
        Files.createDirectories(AUDIO_DIR);
    } catch (IOException | RuntimeException e) {
        log.warn("Could not create audio upload directory {}", AUDIO_DIR, e);
    }
}
/**
 * Lists the caller's notes, optionally restricted to one category.
 *
 * @param userId     authenticated user id
 * @param categoryId optional category filter
 * @return 200 with the rows returned by the repository
 */
@GetMapping
public Mono<ResponseEntity<List<Map<String, Object>>>> list(
        @AuthenticationPrincipal String userId,
        @RequestParam(required = false) String categoryId) {
    // The repository call is blocking JDBC, so it runs on the bounded-elastic scheduler.
    Mono<List<Map<String, Object>>> rows =
            Mono.fromCallable(() -> noteRepository.list(userId, categoryId));
    return rows
            .subscribeOn(Schedulers.boundedElastic())
            .map(notes -> ResponseEntity.ok(notes));
}
/**
 * Returns a single note owned by the caller.
 *
 * @param userId authenticated user id
 * @param id     note id (hex string)
 * @return 200 with the note, or 404 when the repository finds nothing
 */
@GetMapping("/{id}")
public Mono<ResponseEntity<Map<String, Object>>> getById(
        @AuthenticationPrincipal String userId,
        @PathVariable String id) {
    return Mono.fromCallable(() -> {
        Map<String, Object> found = noteRepository.findById(userId, id);
        if (found != null) {
            return ResponseEntity.ok(found);
        }
        return ResponseEntity.notFound().<Map<String, Object>>build();
    }).subscribeOn(Schedulers.boundedElastic());
}
/**
 * Creates a TEXT note for the caller.
 *
 * @param userId authenticated user id
 * @param body   JSON body; "title" and "content" default to the empty string, "categoryId" is optional
 * @return 200 with {"id": newId}
 */
@PostMapping
public Mono<ResponseEntity<Map<String, Object>>> create(
        @AuthenticationPrincipal String userId,
        @RequestBody Map<String, String> body) {
    return Mono.fromCallable(() -> {
        String newId = noteRepository.insert(
                userId,
                body.getOrDefault("title", ""),
                body.getOrDefault("content", ""),
                "TEXT",
                null,
                body.get("categoryId"));
        return ResponseEntity.ok(Map.<String, Object>of("id", newId));
    }).subscribeOn(Schedulers.boundedElastic());
}
/**
 * Partially updates a note owned by the caller.
 *
 * <p>The repository's UPDATE always writes both title and content, so a PATCH that omitted
 * one of them used to overwrite that column with NULL. Missing fields are now filled in
 * from the stored note, and a note that does not exist (or is not owned by the caller)
 * yields 404 instead of 200 with an empty body.
 *
 * @param userId authenticated user id
 * @param id     note id (hex string)
 * @param body   JSON body; any of "title", "content", "categoryId" may be absent
 * @return 200 with the updated note, or 404 when the note is not found
 */
@PatchMapping("/{id}")
public Mono<ResponseEntity<Map<String, Object>>> update(
        @AuthenticationPrincipal String userId,
        @PathVariable String id,
        @RequestBody Map<String, String> body) {
    return Mono.fromCallable(() -> {
        Map<String, Object> existing = noteRepository.findById(userId, id);
        if (existing == null) {
            return ResponseEntity.notFound().<Map<String, Object>>build();
        }
        Object storedTitle = existing.get("TITLE");
        Object storedContent = existing.get("CONTENT");

        // Fall back to the stored value for every field the client did not send.
        String title = body.get("title");
        if (title == null && storedTitle != null) {
            title = storedTitle.toString();
        }
        String content = body.get("content");
        if (content == null && storedContent != null) {
            content = storedContent.toString();
        }
        String categoryId = body.get("categoryId");

        noteRepository.update(id, userId, title, content, categoryId);
        return ResponseEntity.ok(noteRepository.findById(userId, id));
    }).subscribeOn(Schedulers.boundedElastic());
}
/**
 * Re-runs LLM polishing and summarisation on an existing note.
 *
 * <p>The request returns immediately with {"status": "processing"}; the work runs as a
 * background task. While it runs the note type is "TRANSCRIBING" and the content carries a
 * progress marker. On success the content becomes "# 요약 ... # 전문 ..." and the title is
 * regenerated. On failure the content that was stored before the request is written back,
 * so that the progress marker does not remain in the note.
 *
 * @param userId authenticated user id
 * @param id     note id (hex string)
 * @return 404 when the note is not found, 400 when its content is blank, otherwise 200
 */
@PostMapping("/{id}/polish")
public Mono<ResponseEntity<Map<String, Object>>> polishNote(
        @AuthenticationPrincipal String userId,
        @PathVariable String id) {
    return Mono.fromCallable(() -> {
        Map<String, Object> note = noteRepository.findById(userId, id);
        if (note == null) return ResponseEntity.notFound().<Map<String, Object>>build();
        String content = note.get("CONTENT") != null ? note.get("CONTENT").toString() : "";
        if (content.isBlank()) return ResponseEntity.badRequest().<Map<String, Object>>build();

        // Prefer raw_content; otherwise fall back to content.
        Object rawObj = note.get("RAW_CONTENT");
        String rawText = (rawObj != null && !rawObj.toString().isBlank()) ? rawObj.toString() : content;
        // When the text is already in "summary + full text" form, keep only the full-text part.
        if (rawText.contains("# 전문")) {
            int idx = rawText.indexOf("# 전문");
            rawText = rawText.substring(idx + "# 전문".length()).strip();
        }

        String noteType = note.get("NOTE_TYPE") != null ? note.get("NOTE_TYPE").toString() : "TEXT";
        boolean isAudio = "AUDIO".equals(noteType) || note.get("AUDIO_PATH") != null;
        noteRepository.updateNoteType(id, "TRANSCRIBING");

        final String finalRawText = rawText;
        Schedulers.boundedElastic().schedule(() -> {
            try {
                noteRepository.updateContent(id, finalRawText + "\n\n--- 텍스트 교정 중... ---");
                String polished = polishTranscription(finalRawText);
                noteRepository.updateContent(id, polished + "\n\n--- 요약 생성 중... ---");
                String summary = summarizeTranscription(polished);
                String result = "# 요약\n\n" + summary + "\n\n---\n\n# 전문\n\n" + polished;
                String newTitle = generateAudioTitle(summary, java.time.LocalDateTime.now());
                noteRepository.update(id, null, newTitle, result, null);
                noteRepository.updateNoteType(id, isAudio ? "AUDIO" : "TEXT");
                log.info("Polish + summary complete for note {}", id);
            } catch (Exception e) {
                log.error("Polish failed for note {}", id, e);
                // Undo the progress-marker writes so the user gets the previous content back.
                try {
                    noteRepository.updateContent(id, content);
                } catch (Exception restoreError) {
                    log.error("Could not restore content of note {} after failed polish", id, restoreError);
                }
                noteRepository.updateNoteType(id, isAudio ? "AUDIO" : "TEXT");
            }
        });
        return ResponseEntity.ok(Map.<String, Object>of("status", "processing"));
    }).subscribeOn(Schedulers.boundedElastic());
}
/**
 * Deletes a note owned by the caller.
 *
 * @param userId authenticated user id
 * @param id     note id (hex string)
 * @return 200 once the repository call has completed
 */
@DeleteMapping("/{id}")
public Mono<ResponseEntity<Void>> delete(
        @AuthenticationPrincipal String userId,
        @PathVariable String id) {
    Mono<Void> removal = Mono.fromRunnable(() -> noteRepository.delete(id, userId));
    return removal
            .subscribeOn(Schedulers.boundedElastic())
            .then(Mono.just(ResponseEntity.ok().<Void>build()));
}
/**
 * Accepts an audio upload, creates a note in "TRANSCRIBING" state and starts speech-to-text
 * in the background (see {@code transcribeAsync}).
 *
 * <p>The client-supplied filename is untrusted. Every character other than letters, digits,
 * '.', '_' and '-' is replaced with '_' before the name is used in a filesystem path, so path
 * separators cannot reach {@code AUDIO_DIR.resolve(...)}. The extension is preserved because
 * later steps inspect it.
 *
 * @param userId     authenticated user id
 * @param filePart   uploaded audio file
 * @param title      optional title; a placeholder is used when blank
 * @param categoryId optional category id
 * @return 200 with {"id": newId}; the transcription result arrives asynchronously
 */
@PostMapping("/audio")
public Mono<ResponseEntity<Map<String, Object>>> uploadAudio(
        @AuthenticationPrincipal String userId,
        @RequestPart("file") FilePart filePart,
        @RequestPart(value = "title", required = false) String title,
        @RequestPart(value = "categoryId", required = false) String categoryId) {
    final String inputTitle = (title != null && !title.isBlank()) ? title : "음성 변환 중...";
    final String inputCategoryId = categoryId;
    return Mono.fromCallable(() -> {
        // 1. Save the file under a sanitised, timestamp-prefixed name.
        String safeName = filePart.filename().replaceAll("[^\\p{L}\\p{N}._-]", "_");
        String fileName = System.currentTimeMillis() + "_" + safeName;
        Path audioFile = AUDIO_DIR.resolve(fileName);
        // Blocking here is acceptable: this callable runs on the bounded-elastic scheduler.
        filePart.transferTo(audioFile).block();
        log.info("Audio file saved: {} ({} bytes)", audioFile, Files.size(audioFile));

        // 2. Create the note right away in TRANSCRIBING state.
        String id = noteRepository.insert(userId, inputTitle, "음성 변환을 시작합니다...", "TRANSCRIBING", fileName, inputCategoryId);

        // 3. Run STT in the background.
        Schedulers.boundedElastic().schedule(() -> {
            try {
                transcribeAsync(id, audioFile, inputTitle);
            } catch (Exception e) {
                log.error("Async transcription failed for note {}", id, e);
                noteRepository.updateContent(id, "음성 변환에 실패했습니다: " + e.getMessage());
                noteRepository.updateNoteType(id, "AUDIO_FAILED");
            }
        });
        return ResponseEntity.ok(Map.<String, Object>of("id", id));
    }).subscribeOn(Schedulers.boundedElastic());
}
/**
 * Background speech-to-text pipeline for one note.
 *
 * <ol>
 *   <li>STT via OpenRouter when an API key is configured; otherwise, or when that fails or
 *       returns nothing, STT via the local Gemma model. The raw text is stored in raw_content.</li>
 *   <li>Polish the raw text with the LLM.</li>
 *   <li>Summarise the polished text and store "summary + full text" as content.</li>
 * </ol>
 *
 * <p>The note type stays "TRANSCRIBING" from start until the pipeline finishes, and only then
 * becomes "AUDIO" (or "AUDIO_FAILED" when no STT result could be obtained). It is no longer
 * switched to "AUDIO" between steps 1 and 2, because a client that polls only while the type
 * is "TRANSCRIBING" could observe that intermediate state and stop polling early.
 *
 * <p>If polishing or summarising fails, content is reset to the raw STT text so that no
 * progress marker is left behind.
 *
 * <p>NOTE(review): the Gemma fallback transcribes the whole file in one request;
 * {@code transcribeWithGemma} implements chunking but is not called here — confirm intent.
 *
 * @param noteId     id of the note to fill in
 * @param audioFile  uploaded audio file
 * @param inputTitle title given at upload time; the placeholder "음성 변환 중..." means "generate one"
 */
private void transcribeAsync(String noteId, Path audioFile, String inputTitle) throws IOException, InterruptedException {
    final boolean autoTitle = inputTitle.equals("음성 변환 중...");

    // === Step 1: STT (OpenRouter Gemini) ===
    String rawResult = null;
    if (openRouterApiKey != null && !openRouterApiKey.isBlank()) {
        try {
            noteRepository.updateContent(noteId, "Gemini로 음성 변환 중...");
            rawResult = transcribeWithOpenRouter(audioFile);
            log.info("OpenRouter STT: {} chars", rawResult != null ? rawResult.length() : 0);
        } catch (Exception e) {
            log.warn("OpenRouter STT failed: {}", e.getMessage());
            noteRepository.updateContent(noteId, "Gemini STT 실패: " + e.getMessage());
        }
    }

    // Gemma fallback (when OpenRouter was skipped, failed, or returned nothing).
    if (rawResult == null || rawResult.isBlank()) {
        Path wavFile = null;
        try {
            noteRepository.updateContent(noteId, "Gemma로 음성 변환 중...");
            wavFile = convertToWav(audioFile);
            rawResult = transcribeChunk(wavFile);
        } catch (Exception e) {
            log.error("All STT failed for note {}", noteId, e);
            noteRepository.updateContent(noteId, "모든 음성 변환 실패: " + e.getMessage());
            noteRepository.updateNoteType(noteId, "AUDIO_FAILED");
            return;
        } finally {
            // Remove the converted wav on failure as well as on success.
            if (wavFile != null) {
                cleanup(wavFile, audioFile);
            }
        }
    }

    if (rawResult == null || rawResult.isBlank()) {
        noteRepository.updateContent(noteId, "음성 변환 결과가 비어있습니다.");
        noteRepository.updateNoteType(noteId, "AUDIO_FAILED");
        return;
    }

    // Store the raw text in its own column and show it as content for now.
    noteRepository.updateRawContent(noteId, rawResult);
    String sttTitle = java.time.LocalDateTime.now().format(
            java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm")) + " 음성 메모";
    noteRepository.update(noteId, null, autoTitle ? sttTitle : inputTitle, rawResult, null);
    log.info("STT raw saved: {} chars", rawResult.length());

    // === Step 2: polish (OCI GenAI) ===
    try {
        noteRepository.updateContent(noteId, rawResult + "\n\n--- 텍스트 교정 중 (OCI GenAI)... ---");
        String polished = polishTranscription(rawResult);
        log.info("Polish complete: {} chars", polished.length());

        // === Step 3: summary (OCI GenAI) ===
        noteRepository.updateContent(noteId, polished + "\n\n--- 요약 생성 중... ---");
        String summary = summarizeTranscription(polished);
        log.info("Summary complete: {} chars", summary.length());

        // Final result.
        String result = "# 요약\n\n" + summary + "\n\n---\n\n# 전문\n\n" + polished;
        String finalTitle = autoTitle
                ? generateAudioTitle(summary, java.time.LocalDateTime.now())
                : inputTitle;
        noteRepository.update(noteId, null, finalTitle, result, null);
        log.info("Final note saved: {} chars", result.length());
    } catch (Exception e) {
        log.warn("Polish/summary failed, keeping raw STT text: {}", e.getMessage());
        // Content currently ends with a progress marker; put the raw text back.
        try {
            noteRepository.updateContent(noteId, rawResult);
        } catch (Exception restoreError) {
            log.error("Could not restore raw STT text for note {}", noteId, restoreError);
        }
    }
    noteRepository.updateNoteType(noteId, "AUDIO");
}
/**
 * Converts an audio file to a 16 kHz mono wav next to the original by running ffmpeg.
 * A file whose name already ends in ".wav" (case-insensitive) is returned unchanged.
 *
 * @param audioFile source audio file
 * @return the wav file (possibly {@code audioFile} itself)
 * @throws IOException when ffmpeg exits with a non-zero status
 */
private Path convertToWav(Path audioFile) throws IOException, InterruptedException {
    final String sourceName = audioFile.getFileName().toString();
    if (sourceName.toLowerCase().endsWith(".wav")) {
        return audioFile;
    }

    final String baseName = sourceName.replaceAll("\\.[^.]+$", "");
    final Path target = audioFile.getParent().resolve(baseName + ".wav");
    final List<String> command = List.of(
            "ffmpeg", "-i", audioFile.toString(),
            "-ar", "16000", "-ac", "1", "-y",
            target.toString());

    // stderr is merged into stdout and drained fully before waiting for the exit status.
    Process ffmpeg = new ProcessBuilder(command).redirectErrorStream(true).start();
    String ffmpegOutput = new String(ffmpeg.getInputStream().readAllBytes());
    int status = ffmpeg.waitFor();
    if (status != 0) {
        String head = ffmpegOutput.substring(0, Math.min(500, ffmpegOutput.length()));
        log.error("ffmpeg conversion failed (exit {}): {}", status, head);
        throw new IOException("오디오 변환 실패 (ffmpeg exit " + status + ")");
    }
    log.info("Converted {} to wav: {} bytes", sourceName, Files.size(target));
    return target;
}
private static final int CHUNK_SECONDS = 180; // split long audio into 3-minute chunks

/**
 * Transcribes an audio file with the local Gemma model, splitting it into
 * {@link #CHUNK_SECONDS}-second chunks when it is longer than one chunk.
 *
 * <p>Changes compared with the previous version:
 * <ul>
 *   <li>The "all chunks empty" error is now raised when no chunk produced text. Previously the
 *       "[chunk n 변환 실패]" markers made the joined text non-blank, so the check never fired
 *       when every chunk failed.</li>
 *   <li>The converted wav is removed in a {@code finally} block, so it no longer leaks on errors.</li>
 *   <li>A non-zero ffmpeg exit status while splitting is reported as a failure of that chunk.</li>
 *   <li>{@link InterruptedException} is propagated instead of being recorded as a chunk failure.</li>
 * </ul>
 *
 * @param audioFile source audio file
 * @return the transcription, with failed chunks marked inline
 * @throws IOException when conversion fails or no chunk yields any text
 */
private String transcribeWithGemma(Path audioFile) throws IOException, InterruptedException {
    Path wavFile = convertToWav(audioFile);
    try {
        double duration = getAudioDuration(wavFile);
        log.info("Audio duration: {}s", duration);
        if (duration <= CHUNK_SECONDS) {
            return transcribeChunk(wavFile);
        }

        // Long audio: split into 3-minute chunks.
        int chunks = (int) Math.ceil(duration / CHUNK_SECONDS);
        log.info("Splitting audio into {} chunks of {}s", chunks, CHUNK_SECONDS);
        StringBuilder fullText = new StringBuilder();
        int transcribedChunks = 0;
        for (int i = 0; i < chunks; i++) {
            int start = i * CHUNK_SECONDS;
            Path chunkFile = wavFile.getParent().resolve("chunk_" + i + "_" + System.currentTimeMillis() + ".wav");
            try {
                ProcessBuilder pb = new ProcessBuilder(
                        "ffmpeg", "-i", wavFile.toString(),
                        "-ss", String.valueOf(start), "-t", String.valueOf(CHUNK_SECONDS),
                        "-ar", "16000", "-ac", "1", "-y", chunkFile.toString());
                pb.redirectErrorStream(true);
                Process proc = pb.start();
                proc.getInputStream().readAllBytes(); // drain output so ffmpeg cannot block on a full pipe
                int exitCode = proc.waitFor();
                if (exitCode != 0) {
                    throw new IOException("ffmpeg split failed (exit " + exitCode + ")");
                }
                log.info("Transcribing chunk {}/{} ({}s-{}s)", i + 1, chunks, start, Math.min(start + CHUNK_SECONDS, (int) duration));
                String chunkText = transcribeChunk(chunkFile);
                if (!chunkText.isBlank()) {
                    if (fullText.length() > 0) fullText.append("\n\n");
                    fullText.append(chunkText);
                    transcribedChunks++;
                }
            } catch (InterruptedException e) {
                throw e;
            } catch (Exception e) {
                log.warn("Chunk {} failed: {}", i + 1, e.getMessage());
                fullText.append("\n\n[chunk ").append(i + 1).append(" 변환 실패]");
            } finally {
                try { Files.deleteIfExists(chunkFile); } catch (Exception ignored) {}
            }
        }
        if (transcribedChunks == 0) {
            throw new IOException("Gemma STT returned empty for all chunks");
        }
        return fullText.toString().strip();
    } finally {
        cleanup(wavFile, audioFile);
    }
}
/**
 * Sends one wav file, base64-encoded, to the Ollama chat endpoint on localhost:11434
 * (model "gemma4:e4b") and returns the text of the reply.
 *
 * @param wavFile wav file to transcribe
 * @return stripped reply text; empty when the response carries no message content
 * @throws IOException when the endpoint answers with a status other than 200
 */
private String transcribeChunk(Path wavFile) throws IOException, InterruptedException {
    byte[] wavBytes = Files.readAllBytes(wavFile);
    String encoded = Base64.getEncoder().encodeToString(wavBytes);
    log.info("Chunk base64: {} chars ({} MB)", encoded.length(), wavBytes.length / 1024 / 1024);

    String requestJson = """
        {
        "model": "gemma4:e4b",
        "messages": [{"role": "user", "content": "Transcribe the following audio to text accurately. Output only the spoken content in its original language. Do not add any description or translation.", "images": ["%s"]}],
        "stream": false,
        "options": {"num_ctx": 8000}
        }
        """.formatted(encoded);

    HttpRequest ollamaCall = HttpRequest.newBuilder()
            .uri(URI.create("http://localhost:11434/api/chat"))
            .header("Content-Type", "application/json")
            .POST(HttpRequest.BodyPublishers.ofString(requestJson))
            .timeout(java.time.Duration.ofMinutes(10))
            .build();
    HttpResponse<String> reply = httpClient.send(ollamaCall, HttpResponse.BodyHandlers.ofString());

    int status = reply.statusCode();
    if (status != 200) {
        String bodyHead = reply.body().substring(0, Math.min(500, reply.body().length()));
        log.error("Gemma STT error {}: {}", status, bodyHead);
        throw new IOException("Gemma STT failed: HTTP " + status);
    }

    var json = new com.fasterxml.jackson.databind.ObjectMapper().readTree(reply.body());
    return json.path("message").path("content").asText("").strip();
}
/**
 * Cleans up raw STT text with the LLM (per the prompt: fix misrecognised words, drop
 * fillers, add punctuation and paragraphs, without summarising).
 *
 * <p>Texts of up to 30000 characters go out in one call; longer texts are processed in
 * consecutive 20000-character slices whose results are joined with blank lines.
 *
 * @param rawText raw STT output
 * @return the polished text, or {@code rawText} unchanged when the LLM is not configured or any call fails
 */
private String polishTranscription(String rawText) {
    if (!genAiService.isConfigured()) {
        log.info("GenAI not configured, skipping polish");
        return rawText;
    }
    try {
        final String systemMsg = String.join("\n",
                "당신은 전문 속기사입니다. 음성 인식(STT) 텍스트를 교정해주세요.",
                "",
                "## 규칙",
                "1. 발음 오인식 단어를 문맥에 맞게 보정하세요.",
                "2. 추임새(어, 음, 그, 아, 뭐, 이제, 근데)를 제거하세요.",
                "3. 문장 부호를 넣고 단락을 나누세요.",
                "4. 절대 요약하지 마세요. 원문의 모든 내용을 빠짐없이 유지하세요.",
                "5. 내용을 추가하거나 삭제하지 마세요. 교정만 하세요.",
                "6. 전문 용어와 고유 명사는 올바르게 표기하세요.",
                "7. 입력 텍스트와 비슷한 분량으로 출력하세요. 줄이지 마세요.",
                "8. Markdown 형식으로 출력하세요.");
        final String userPrefix = "아래 STT 텍스트를 교정해주세요. 전체 내용을 빠짐없이 유지하세요:\n\n";
        final int total = rawText.length();

        // Short enough for a single request.
        if (total <= 30000) {
            log.info("Polishing in single call: {} chars", total);
            return genAiService.chat(systemMsg, userPrefix + rawText, null).strip();
        }

        // Longer input: fixed-size slices, polished one after another.
        final int sliceSize = 20000;
        final int sliceCount = (int) Math.ceil((double) total / sliceSize);
        StringBuilder joined = new StringBuilder();
        int sliceNo = 0;
        int offset = 0;
        while (offset < total) {
            sliceNo++;
            String slice = rawText.substring(offset, Math.min(offset + sliceSize, total));
            log.info("Polishing chunk {}/{}: {} chars", sliceNo, sliceCount, slice.length());
            String fixed = genAiService.chat(systemMsg, userPrefix + slice, null).strip();
            if (joined.length() > 0) {
                joined.append("\n\n");
            }
            joined.append(fixed);
            offset += sliceSize;
        }
        return joined.toString();
    } catch (Exception e) {
        log.warn("Polish transcription failed, returning raw text: {}", e.getMessage());
        return rawText;
    }
}
/**
 * Summarises polished text with the LLM. Only the first 15000 characters are sent.
 *
 * @param polishedText text to summarise
 * @return the stripped summary, or an empty string when the LLM is not configured or the call fails
 */
private String summarizeTranscription(String polishedText) {
    if (!genAiService.isConfigured()) {
        return "";
    }
    try {
        final String systemMsg = String.join("\n",
                "당신은 회의록/녹음 요약 전문가입니다. 아래 텍스트를 요약해주세요.",
                "",
                "## 규칙",
                "1. 주요 논의 주제별로 소제목(##)을 나누어 요약하세요.",
                "2. 각 주제 아래 핵심 내용을 불릿 포인트로 정리하세요.",
                "3. 주요 결정 사항, 액션 아이템이 있다면 별도로 표시하세요.",
                "4. 원문과 같은 언어로 작성하세요.",
                "5. Markdown 형식으로 작성하세요.",
                "6. 원본 길이에 비례하여 요약하세요. 긴 내용은 상세하게, 짧은 내용은 간결하게.",
                "7. 중요한 수치, 이름, 기술명은 빠뜨리지 마세요.");

        // Cap the input length before sending it.
        String excerpt = polishedText;
        if (excerpt.length() > 15000) {
            excerpt = excerpt.substring(0, 15000);
        }
        String answer = genAiService.chat(systemMsg, "아래 내용을 요약해주세요:\n\n" + excerpt, null);
        return answer.strip();
    } catch (Exception e) {
        log.warn("Summarization failed: {}", e.getMessage());
        return "";
    }
}
/**
 * Builds a note title of the form "yyyy-MM-dd HH:mm &lt;topic&gt;", where the topic is
 * generated by the LLM from the summary.
 *
 * <p>The default title "yyyy-MM-dd HH:mm 음성 메모" is used when the LLM is not configured,
 * when the summary is null or blank, when the call fails, or when the generated topic turns
 * out blank. That last case previously produced a title with a trailing space and no topic.
 *
 * @param summary  summary text the topic is derived from
 * @param dateTime timestamp to prefix the title with
 * @return the generated title, never blank
 */
private String generateAudioTitle(String summary, java.time.LocalDateTime dateTime) {
    final String stamp = dateTime.format(java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm"));
    final String fallback = stamp + " 음성 메모";
    if (!genAiService.isConfigured() || summary == null || summary.isBlank()) {
        return fallback;
    }
    try {
        String systemMsg = "음성 메모의 요약을 보고 10자 이내의 짧은 제목을 생성해주세요. " +
                "제목만 출력하세요. 따옴표, 설명, 접두사 없이 제목만.";
        // Drop one leading/trailing quote in case the model quoted the title anyway.
        String title = genAiService.chat(systemMsg, summary, null).strip()
                .replaceAll("^\"|\"$", "").replaceAll("^'|'$", "");
        if (title.length() > 40) title = title.substring(0, 40);
        if (title.isBlank()) {
            return fallback;
        }
        return stamp + " " + title;
    } catch (Exception e) {
        log.warn("Title generation failed: {}", e.getMessage());
        return fallback;
    }
}
/**
 * Transcribes an audio file through the OpenRouter chat/completions API, sending the whole
 * file base64-encoded as an {@code input_audio} content part.
 *
 * <p>The {@code format} field is now taken from the file extension ("mp3", "m4a", "ogg",
 * "webm", "flac", default "wav"). It used to be the MIME subtype, which produced "mpeg" for
 * .mp3 and "mp4" for .m4a; those are MIME names rather than audio format names.
 *
 * @param audioFile audio file to transcribe
 * @return the stripped transcription, never blank
 * @throws IOException when the API answers with a status other than 200 or returns empty text
 */
private String transcribeWithOpenRouter(Path audioFile) throws IOException, InterruptedException {
    byte[] audioBytes = Files.readAllBytes(audioFile);
    String base64Audio = Base64.getEncoder().encodeToString(audioBytes);

    String name = audioFile.getFileName().toString().toLowerCase();
    int dot = name.lastIndexOf('.');
    String extension = dot >= 0 ? name.substring(dot + 1) : "";
    String format = switch (extension) {
        case "mp3" -> "mp3";
        case "m4a" -> "m4a";
        case "ogg" -> "ogg";
        case "webm" -> "webm";
        case "flac" -> "flac";
        default -> "wav";
    };
    log.info("OpenRouter STT: {} ({} MB, {})", name, audioBytes.length / 1024 / 1024, format);

    // OpenRouter chat/completions API with audio input
    String payload = new com.fasterxml.jackson.databind.ObjectMapper().writeValueAsString(Map.of(
            "model", openRouterModel,
            "messages", List.of(Map.of(
                    "role", "user",
                    "content", List.of(
                            Map.of("type", "input_audio", "input_audio", Map.of(
                                    "data", base64Audio,
                                    "format", format
                            )),
                            Map.of("type", "text", "text",
                                    "Transcribe the audio accurately. Output only the spoken content in its original language. " +
                                    "Do not add description, annotation, timestamps, or translation. " +
                                    "If the audio contains Korean, output in Korean.")
                    )
            ))
    ));

    HttpRequest request = HttpRequest.newBuilder()
            .uri(URI.create("https://openrouter.ai/api/v1/chat/completions"))
            .header("Content-Type", "application/json")
            .header("Authorization", "Bearer " + openRouterApiKey)
            .POST(HttpRequest.BodyPublishers.ofString(payload))
            .timeout(java.time.Duration.ofMinutes(10))
            .build();
    HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString());
    if (response.statusCode() != 200) {
        log.error("OpenRouter STT error {}: {}", response.statusCode(),
                response.body().substring(0, Math.min(500, response.body().length())));
        throw new IOException("OpenRouter STT failed: HTTP " + response.statusCode());
    }

    var root = new com.fasterxml.jackson.databind.ObjectMapper().readTree(response.body());
    String text = root.path("choices").path(0).path("message").path("content").asText("").strip();
    if (text.isBlank()) {
        throw new IOException("OpenRouter STT returned empty result");
    }
    return text;
}
/**
 * Asks ffprobe for the duration of an audio file.
 *
 * @param audioFile file to inspect
 * @return the duration printed by ffprobe, or 0 when its output is not a number
 */
private double getAudioDuration(Path audioFile) throws IOException, InterruptedException {
    List<String> probeCommand = List.of(
            "ffprobe", "-i", audioFile.toString(),
            "-show_entries", "format=duration", "-v", "quiet", "-of", "csv=p=0");
    Process ffprobe = new ProcessBuilder(probeCommand).redirectErrorStream(true).start();
    String printed = new String(ffprobe.getInputStream().readAllBytes()).strip();
    ffprobe.waitFor();
    try {
        return Double.parseDouble(printed);
    } catch (NumberFormatException e) {
        return 0;
    }
}
/**
 * Deletes the wav file unless it is the original upload itself. Deletion errors are ignored.
 *
 * @param wavFile      wav file that may have been produced by conversion
 * @param originalFile the uploaded file, which is never deleted here
 */
private void cleanup(Path wavFile, Path originalFile) {
    if (wavFile.equals(originalFile)) {
        return;
    }
    try {
        Files.deleteIfExists(wavFile);
    } catch (Exception ignored) {
    }
}
}

View File

@@ -0,0 +1,148 @@
package com.sundol.repository;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Repository;
import java.sql.Clob;
import java.util.List;
import java.util.Map;
@Repository
public class NoteRepository {
// JDBC access used by every query in this repository.
private final JdbcTemplate jdbcTemplate;
/** Creates the repository on top of the given JdbcTemplate. */
public NoteRepository(JdbcTemplate jdbcTemplate) {
this.jdbcTemplate = jdbcTemplate;
}
/**
 * Inserts a note and returns its id as a hex string.
 *
 * <p>The id is generated first (still with SYS_GUID) and passed to the INSERT explicitly.
 * The previous version let the INSERT generate it and then read back "the newest note of
 * this user", which could return another note's id when the same user created two notes at
 * nearly the same time.
 *
 * @param userId     owner id (hex)
 * @param title      note title
 * @param content    note content, bound as CLOB
 * @param noteType   note type value to store
 * @param audioPath  stored audio file name, or null
 * @param categoryId category id (hex), or null to leave the column unset
 * @return id of the inserted note (hex)
 */
public String insert(String userId, String title, String content, String noteType, String audioPath, String categoryId) {
    String id = jdbcTemplate.queryForObject("SELECT RAWTOHEX(SYS_GUID()) FROM dual", String.class);
    if (categoryId != null) {
        jdbcTemplate.update(
            "INSERT INTO notes (id, user_id, title, content, note_type, audio_path, category_id, created_at, updated_at) " +
            "VALUES (HEXTORAW(?), HEXTORAW(?), ?, ?, ?, ?, HEXTORAW(?), SYSTIMESTAMP, SYSTIMESTAMP)",
            new Object[]{id, userId, title, content, noteType, audioPath, categoryId},
            new int[]{java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.CLOB,
                      java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR}
        );
    } else {
        jdbcTemplate.update(
            "INSERT INTO notes (id, user_id, title, content, note_type, audio_path, created_at, updated_at) " +
            "VALUES (HEXTORAW(?), HEXTORAW(?), ?, ?, ?, ?, SYSTIMESTAMP, SYSTIMESTAMP)",
            new Object[]{id, userId, title, content, noteType, audioPath},
            new int[]{java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.CLOB,
                      java.sql.Types.VARCHAR, java.sql.Types.VARCHAR}
        );
    }
    return id;
}
/**
 * Lists a user's notes, newest first, without the content columns.
 *
 * @param userId     owner id (hex)
 * @param categoryId when non-blank, only notes in this category are returned
 * @return one map per note
 */
public List<Map<String, Object>> list(String userId, String categoryId) {
    final String selectFrom =
        "SELECT RAWTOHEX(n.id) AS id, n.title, n.note_type, n.audio_path, " +
        " RAWTOHEX(n.category_id) AS category_id, c.full_path AS category_path, " +
        " n.created_at, n.updated_at " +
        "FROM notes n LEFT JOIN categories c ON c.id = n.category_id ";
    final String newestFirst = "ORDER BY n.created_at DESC";

    boolean filterByCategory = categoryId != null && !categoryId.isBlank();
    if (!filterByCategory) {
        return jdbcTemplate.queryForList(
            selectFrom + "WHERE n.user_id = HEXTORAW(?) " + newestFirst,
            userId);
    }
    return jdbcTemplate.queryForList(
        selectFrom + "WHERE n.user_id = HEXTORAW(?) AND n.category_id = HEXTORAW(?) " + newestFirst,
        userId, categoryId);
}
/**
 * Loads one note, including content and raw_content, if it belongs to the user.
 *
 * @param userId owner id (hex)
 * @param id     note id (hex)
 * @return the row with CLOB values converted to strings, or null when no row matches
 */
public Map<String, Object> findById(String userId, String id) {
    final String sql =
        "SELECT RAWTOHEX(n.id) AS id, n.title, n.content, n.raw_content, n.note_type, n.audio_path, " +
        " RAWTOHEX(n.category_id) AS category_id, c.full_path AS category_path, " +
        " n.created_at, n.updated_at " +
        "FROM notes n LEFT JOIN categories c ON c.id = n.category_id " +
        "WHERE RAWTOHEX(n.id) = ? AND n.user_id = HEXTORAW(?)";
    List<Map<String, Object>> rows = jdbcTemplate.queryForList(sql, id, userId);
    return rows.isEmpty() ? null : convertClobFields(rows.get(0));
}
/**
 * Overwrites title and content of a note (and its category when one is given).
 *
 * <p>With a userId the update is restricted to that owner. With a null userId (internal
 * calls) the note is matched by id only and {@code categoryId} is not applied.
 *
 * @param id         note id (hex)
 * @param userId     owner id (hex), or null for internal calls
 * @param title      new title
 * @param content    new content, bound as CLOB
 * @param categoryId new category id (hex), or null to leave the category untouched
 */
public void update(String id, String userId, String title, String content, String categoryId) {
    final int varchar = java.sql.Types.VARCHAR;
    final int clob = java.sql.Types.CLOB;

    if (userId == null) {
        // Internal call: no ownership check.
        jdbcTemplate.update(
            "UPDATE notes SET title = ?, content = ?, updated_at = SYSTIMESTAMP WHERE RAWTOHEX(id) = ?",
            new Object[]{title, content, id},
            new int[]{varchar, clob, varchar});
        return;
    }

    if (categoryId == null) {
        jdbcTemplate.update(
            "UPDATE notes SET title = ?, content = ?, updated_at = SYSTIMESTAMP " +
            "WHERE RAWTOHEX(id) = ? AND user_id = HEXTORAW(?)",
            new Object[]{title, content, id, userId},
            new int[]{varchar, clob, varchar, varchar});
        return;
    }

    jdbcTemplate.update(
        "UPDATE notes SET title = ?, content = ?, category_id = HEXTORAW(?), updated_at = SYSTIMESTAMP " +
        "WHERE RAWTOHEX(id) = ? AND user_id = HEXTORAW(?)",
        new Object[]{title, content, categoryId, id, userId},
        new int[]{varchar, clob, varchar, varchar, varchar});
}
/**
 * Stores the raw text of a note in raw_content and bumps updated_at. Matches by id only.
 *
 * @param id         note id (hex)
 * @param rawContent text to store, bound as CLOB
 */
public void updateRawContent(String id, String rawContent) {
    final String sql = "UPDATE notes SET raw_content = ?, updated_at = SYSTIMESTAMP WHERE RAWTOHEX(id) = ?";
    Object[] args = {rawContent, id};
    int[] argTypes = {java.sql.Types.CLOB, java.sql.Types.VARCHAR};
    jdbcTemplate.update(sql, args, argTypes);
}
/**
 * Replaces the content of a note and bumps updated_at. Matches by id only.
 *
 * @param id      note id (hex)
 * @param content new content, bound as CLOB
 */
public void updateContent(String id, String content) {
    final String sql = "UPDATE notes SET content = ?, updated_at = SYSTIMESTAMP WHERE RAWTOHEX(id) = ?";
    Object[] args = {content, id};
    int[] argTypes = {java.sql.Types.CLOB, java.sql.Types.VARCHAR};
    jdbcTemplate.update(sql, args, argTypes);
}
/**
 * Sets the note_type of a note and bumps updated_at. Matches by id only.
 *
 * @param id       note id (hex)
 * @param noteType new type value
 */
public void updateNoteType(String id, String noteType) {
    final String sql = "UPDATE notes SET note_type = ?, updated_at = SYSTIMESTAMP WHERE RAWTOHEX(id) = ?";
    jdbcTemplate.update(sql, noteType, id);
}
/**
 * Deletes a note if it belongs to the given user.
 *
 * @param id     note id (hex)
 * @param userId owner id (hex)
 */
public void delete(String id, String userId) {
    final String sql = "DELETE FROM notes WHERE RAWTOHEX(id) = ? AND user_id = HEXTORAW(?)";
    jdbcTemplate.update(sql, id, userId);
}
/**
 * Replaces every {@link Clob} value in the row with its text, in place.
 * A value that cannot be read is replaced with null.
 *
 * @param row row to convert
 * @return the same map instance
 */
private Map<String, Object> convertClobFields(Map<String, Object> row) {
    for (Map.Entry<String, Object> column : row.entrySet()) {
        if (!(column.getValue() instanceof Clob text)) {
            continue;
        }
        try {
            column.setValue(text.getSubString(1, (int) text.length()));
        } catch (Exception e) {
            column.setValue(null);
        }
    }
    return row;
}
}

View File

@@ -96,7 +96,7 @@ public class OciGenAiService {
Map.of("role", "SYSTEM", "content", List.of(Map.of("type", "TEXT", "text", systemMessage))),
Map.of("role", "USER", "content", List.of(Map.of("type", "TEXT", "text", userMessage)))
),
"maxTokens", 4096,
"maxTokens", 65536,
"temperature", 0.3
)
);

View File

@@ -29,6 +29,10 @@ oci:
model: ${OCI_GENAI_MODEL:google.gemini-2.5-flash}
base-url: ${OCI_GENAI_BASE_URL:https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions}
openrouter:
api-key: ${OPENROUTER_API_KEY:}
model: ${OPENROUTER_MODEL:google/gemini-2.5-flash}
jina:
reader:
api-key: ${JINA_READER_API_KEY:}

View File

@@ -0,0 +1,275 @@
"use client";
import { useEffect, useState } from "react";
import { useParams, useRouter } from "next/navigation";
import AuthGuard from "@/components/auth-guard";
import NavBar from "@/components/nav-bar";
import { useApi } from "@/lib/use-api";
import ReactMarkdown from "react-markdown";
/** Shape of a note as returned by GET /api/notes/{id}; keys are upper-case. */
interface NoteDetail {
  ID: string;
  TITLE: string;
  /** Displayed content; carries progress text while the note is being processed. */
  CONTENT: string;
  /** Raw text stored separately from CONTENT, when present. */
  RAW_CONTENT: string | null;
  /** This page distinguishes "TRANSCRIBING", "AUDIO_FAILED" and "AUDIO"; anything else is shown as text. */
  NOTE_TYPE: string;
  AUDIO_PATH: string | null;
  CATEGORY_PATH: string | null;
  CREATED_AT: string;
  UPDATED_AT: string;
}
export default function NoteDetailPage() {
const { request } = useApi();
const router = useRouter();
const params = useParams();
const id = params.id as string;
const [note, setNote] = useState<NoteDetail | null>(null);
const [loading, setLoading] = useState(true);
const [editing, setEditing] = useState(false);
const [editTitle, setEditTitle] = useState("");
const [editContent, setEditContent] = useState("");
const [saving, setSaving] = useState(false);
const [deleting, setDeleting] = useState(false);
const [polishing, setPolishing] = useState(false);
const [showRaw, setShowRaw] = useState(false);
// Loads the note and mirrors its title/content into the edit fields; always clears `loading`.
const fetchNote = async () => {
  try {
    const loaded = await request<NoteDetail>({
      method: "GET",
      url: `/api/notes/${id}`,
    });
    const { TITLE: loadedTitle, CONTENT: loadedContent } = loaded;
    setNote(loaded);
    setEditTitle(loadedTitle || "");
    setEditContent(loadedContent || "");
  } catch (err) {
    console.error("Failed to load note:", err);
  } finally {
    setLoading(false);
  }
};
useEffect(() => {
fetchNote();
}, [id]);
// TRANSCRIBING 상태면 3초 폴링
useEffect(() => {
if (!note || note.NOTE_TYPE !== "TRANSCRIBING") return;
const interval = setInterval(fetchNote, 3000);
return () => clearInterval(interval);
}, [note?.NOTE_TYPE]);
// Sends the edited title/content via PATCH, shows the returned note and leaves edit mode.
const handleSave = async () => {
  setSaving(true);
  const changes = { title: editTitle, content: editContent };
  try {
    const saved = await request<NoteDetail>({
      method: "PATCH",
      url: `/api/notes/${id}`,
      data: changes,
    });
    setNote(saved);
    setEditing(false);
  } catch (err) {
    console.error("Failed to update note:", err);
    alert("저장에 실패했습니다.");
  } finally {
    setSaving(false);
  }
};
const handleDelete = async () => {
if (!confirm("정말 삭제하시겠습니까?")) return;
setDeleting(true);
try {
await request({ method: "DELETE", url: `/api/notes/${id}` });
router.push("/notes");
} catch (err) {
console.error("Failed to delete note:", err);
setDeleting(false);
}
};
if (loading) {
return (
<AuthGuard><NavBar />
<main className="max-w-4xl mx-auto px-4 py-8">
<p className="text-[var(--color-text-muted)]">Loading...</p>
</main>
</AuthGuard>
);
}
if (!note) {
return (
<AuthGuard><NavBar />
<main className="max-w-4xl mx-auto px-4 py-8">
<p className="text-red-400"> .</p>
<button onClick={() => router.push("/notes")} className="mt-4 text-sm text-[var(--color-primary)] hover:underline">
&larr; Back to Notes
</button>
</main>
</AuthGuard>
);
}
return (
<AuthGuard>
<NavBar />
<main className="max-w-4xl mx-auto px-4 py-8">
<button
onClick={() => router.push("/notes")}
className="text-sm text-[var(--color-text-muted)] hover:text-[var(--color-primary)] mb-4 inline-block"
>
&larr; Back to Notes
</button>
{/* 헤더 */}
<div className="bg-[var(--color-bg-card)] rounded-xl p-6 border border-[var(--color-border)] mb-6">
<div className="flex items-center gap-2 mb-3">
<span className={`text-xs px-2 py-0.5 rounded ${
note.NOTE_TYPE === "TRANSCRIBING" ? "bg-yellow-500/20 text-yellow-400" :
note.NOTE_TYPE === "AUDIO_FAILED" ? "bg-red-500/20 text-red-400" :
note.NOTE_TYPE === "AUDIO" ? "bg-purple-500/20 text-purple-400" : "bg-blue-500/20 text-blue-400"
}`}>
{note.NOTE_TYPE === "TRANSCRIBING" ? "변환 중..." :
note.NOTE_TYPE === "AUDIO_FAILED" ? "변환 실패" :
note.NOTE_TYPE === "AUDIO" ? "음성" : "텍스트"}
</span>
{note.CATEGORY_PATH && (
<span className="text-xs px-1.5 py-0.5 rounded bg-[var(--color-primary)]/10 text-[var(--color-primary)]">
{note.CATEGORY_PATH}
</span>
)}
</div>
{editing ? (
<input
type="text"
value={editTitle}
onChange={(e) => setEditTitle(e.target.value)}
className="w-full px-3 py-1 rounded-lg bg-[var(--color-bg-hover)] border border-[var(--color-border)] focus:border-[var(--color-primary)] focus:outline-none text-xl font-bold mb-2"
/>
) : (
<h1 className="text-xl font-bold mb-2">{note.TITLE || "제목 없음"}</h1>
)}
<div className="flex gap-4 text-xs text-[var(--color-text-muted)]">
<span>: {new Date(note.CREATED_AT).toLocaleString("ko-KR")}</span>
<span>: {new Date(note.UPDATED_AT).toLocaleString("ko-KR")}</span>
</div>
</div>
{/* 변환 중 인디케이터 */}
{note.NOTE_TYPE === "TRANSCRIBING" && (
<div className="bg-yellow-500/10 border border-yellow-500/30 rounded-xl p-4 mb-6 flex items-center gap-3">
<div className="w-4 h-4 border-2 border-yellow-400 border-t-transparent rounded-full animate-spin" />
<span className="text-sm text-yellow-400"> . .</span>
</div>
)}
{/* 내용 */}
<div className="bg-[var(--color-bg-card)] rounded-xl p-6 border border-[var(--color-border)] mb-6">
{editing ? (
<textarea
value={editContent}
onChange={(e) => setEditContent(e.target.value)}
rows={20}
className="w-full px-3 py-2 rounded-lg bg-[var(--color-bg-hover)] border border-[var(--color-border)] focus:border-[var(--color-primary)] focus:outline-none resize-y font-mono text-sm"
/>
) : (
<div className="text-sm leading-relaxed">
<ReactMarkdown
components={{
h1: ({children}) => <h1 className="text-xl font-bold mt-6 mb-3">{children}</h1>,
h2: ({children}) => <h2 className="text-lg font-bold mt-5 mb-2">{children}</h2>,
h3: ({children}) => <h3 className="text-base font-bold mt-4 mb-2">{children}</h3>,
p: ({children}) => <p className="mb-3">{children}</p>,
ul: ({children}) => <ul className="list-disc ml-5 mb-3 space-y-1">{children}</ul>,
ol: ({children}) => <ol className="list-decimal ml-5 mb-3 space-y-1">{children}</ol>,
li: ({children}) => <li className="leading-relaxed">{children}</li>,
strong: ({children}) => <strong className="font-bold">{children}</strong>,
blockquote: ({children}) => <blockquote className="border-l-2 border-[var(--color-primary)] pl-4 my-3 italic text-[var(--color-text-muted)]">{children}</blockquote>,
}}
>
{note.CONTENT || "내용 없음"}
</ReactMarkdown>
</div>
)}
</div>
{/* 원본 텍스트 (STT raw) */}
{note.RAW_CONTENT && (
<div className="mb-6">
<button
onClick={() => setShowRaw(!showRaw)}
className="text-sm text-[var(--color-text-muted)] hover:text-[var(--color-primary)] mb-2"
>
{showRaw ? "▼ 원본 텍스트 숨기기" : "▶ STT 원본 텍스트 보기"}
</button>
{showRaw && (
<div className="bg-[var(--color-bg-card)] rounded-xl p-6 border border-[var(--color-border)] opacity-70">
<p className="text-xs text-[var(--color-text-muted)] mb-2"> ( )</p>
<p className="text-sm whitespace-pre-wrap">{note.RAW_CONTENT}</p>
</div>
)}
</div>
)}
{/* 액션 */}
<div className="flex items-center gap-4">
{editing ? (
<>
<button
onClick={handleSave}
disabled={saving}
className="px-4 py-2 text-sm bg-[var(--color-primary)] rounded-lg disabled:opacity-40"
>
{saving ? "저장 중..." : "저장"}
</button>
<button
onClick={() => { setEditing(false); setEditTitle(note.TITLE || ""); setEditContent(note.CONTENT || ""); }}
className="px-4 py-2 text-sm bg-[var(--color-bg-hover)] border border-[var(--color-border)] rounded-lg"
>
</button>
</>
) : (
<>
<button
onClick={() => setEditing(true)}
className="text-sm text-[var(--color-primary)] hover:underline"
>
</button>
<button
onClick={async () => {
setPolishing(true);
try {
await request({ method: "POST", url: `/api/notes/${id}/polish` });
await fetchNote();
} catch (err) {
console.error("Failed to polish:", err);
alert("교정/요약에 실패했습니다.");
} finally {
setPolishing(false);
}
}}
disabled={polishing || note.NOTE_TYPE === "TRANSCRIBING"}
className="text-sm text-[var(--color-primary)] hover:underline disabled:opacity-40"
>
{polishing ? "처리 중..." : "교정/요약"}
</button>
</>
)}
<button
onClick={handleDelete}
disabled={deleting}
className="text-sm text-red-400 hover:text-red-300 disabled:opacity-40"
>
{deleting ? "삭제 중..." : "삭제"}
</button>
</div>
</main>
</AuthGuard>
);
}

View File

@@ -0,0 +1,166 @@
"use client";
import { useState, useRef, Suspense } from "react";
import { useRouter, useSearchParams } from "next/navigation";
import AuthGuard from "@/components/auth-guard";
import NavBar from "@/components/nav-bar";
import { useApi } from "@/lib/use-api";
/**
 * Route entry point: renders NewNotePage inside a Suspense boundary with a plain
 * "Loading..." placeholder.
 * NOTE(review): presumably needed because NewNotePage calls useSearchParams() — confirm.
 */
export default function NewNotePageWrapper() {
  const loadingFallback = <div className="p-8 text-center">Loading...</div>;
  return (
    <Suspense fallback={loadingFallback}>
      <NewNotePage />
    </Suspense>
  );
}
/**
 * Form for creating a note. With `?type=audio` it uploads an audio file to
 * `POST /api/notes/audio` and navigates to the created note; otherwise it posts a
 * title/content pair to `POST /api/notes` and returns to the list.
 *
 * NOTE(review): several Korean labels were blank in the reviewed copy of this file; the
 * wording below was restored by inference — confirm against the intended copy.
 */
function NewNotePage() {
  const { request } = useApi();
  const router = useRouter();
  const searchParams = useSearchParams();
  const isAudio = searchParams.get("type") === "audio";

  const [title, setTitle] = useState("");
  const [content, setContent] = useState("");
  const [saving, setSaving] = useState(false);
  const [audioFile, setAudioFile] = useState<File | null>(null);
  const [transcribing, setTranscribing] = useState(false);
  const [transcription, setTranscription] = useState("");
  const fileInputRef = useRef<HTMLInputElement>(null);

  // Saves a text note; a note needs at least a title or some content.
  const handleSaveText = async () => {
    if (!title.trim() && !content.trim()) return;
    setSaving(true);
    try {
      await request({ method: "POST", url: "/api/notes", data: { title, content } });
      router.push("/notes");
    } catch (err) {
      console.error("Failed to save note:", err);
      alert("노트 저장에 실패했습니다.");
    } finally {
      setSaving(false);
    }
  };

  // Uploads the selected audio file (plus the optional title) and opens the new note.
  const handleUploadAudio = async () => {
    if (!audioFile) return;
    setTranscribing(true);
    try {
      const formData = new FormData();
      formData.append("file", audioFile);
      if (title.trim()) formData.append("title", title);
      const result = await request<{ id: string; transcription: string }>({
        method: "POST",
        url: "/api/notes/audio",
        data: formData,
        headers: { "Content-Type": "multipart/form-data" },
      });
      setTranscription(result.transcription);
      router.push(`/notes/${result.id}`);
    } catch (err) {
      console.error("Failed to transcribe audio:", err);
      alert("음성 변환에 실패했습니다.");
    } finally {
      setTranscribing(false);
    }
  };

  const openFilePicker = () => fileInputRef.current?.click();

  return (
    <AuthGuard>
      <NavBar />
      <main className="max-w-4xl mx-auto px-4 py-8">
        <button
          onClick={() => router.push("/notes")}
          className="text-sm text-[var(--color-text-muted)] hover:text-[var(--color-primary)] mb-4 inline-block"
        >
          &larr; Back to Notes
        </button>
        <h1 className="text-2xl font-bold mb-6">
          {isAudio ? "음성 노트 작성" : "텍스트 노트 작성"}
        </h1>
        <div className="bg-[var(--color-bg-card)] rounded-xl p-6 border border-[var(--color-border)] space-y-4">
          {/* Title */}
          <div>
            <label htmlFor="note-title" className="block text-sm text-[var(--color-text-muted)] mb-1">제목</label>
            <input
              id="note-title"
              type="text"
              value={title}
              onChange={(e) => setTitle(e.target.value)}
              placeholder={isAudio ? "자동 생성됩니다 (선택)" : "노트 제목"}
              className="w-full px-3 py-2 rounded-lg bg-[var(--color-bg-hover)] border border-[var(--color-border)] focus:border-[var(--color-primary)] focus:outline-none"
            />
          </div>
          {isAudio ? (
            /* Audio upload */
            <div>
              <label htmlFor="note-audio-file" className="block text-sm text-[var(--color-text-muted)] mb-1">음성 파일</label>
              <input
                id="note-audio-file"
                ref={fileInputRef}
                type="file"
                accept="audio/*"
                onChange={(e) => setAudioFile(e.target.files?.[0] || null)}
                className="hidden"
              />
              {/* The real file input is hidden, so this zone must be reachable by keyboard too. */}
              <div
                role="button"
                tabIndex={0}
                onClick={openFilePicker}
                onKeyDown={(e) => {
                  if (e.key === "Enter" || e.key === " ") {
                    e.preventDefault();
                    openFilePicker();
                  }
                }}
                className="border-2 border-dashed border-[var(--color-border)] rounded-lg p-8 text-center cursor-pointer hover:border-[var(--color-primary)] transition-colors"
              >
                {audioFile ? (
                  <div>
                    <p className="font-medium">{audioFile.name}</p>
                    <p className="text-sm text-[var(--color-text-muted)] mt-1">
                      {(audioFile.size / 1024 / 1024).toFixed(1)} MB
                    </p>
                  </div>
                ) : (
                  <div>
                    <p className="text-[var(--color-text-muted)]">클릭하여 음성 파일 선택</p>
                    <p className="text-xs text-[var(--color-text-muted)] mt-1">MP3, WAV, M4A, OGG, WebM</p>
                  </div>
                )}
              </div>
              {transcription && (
                <div className="mt-4 p-4 bg-[var(--color-bg-hover)] rounded-lg">
                  <p className="text-sm text-[var(--color-text-muted)] mb-2">변환 결과:</p>
                  <p className="text-sm whitespace-pre-wrap">{transcription}</p>
                </div>
              )}
              <button
                onClick={handleUploadAudio}
                disabled={!audioFile || transcribing}
                className="mt-4 px-6 py-2 bg-[var(--color-primary)] hover:bg-[var(--color-primary-hover)] rounded-lg transition-colors disabled:opacity-40"
              >
                {transcribing ? "변환 중..." : "음성 변환 및 저장"}
              </button>
            </div>
          ) : (
            /* Text input */
            <div>
              <label htmlFor="note-content" className="block text-sm text-[var(--color-text-muted)] mb-1">내용 (Markdown)</label>
              <textarea
                id="note-content"
                value={content}
                onChange={(e) => setContent(e.target.value)}
                placeholder="노트 내용을 입력하세요..."
                rows={15}
                className="w-full px-3 py-2 rounded-lg bg-[var(--color-bg-hover)] border border-[var(--color-border)] focus:border-[var(--color-primary)] focus:outline-none resize-y font-mono text-sm"
              />
              <button
                onClick={handleSaveText}
                disabled={saving || (!title.trim() && !content.trim())}
                className="mt-4 px-6 py-2 bg-[var(--color-primary)] hover:bg-[var(--color-primary-hover)] rounded-lg transition-colors disabled:opacity-40"
              >
                {saving ? "저장 중..." : "저장"}
              </button>
            </div>
          )}
        </div>
      </main>
    </AuthGuard>
  );
}

View File

@@ -0,0 +1,104 @@
"use client";
import { useEffect, useState } from "react";
import Link from "next/link";
import AuthGuard from "@/components/auth-guard";
import NavBar from "@/components/nav-bar";
import { useApi } from "@/lib/use-api";
/**
 * One row of the notes list (loaded from `GET /api/notes`).
 * NOTE(review): the upper-case keys presumably mirror database column names — confirm.
 */
interface Note {
ID: string;
TITLE: string;
// Values the list distinguishes: "TRANSCRIBING", "AUDIO_FAILED", "AUDIO"; anything else is shown as a text note.
NOTE_TYPE: string;
CATEGORY_PATH: string | null;
// Parsed with `new Date(...)` for display.
CREATED_AT: string;
UPDATED_AT: string;
}
/**
 * Notes list page: loads every note once on mount and links to the detail page and to the
 * two "new note" flows (text and audio).
 *
 * NOTE(review): the two create-link labels and the empty-state message were blank in the
 * reviewed copy of this file; the wording below was restored by inference — confirm.
 */
export default function NotesPage() {
  const { request } = useApi();
  const [notes, setNotes] = useState<Note[]>([]);
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    const loadNotes = async () => {
      try {
        const data = await request<Note[]>({ method: "GET", url: "/api/notes" });
        // Guard against a non-array payload so the notes.map() below cannot throw.
        setNotes(Array.isArray(data) ? data : []);
      } catch (err) {
        console.error("Failed to load notes:", err);
      } finally {
        setLoading(false);
      }
    };
    loadNotes();
  }, []);

  // Badge colour for a note type.
  const badgeClass = (noteType: string) =>
    noteType === "TRANSCRIBING" ? "bg-yellow-500/20 text-yellow-400" :
    noteType === "AUDIO_FAILED" ? "bg-red-500/20 text-red-400" :
    noteType === "AUDIO" ? "bg-purple-500/20 text-purple-400" :
    "bg-blue-500/20 text-blue-400";

  // Badge text for a note type.
  const badgeLabel = (noteType: string) =>
    noteType === "TRANSCRIBING" ? "변환 중..." :
    noteType === "AUDIO_FAILED" ? "변환 실패" :
    noteType === "AUDIO" ? "음성" : "텍스트";

  return (
    <AuthGuard>
      <NavBar />
      <main className="max-w-4xl mx-auto px-4 py-8">
        <div className="flex justify-between items-center mb-6">
          <h1 className="text-2xl font-bold">Notes</h1>
          <div className="flex gap-2">
            <Link
              href="/notes/new"
              className="px-4 py-2 bg-[var(--color-primary)] hover:bg-[var(--color-primary-hover)] rounded-lg transition-colors text-sm"
            >
              + 텍스트 노트
            </Link>
            <Link
              href="/notes/new?type=audio"
              className="px-4 py-2 bg-[var(--color-bg-card)] border border-[var(--color-border)] hover:border-[var(--color-primary)] rounded-lg transition-colors text-sm"
            >
              + 음성 노트
            </Link>
          </div>
        </div>
        {loading ? (
          <p className="text-[var(--color-text-muted)]">Loading...</p>
        ) : notes.length === 0 ? (
          <div className="bg-[var(--color-bg-card)] rounded-xl p-6 border border-[var(--color-border)]">
            <p className="text-[var(--color-text-muted)]">노트가 없습니다. 새 노트를 작성해보세요.</p>
          </div>
        ) : (
          <div className="space-y-3">
            {notes.map((note) => (
              <Link
                key={note.ID}
                href={`/notes/${note.ID}`}
                className="block bg-[var(--color-bg-card)] rounded-xl p-4 border border-[var(--color-border)] hover:border-[var(--color-primary)] transition-colors"
              >
                <div className="flex items-center justify-between">
                  <div className="flex-1 min-w-0">
                    <div className="flex items-center gap-2 mb-1">
                      <span className={`text-xs px-2 py-0.5 rounded ${badgeClass(note.NOTE_TYPE)}`}>
                        {badgeLabel(note.NOTE_TYPE)}
                      </span>
                      {note.CATEGORY_PATH && (
                        <span className="text-xs px-1.5 py-0.5 rounded bg-[var(--color-primary)]/10 text-[var(--color-primary)]">
                          {note.CATEGORY_PATH}
                        </span>
                      )}
                    </div>
                    <h3 className="font-medium truncate">{note.TITLE || "제목 없음"}</h3>
                  </div>
                  <span className="text-sm text-[var(--color-text-muted)] ml-4 whitespace-nowrap">
                    {/* Same locale as the note detail page, so dates read consistently. */}
                    {new Date(note.CREATED_AT).toLocaleDateString("ko-KR")}
                  </span>
                </div>
              </Link>
            ))}
          </div>
        )}
      </main>
    </AuthGuard>
  );
}

View File

@@ -0,0 +1,302 @@
"use client";
import { useState, useRef, useEffect } from "react";
import AuthGuard from "@/components/auth-guard";
import NavBar from "@/components/nav-bar";
/** A registered voice profile as listed by `GET /api/tts/profiles`. */
interface VoiceProfile {
// Sent as `profile_id` when generating speech and used in the delete URL.
id: string;
// Display name shown on the profile buttons and in the profile list.
name: string;
// Optional; the registration form sends a `ref_text` field when the user fills it in.
ref_text?: string;
}
/**
 * Voice-clone TTS page with two tabs: "register" (record or upload a reference clip and
 * save it as a voice profile) and "generate" (pick a profile, enter text, synthesise audio).
 * All calls go to the `/api/tts/*` endpoints with plain `fetch`.
 *
 * NOTE(review): most Korean JSX labels were blank in the reviewed copy of this file; the
 * wording below was restored by inference — confirm against the intended copy.
 */
export default function TTSPage() {
  // Recording / upload
  const [isRecording, setIsRecording] = useState(false);
  const [recordedBlob, setRecordedBlob] = useState<Blob | null>(null);
  const [recordedUrl, setRecordedUrl] = useState<string | null>(null);
  const [uploadedFile, setUploadedFile] = useState<File | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  // Profiles
  const [profiles, setProfiles] = useState<VoiceProfile[]>([]);
  const [selectedProfile, setSelectedProfile] = useState<string>("");
  const [profileName, setProfileName] = useState("");
  const [refText, setRefText] = useState("");
  const [registering, setRegistering] = useState(false);
  // TTS
  const [text, setText] = useState("");
  const [language, setLanguage] = useState("korean");
  const [generating, setGenerating] = useState(false);
  const [outputUrl, setOutputUrl] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);
  const [serverStatus, setServerStatus] = useState("checking...");
  // Tab
  const [tab, setTab] = useState<"generate" | "register">("generate");

  // Reloads the profile list. Best-effort: on failure the current list is kept.
  const fetchProfiles = () => {
    fetch("/api/tts/profiles")
      .then(r => (r.ok ? r.json() : Promise.reject(new Error(`HTTP ${r.status}`))))
      // Only accept an array so the profiles.map() calls below cannot throw.
      .then(data => setProfiles(Array.isArray(data) ? data : []))
      .catch(() => {});
  };

  useEffect(() => {
    fetch("/api/tts/health").then(r => r.json())
      .then(d => setServerStatus(d.model_loaded ? "ready" : "loading..."))
      .catch(() => setServerStatus("offline"));
    fetchProfiles();
  }, []);

  // Release object URLs when they are replaced or the page unmounts; otherwise every
  // recording / generated clip stays in memory for the lifetime of the document.
  useEffect(() => {
    if (!recordedUrl) return;
    return () => URL.revokeObjectURL(recordedUrl);
  }, [recordedUrl]);
  useEffect(() => {
    if (!outputUrl) return;
    return () => URL.revokeObjectURL(outputUrl);
  }, [outputUrl]);

  // Starts recording from the microphone.
  const startRecording = async () => {
    setError(null);
    let stream: MediaStream | null = null;
    try {
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      const activeStream = stream;
      // Not every browser can record "audio/webm"; fall back to the browser default
      // instead of letting the constructor throw.
      const preferredType = "audio/webm";
      const mr = MediaRecorder.isTypeSupported(preferredType)
        ? new MediaRecorder(activeStream, { mimeType: preferredType })
        : new MediaRecorder(activeStream);
      mediaRecorderRef.current = mr;
      chunksRef.current = [];
      mr.ondataavailable = (e) => { if (e.data.size > 0) chunksRef.current.push(e.data); };
      mr.onstop = () => {
        const blob = new Blob(chunksRef.current, { type: mr.mimeType || preferredType });
        setRecordedBlob(blob);
        setRecordedUrl(URL.createObjectURL(blob));
        setUploadedFile(null);
        activeStream.getTracks().forEach(t => t.stop());
      };
      mr.start();
      setIsRecording(true);
    } catch (err) {
      console.error("Failed to start recording:", err);
      // If the stream was opened but the recorder could not start, turn the mic off again.
      stream?.getTracks().forEach(t => t.stop());
      setError("마이크 접근 실패");
    }
  };

  const stopRecording = () => { mediaRecorderRef.current?.stop(); setIsRecording(false); };

  // Uses an uploaded file as the reference clip instead of a recording.
  const handleFileUpload = (e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0];
    if (file) { setUploadedFile(file); setRecordedBlob(null); setRecordedUrl(URL.createObjectURL(file)); }
  };

  // Registers a profile from the uploaded file or the recording.
  const handleRegister = async () => {
    const audio = uploadedFile || recordedBlob;
    if (!audio || !profileName.trim()) return;
    setRegistering(true);
    setError(null);
    try {
      // Name the recording after its actual container (the recorder may not have used webm).
      const recordingName = recordedBlob?.type.includes("mp4") ? "recording.m4a" : "recording.webm";
      const fd = new FormData();
      fd.append("name", profileName);
      fd.append("ref_audio", audio, uploadedFile?.name || recordingName);
      if (refText.trim()) fd.append("ref_text", refText);
      const res = await fetch("/api/tts/profiles", { method: "POST", body: fd });
      if (!res.ok) throw new Error(await res.text());
      const result = await res.json();
      setProfileName("");
      setRefText("");
      setRecordedBlob(null);
      setRecordedUrl(null);
      setUploadedFile(null);
      fetchProfiles();
      setSelectedProfile(result.id);
      setTab("generate");
    } catch (err) {
      setError("프로필 등록 실패: " + (err instanceof Error ? err.message : ""));
    } finally {
      setRegistering(false);
    }
  };

  // Deletes a profile after confirmation.
  const handleDeleteProfile = async (id: string) => {
    if (!confirm("삭제하시겠습니까?")) return;
    setError(null);
    try {
      const res = await fetch(`/api/tts/profiles/${id}`, { method: "DELETE" });
      if (!res.ok) throw new Error(`HTTP ${res.status}`);
      if (selectedProfile === id) setSelectedProfile("");
    } catch (err) {
      setError("프로필 삭제 실패: " + (err instanceof Error ? err.message : ""));
    } finally {
      fetchProfiles();
    }
  };

  // Generates speech for the entered text with the selected profile.
  const handleGenerate = async () => {
    if (!text.trim() || !selectedProfile) return;
    setGenerating(true);
    setError(null);
    setOutputUrl(null);
    try {
      const fd = new FormData();
      fd.append("text", text);
      fd.append("profile_id", selectedProfile);
      fd.append("language", language);
      const res = await fetch("/api/tts/generate", { method: "POST", body: fd });
      if (!res.ok) throw new Error(`HTTP ${res.status}`);
      const blob = await res.blob();
      setOutputUrl(URL.createObjectURL(blob));
    } catch (err) {
      setError("생성 실패: " + (err instanceof Error ? err.message : ""));
    } finally {
      setGenerating(false);
    }
  };

  const tabClass = (active: boolean) =>
    `px-4 py-2 text-sm rounded-lg ${active ? "bg-[var(--color-primary)] text-white" : "bg-[var(--color-bg-card)] border border-[var(--color-border)]"}`;

  return (
    <AuthGuard>
      <NavBar />
      <main className="max-w-4xl mx-auto px-4 py-8">
        <div className="flex justify-between items-center mb-6">
          <h1 className="text-2xl font-bold">Voice Clone (TTS)</h1>
          <span className={`text-xs px-2 py-1 rounded ${
            serverStatus === "ready" ? "bg-green-500/20 text-green-400" :
            serverStatus === "offline" ? "bg-red-500/20 text-red-400" :
            "bg-yellow-500/20 text-yellow-400"
          }`}>{serverStatus}</span>
        </div>

        {/* Tabs */}
        <div className="flex gap-2 mb-6">
          <button onClick={() => setTab("generate")} className={tabClass(tab === "generate")}>
            음성 생성
          </button>
          <button onClick={() => setTab("register")} className={tabClass(tab === "register")}>
            프로필 등록
          </button>
        </div>

        {/* Rendered outside the tab switch: recording and registration errors are raised on
            the register tab and must be visible there too. */}
        {error && (
          <div className="bg-red-500/10 border border-red-500/30 rounded-xl p-4 text-sm text-red-400 mb-6">{error}</div>
        )}

        {tab === "register" ? (
          /* Profile registration */
          <div className="space-y-6">
            <div className="bg-[var(--color-bg-card)] rounded-xl p-6 border border-[var(--color-border)]">
              <h2 className="text-lg font-semibold mb-4">음성 프로필 등록</h2>
              <p className="text-sm text-[var(--color-text-muted)] mb-4">
                15~30초 분량의 음성을 녹음하거나 업로드하세요.
              </p>
              <div className="space-y-4">
                <div>
                  <label className="block text-sm text-[var(--color-text-muted)] mb-1">프로필 이름</label>
                  <input type="text" value={profileName} onChange={e => setProfileName(e.target.value)}
                    placeholder="예: 내 목소리"
                    className="w-full px-3 py-2 rounded-lg bg-[var(--color-bg-hover)] border border-[var(--color-border)] focus:border-[var(--color-primary)] focus:outline-none text-sm" />
                </div>
                <div className="flex gap-3">
                  {isRecording ? (
                    <button onClick={stopRecording}
                      className="px-4 py-2 bg-red-500 hover:bg-red-600 rounded-lg text-sm flex items-center gap-2">
                      <span className="w-3 h-3 bg-white rounded-full animate-pulse" />
                      녹음 중지
                    </button>
                  ) : (
                    <button onClick={startRecording}
                      className="px-4 py-2 bg-[var(--color-primary)] hover:bg-[var(--color-primary-hover)] rounded-lg text-sm">
                      녹음 시작
                    </button>
                  )}
                  <label className="px-4 py-2 bg-[var(--color-bg-hover)] border border-[var(--color-border)] rounded-lg text-sm cursor-pointer hover:border-[var(--color-primary)]">
                    파일 업로드
                    <input type="file" accept="audio/*" onChange={handleFileUpload} className="hidden" />
                  </label>
                </div>
                {recordedUrl && (
                  <audio controls src={recordedUrl} className="w-full" />
                )}
                <div>
                  <label className="block text-sm text-[var(--color-text-muted)] mb-1">참조 텍스트 (선택 - 정확도 향상)</label>
                  <input type="text" value={refText} onChange={e => setRefText(e.target.value)}
                    placeholder="녹음에서 말한 내용"
                    className="w-full px-3 py-2 rounded-lg bg-[var(--color-bg-hover)] border border-[var(--color-border)] focus:border-[var(--color-primary)] focus:outline-none text-sm" />
                </div>
                <button onClick={handleRegister}
                  disabled={registering || !profileName.trim() || !(uploadedFile || recordedBlob)}
                  className="px-6 py-2 bg-[var(--color-primary)] hover:bg-[var(--color-primary-hover)] rounded-lg disabled:opacity-40">
                  {registering ? "등록 중..." : "프로필 등록"}
                </button>
              </div>
            </div>

            {/* Registered profiles */}
            {profiles.length > 0 && (
              <div className="bg-[var(--color-bg-card)] rounded-xl p-6 border border-[var(--color-border)]">
                <h2 className="text-lg font-semibold mb-4">등록된 프로필</h2>
                <div className="space-y-2">
                  {profiles.map(p => (
                    <div key={p.id} className="flex items-center justify-between p-3 bg-[var(--color-bg-hover)] rounded-lg">
                      <span className="text-sm font-medium">{p.name}</span>
                      <button onClick={() => handleDeleteProfile(p.id)}
                        className="text-xs text-red-400 hover:text-red-300">삭제</button>
                    </div>
                  ))}
                </div>
              </div>
            )}
          </div>
        ) : (
          /* Speech generation */
          <div className="space-y-6">
            {/* Profile selection */}
            <div className="bg-[var(--color-bg-card)] rounded-xl p-6 border border-[var(--color-border)]">
              <h2 className="text-lg font-semibold mb-4">1. 프로필 선택</h2>
              {profiles.length === 0 ? (
                <p className="text-sm text-[var(--color-text-muted)]">
                  등록된 프로필이 없습니다.{" "}
                  <button onClick={() => setTab("register")} className="text-[var(--color-primary)] hover:underline">
                    프로필 등록하기
                  </button>
                </p>
              ) : (
                <div className="flex flex-wrap gap-2">
                  {profiles.map(p => (
                    <button key={p.id} onClick={() => setSelectedProfile(p.id)}
                      className={`px-4 py-2 text-sm rounded-lg transition-colors ${
                        selectedProfile === p.id
                          ? "bg-[var(--color-primary)] text-white"
                          : "bg-[var(--color-bg-hover)] border border-[var(--color-border)] hover:border-[var(--color-primary)]"
                      }`}>
                      {p.name}
                    </button>
                  ))}
                </div>
              )}
            </div>

            {/* Text input */}
            <div className="bg-[var(--color-bg-card)] rounded-xl p-6 border border-[var(--color-border)]">
              <h2 className="text-lg font-semibold mb-4">2. 텍스트 입력</h2>
              <select value={language} onChange={e => setLanguage(e.target.value)}
                className="px-3 py-2 rounded-lg bg-[var(--color-bg-hover)] border border-[var(--color-border)] text-sm mb-3">
                <option value="korean">한국어</option>
                <option value="english">English</option>
                <option value="japanese">日本語</option>
                <option value="chinese">中文</option>
              </select>
              <textarea value={text} onChange={e => setText(e.target.value)}
                placeholder="음성으로 변환할 텍스트를 입력하세요..." rows={5}
                className="w-full px-3 py-2 rounded-lg bg-[var(--color-bg-hover)] border border-[var(--color-border)] focus:border-[var(--color-primary)] focus:outline-none text-sm resize-y" />
            </div>

            {/* Generate */}
            <button onClick={handleGenerate}
              disabled={generating || !text.trim() || !selectedProfile || serverStatus !== "ready"}
              className="w-full py-3 bg-[var(--color-primary)] hover:bg-[var(--color-primary-hover)] rounded-xl text-lg font-semibold disabled:opacity-40">
              {generating ? "생성 중..." : "음성 생성"}
            </button>
            {outputUrl && (
              <div className="bg-[var(--color-bg-card)] rounded-xl p-6 border border-[var(--color-border)]">
                <h2 className="text-lg font-semibold mb-4">생성된 음성</h2>
                <audio controls src={outputUrl} className="w-full mb-3" />
                <a href={outputUrl} download="tts_output.wav"
                  className="text-sm text-[var(--color-primary)] hover:underline">다운로드</a>
              </div>
            )}
          </div>
        )}
      </main>
    </AuthGuard>
  );
}

View File

@@ -7,10 +7,12 @@ import { useAuth } from "@/lib/auth-context";
// Navigation entries as { href, label } pairs, listed in the order they appear here.
const navItems = [
{ href: "/dashboard", label: "Dashboard" },
{ href: "/knowledge", label: "Knowledge" },
{ href: "/notes", label: "Notes" },
{ href: "/chat", label: "Chat" },
{ href: "/study", label: "Study" },
{ href: "/todos", label: "Todos" },
{ href: "/habits", label: "Habits" },
{ href: "/tts", label: "TTS" },
{ href: "/settings", label: "Settings" },
];

View File

@@ -5,7 +5,7 @@ export const api = axios.create({
withCredentials: true,
});
// --- 공통 토큰 refresh 로직 (mutex 패턴) ---
// --- refresh 로직 ---
let isRefreshing = false;
let pendingQueue: {
@@ -13,7 +13,6 @@ let pendingQueue: {
reject: (error: unknown) => void;
}[] = [];
// auth-context에서 주입하는 콜백
let onTokenRefreshed: ((token: string) => void) | null = null;
let onRefreshFailed: (() => void) | null = null;
@@ -27,21 +26,27 @@ export function setAuthCallbacks(
function processQueue(token: string | null, error: unknown) {
pendingQueue.forEach(({ resolve, reject }) => {
if (token) {
resolve(token);
} else {
reject(error);
}
if (token) resolve(token);
else reject(error);
});
pendingQueue = [];
}
// Request interceptor: read the current access token from localStorage on every request,
// so a token written by login or refresh is used by the very next call.
api.interceptors.request.use((config) => {
  // localStorage exists only in the browser; without this guard a request issued while
  // the module runs on the server would throw a ReferenceError.
  if (typeof window === "undefined") return config;
  const token = localStorage.getItem("accessToken");
  if (token) {
    config.headers["Authorization"] = `Bearer ${token}`;
  }
  return config;
});
// 응답 인터셉터: 401이면 refresh 후 재시도
api.interceptors.response.use(
(response) => response,
async (error: AxiosError) => {
const originalRequest = error.config as InternalAxiosRequestConfig & { _retry?: boolean };
// 401이 아니거나, refresh 요청 자체가 실패한 경우, 이미 retry한 경우 → 그냥 throw
if (
error.response?.status !== 401 ||
originalRequest.url?.includes("/api/auth/") ||
@@ -50,7 +55,6 @@ api.interceptors.response.use(
return Promise.reject(error);
}
// 이미 refresh 진행 중이면 큐에 대기
if (isRefreshing) {
return new Promise((resolve, reject) => {
pendingQueue.push({
@@ -64,36 +68,25 @@ api.interceptors.response.use(
});
}
// refresh 시작
isRefreshing = true;
originalRequest._retry = true;
const attemptRefresh = async (retryCount: number): Promise<string> => {
try {
const res = await api.post<LoginResponse>("/api/auth/refresh");
return res.data.accessToken;
} catch (err) {
const isNetworkError = !((err as AxiosError).response);
if (isNetworkError && retryCount < 2) {
// 네트워크 에러(서버 재시작 등)면 3초 후 재시도
await new Promise((r) => setTimeout(r, 3000));
return attemptRefresh(retryCount + 1);
}
throw err;
}
};
try {
const newToken = await attemptRefresh(0);
const rt = localStorage.getItem("refreshToken");
if (!rt) throw new Error("No refresh token");
api.defaults.headers.common["Authorization"] = `Bearer ${newToken}`;
onTokenRefreshed?.(newToken);
const res = await api.post<LoginResponse>("/api/auth/refresh", { refreshToken: rt });
const newAccess = res.data.accessToken;
const newRefresh = res.data.refreshToken;
// 대기 중인 요청들 처리
processQueue(newToken, null);
localStorage.setItem("accessToken", newAccess);
if (newRefresh) localStorage.setItem("refreshToken", newRefresh);
api.defaults.headers.common["Authorization"] = `Bearer ${newAccess}`;
// 원래 요청 retry
originalRequest.headers["Authorization"] = `Bearer ${newToken}`;
onTokenRefreshed?.(newAccess);
processQueue(newAccess, null);
originalRequest.headers["Authorization"] = `Bearer ${newAccess}`;
return api.request(originalRequest);
} catch (refreshError) {
processQueue(null, refreshError);

View File

@@ -1,6 +1,6 @@
"use client";
import React, { createContext, useContext, useState, useCallback, useEffect, useRef } from "react";
import React, { createContext, useContext, useState, useCallback, useEffect } from "react";
import { api, LoginResponse, setAuthCallbacks } from "./api";
interface AuthContextType {
@@ -21,93 +21,60 @@ const AuthContext = createContext<AuthContextType>({
setAccessToken: () => {},
});
export function AuthProvider({ children }: { children: React.ReactNode }) {
const [accessToken, setAccessTokenState] = useState<string | null>(null);
const [isLoading, setIsLoading] = useState(true);
const logoutRef = useRef<() => void>(() => {});
function getStoredToken(): string | null {
if (typeof window === "undefined") return null;
return localStorage.getItem("accessToken");
}
// localStorage와 동기화하는 setter
const setAccessToken = useCallback((token: string | null) => {
setAccessTokenState(token);
if (token) {
localStorage.setItem("accessToken", token);
} else {
localStorage.removeItem("accessToken");
export function AuthProvider({ children }: { children: React.ReactNode }) {
const [accessToken, setAccessTokenState] = useState<string | null>(getStoredToken);
const [isLoading, setIsLoading] = useState(false);
const saveTokens = useCallback((access: string, refresh?: string) => {
setAccessTokenState(access);
localStorage.setItem("accessToken", access);
if (refresh) localStorage.setItem("refreshToken", refresh);
api.defaults.headers.common["Authorization"] = `Bearer ${access}`;
}, []);
const clearTokens = useCallback(() => {
setAccessTokenState(null);
localStorage.removeItem("accessToken");
localStorage.removeItem("refreshToken");
delete api.defaults.headers.common["Authorization"];
}, []);
const login = useCallback((response: LoginResponse) => {
saveTokens(response.accessToken, response.refreshToken);
}, [saveTokens]);
const logout = useCallback(async () => {
try { await api.post("/api/auth/logout"); } catch {}
clearTokens();
window.location.href = "/login";
}, [clearTokens]);
// 인터셉터 콜백: 토큰 갱신 성공/실패 처리
useEffect(() => {
setAuthCallbacks(
(token: string) => {
setAccessTokenState(token);
localStorage.setItem("accessToken", token);
},
() => logout()
);
}, [logout]);
// 앱 로드 시 localStorage 토큰으로 헤더 설정
useEffect(() => {
const access = localStorage.getItem("accessToken");
if (access) {
api.defaults.headers.common["Authorization"] = `Bearer ${access}`;
}
}, []);
// interceptor 콜백 등록
useEffect(() => {
setAuthCallbacks(
(token: string) => setAccessToken(token),
() => logoutRef.current()
);
}, [setAccessToken]);
useEffect(() => {
const restoreSession = async () => {
// 1차: localStorage에서 복원
const stored = localStorage.getItem("accessToken");
if (stored) {
setAccessTokenState(stored);
api.defaults.headers.common["Authorization"] = `Bearer ${stored}`;
setIsLoading(false);
return;
}
// 2차: refresh token cookie로 복원
try {
const res = await api.post<LoginResponse>("/api/auth/refresh");
const token = res.data.accessToken;
setAccessToken(token);
api.defaults.headers.common["Authorization"] = `Bearer ${token}`;
} catch {
// No valid session
} finally {
setIsLoading(false);
}
};
restoreSession();
}, [setAccessToken]);
useEffect(() => {
if (accessToken) {
api.defaults.headers.common["Authorization"] = `Bearer ${accessToken}`;
} else {
delete api.defaults.headers.common["Authorization"];
}
}, [accessToken]);
const login = useCallback((response: LoginResponse) => {
setAccessToken(response.accessToken);
}, [setAccessToken]);
const logout = useCallback(async () => {
try {
await api.post("/api/auth/logout");
} catch {
// Ignore logout errors
}
setAccessToken(null);
window.location.href = "/login";
}, [setAccessToken]);
// ref로 최신 logout 유지 (interceptor에서 사용)
useEffect(() => {
logoutRef.current = logout;
}, [logout]);
return (
<AuthContext.Provider
value={{
isAuthenticated: !!accessToken,
isLoading,
accessToken,
login,
logout,
setAccessToken,
}}
>
<AuthContext.Provider value={{ isAuthenticated: !!accessToken, isLoading, accessToken, login, logout, setAccessToken: (t) => saveTokens(t) }}>
{children}
</AuthContext.Provider>
);

211
tts-server.py Normal file
View File

@@ -0,0 +1,211 @@
"""
Qwen3-TTS Voice Clone API Server
별도 프로세스로 실행 (GPU 메모리 관리를 위해)
"""
import os
import io
import base64
import tempfile
import torch
import soundfile as sf
import numpy as np
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
import json
import pickle
# FastAPI application; the route decorators below register on this instance.
app = FastAPI()
# CORS is fully open: any origin, method and header is accepted.
# NOTE(review): confirm this server is only reachable through a trusted proxy.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
# Model handle; stays None until get_model() loads it on first use.
model = None
# Directory next to this script for voice profiles; created at import time if missing.
PROFILES_DIR = os.path.join(os.path.dirname(__file__), "voice-profiles")
os.makedirs(PROFILES_DIR, exist_ok=True)
def get_model():
    """Return the shared Qwen3-TTS model, loading it on the first call.

    The loaded instance is cached in the module-level ``model`` variable, so later
    calls return the same object without loading again.
    """
    global model
    if model is not None:
        return model

    # Imported here rather than at module level, so importing this file does not
    # require qwen_tts until a model is actually requested.
    from qwen_tts import Qwen3TTSModel

    print("Loading Qwen3-TTS model...")
    model = Qwen3TTSModel.from_pretrained(
        "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
        device_map="cuda:0",
        dtype=torch.bfloat16,
    )
    print("Model loaded!")
    return model
@app.get("/health")
@app.get("/api/tts/health")
def health():
    """Health probe, served on both /health and /api/tts/health.

    Returns:
        A dict with a constant ``status`` of "ok" and ``model_loaded``, which
        is True once the module-level model has been loaded.
    """
    is_loaded = model is not None
    return {"status": "ok", "model_loaded": is_loaded}
@app.post("/api/tts/clone")
async def voice_clone(
    text: str = Form(...),
    language: str = Form("korean"),
    ref_audio: UploadFile = File(...),
    ref_text: str = Form(""),
):
    """Synthesize ``text`` in the voice of an uploaded reference recording.

    The upload is written to a temporary file. If its filename does not end
    with ".wav" it is first converted to 16 kHz mono WAV with ffmpeg.

    Args:
        text: Text to speak.
        language: Language name passed through to the model.
        ref_audio: Reference recording of the voice to clone.
        ref_text: Optional transcript of ``ref_audio``. When blank, the model
            is called with ``x_vector_only_mode=True`` instead.

    Returns:
        A streamed ``audio/wav`` attachment on success. A JSON ``{"error": ...}``
        body with status 400 if the reference audio cannot be converted, or
        status 500 if the model produced no audio.
    """
    m = get_model()

    # Every temp file created below is recorded here so the finally block can
    # remove all of them, including a partially written conversion output.
    temp_paths = []
    try:
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            temp_paths.append(tmp.name)
            tmp.write(await ref_audio.read())
        audio_path = temp_paths[0]

        # Convert to WAV when needed. The filename is optional in multipart
        # uploads, so a missing one is treated as "not a .wav".
        if not (ref_audio.filename or "").endswith(".wav"):
            wav_path = audio_path + "_converted.wav"
            temp_paths.append(wav_path)
            try:
                # Argument list (no shell); ffmpeg's stderr chatter is discarded.
                completed = subprocess.run(
                    ["ffmpeg", "-i", audio_path, "-ar", "16000", "-ac", "1", "-y", wav_path],
                    stderr=subprocess.DEVNULL,
                )
                converted = completed.returncode == 0
            except OSError:
                # ffmpeg is not installed or could not be started.
                converted = False
            if not converted:
                return JSONResponse(
                    status_code=400,
                    content={"error": "Could not convert reference audio to WAV"},
                )
            audio_path = wav_path

        kwargs = {
            "text": text,
            "language": language,
            "ref_audio": audio_path,
        }
        if ref_text and ref_text.strip():
            kwargs["ref_text"] = ref_text
        else:
            kwargs["x_vector_only_mode"] = True

        wavs, sr = m.generate_voice_clone(**kwargs)
        print(f"Clone generated: wavs={len(wavs)}, samples={len(wavs[0]) if len(wavs) > 0 else 0}, sr={sr}")

        # Guard against an empty result instead of failing on wavs[0].
        if len(wavs) == 0 or len(wavs[0]) == 0:
            return JSONResponse(status_code=500, content={"error": "Empty audio generated"})

        audio_data = np.array(wavs[0], dtype=np.float32)
        buf = io.BytesIO()
        sf.write(buf, audio_data, sr, format="WAV")
        buf.seek(0)
        return StreamingResponse(
            buf,
            media_type="audio/wav",
            headers={"Content-Disposition": "attachment; filename=tts_output.wav"},
        )
    finally:
        for path in temp_paths:
            if os.path.exists(path):
                os.unlink(path)
@app.post("/api/tts/design")
async def voice_design(
    text: str = Form(...),
    language: str = Form("korean"),
    instruct: str = Form("A calm, professional Korean male voice"),
):
    """Generate speech from a textual voice description, without reference audio.

    Args:
        text: Text to speak.
        language: Language name passed through to the model.
        instruct: Free-text description of the desired voice.

    Returns:
        A streamed ``audio/wav`` attachment containing the first generated waveform.
    """
    tts = get_model()
    waveforms, sample_rate = tts.generate_voice_design(
        text=text,
        instruct=instruct,
        language=language,
    )

    # Encode the first waveform as WAV in memory and rewind for streaming.
    wav_stream = io.BytesIO()
    sf.write(wav_stream, waveforms[0], sample_rate, format="WAV")
    wav_stream.seek(0)

    download_headers = {"Content-Disposition": "attachment; filename=tts_output.wav"}
    return StreamingResponse(wav_stream, media_type="audio/wav", headers=download_headers)
@app.post("/api/tts/profiles")
async def create_profile(
    name: str = Form(...),
    ref_audio: UploadFile = File(...),
    ref_text: str = Form(""),
):
    """Register a reusable voice profile built from an uploaded reference recording.

    The profile id is ``name`` with spaces replaced by underscores, lower-cased.
    The voice-clone prompt returned by the model is pickled to ``<id>.pkl`` and
    a metadata file ``<id>.json`` is written next to it, both in ``PROFILES_DIR``.
    An existing profile with the same id is overwritten.

    Args:
        name: Display name of the profile; also the source of the profile id.
        ref_audio: Reference recording. Converted to 16 kHz mono WAV with
            ffmpeg when its filename does not end with ".wav".
        ref_text: Optional transcript of ``ref_audio``. When blank, the prompt
            is created with ``x_vector_only_mode=True``.

    Returns:
        ``{"id", "name", "status": "created"}`` on success. A JSON
        ``{"error": ...}`` body with status 400 if the name cannot be used as a
        file name or the reference audio cannot be converted.
    """
    profile_id = name.replace(" ", "_").lower()
    # The id becomes a file name, so refuse anything that could resolve to a
    # location outside PROFILES_DIR (path separators, "." / "..", empty).
    if (
        profile_id in ("", ".", "..")
        or "\\" in profile_id
        or os.path.basename(profile_id) != profile_id
    ):
        return JSONResponse(
            status_code=400,
            content={"error": f"Invalid profile name '{name}'"},
        )

    m = get_model()

    # Every temp file created below is recorded here so the finally block can
    # remove all of them, including a partially written conversion output.
    temp_paths = []
    try:
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            temp_paths.append(tmp.name)
            tmp.write(await ref_audio.read())
        audio_path = temp_paths[0]

        # The filename is optional in multipart uploads, so a missing one is
        # treated as "not a .wav".
        if not (ref_audio.filename or "").endswith(".wav"):
            wav_path = audio_path + "_converted.wav"
            temp_paths.append(wav_path)
            try:
                # Argument list (no shell); ffmpeg's stderr chatter is discarded.
                completed = subprocess.run(
                    ["ffmpeg", "-i", audio_path, "-ar", "16000", "-ac", "1", "-y", wav_path],
                    stderr=subprocess.DEVNULL,
                )
                converted = completed.returncode == 0
            except OSError:
                # ffmpeg is not installed or could not be started.
                converted = False
            if not converted:
                return JSONResponse(
                    status_code=400,
                    content={"error": "Could not convert reference audio to WAV"},
                )
            audio_path = wav_path

        # Build the voice-clone prompt, with the transcript when one was given.
        kwargs = {"ref_audio": audio_path}
        if ref_text and ref_text.strip():
            kwargs["ref_text"] = ref_text
        else:
            kwargs["x_vector_only_mode"] = True
        prompt = m.create_voice_clone_prompt(**kwargs)

        # Persist the prompt and its metadata side by side.
        profile_path = os.path.join(PROFILES_DIR, f"{profile_id}.pkl")
        meta_path = os.path.join(PROFILES_DIR, f"{profile_id}.json")
        with open(profile_path, "wb") as f:
            pickle.dump(prompt, f)
        with open(meta_path, "w") as f:
            json.dump({"id": profile_id, "name": name, "ref_text": ref_text}, f, ensure_ascii=False)

        return {"id": profile_id, "name": name, "status": "created"}
    finally:
        for path in temp_paths:
            if os.path.exists(path):
                os.unlink(path)
@app.get("/api/tts/profiles")
def list_profiles():
    """List the stored voice profiles.

    Returns:
        The parsed contents of every ``*.json`` file in ``PROFILES_DIR``, in
        the order ``os.listdir`` yields them.
    """
    def _read_meta(filename):
        # Parse one metadata file from the profiles directory.
        with open(os.path.join(PROFILES_DIR, filename)) as handle:
            return json.load(handle)

    return [
        _read_meta(entry)
        for entry in os.listdir(PROFILES_DIR)
        if entry.endswith(".json")
    ]
@app.delete("/api/tts/profiles/{profile_id}")
def delete_profile(profile_id: str):
    """Delete a stored voice profile.

    Removes ``<profile_id>.pkl`` and ``<profile_id>.json`` from ``PROFILES_DIR``
    when they exist; a file that is already absent is skipped.

    Args:
        profile_id: Id of the profile to remove.

    Returns:
        ``{"status": "deleted"}``, whether or not any file was removed.
    """
    for extension in (".pkl", ".json"):
        target = os.path.join(PROFILES_DIR, profile_id + extension)
        if os.path.exists(target):
            os.unlink(target)
    return {"status": "deleted"}
@app.post("/api/tts/generate")
async def generate_from_profile(
    text: str = Form(...),
    profile_id: str = Form(...),
    language: str = Form("korean"),
):
    """Generate speech for ``text`` using a previously stored voice profile.

    Args:
        text: Text to speak.
        profile_id: Id of a profile stored as ``<profile_id>.pkl`` in ``PROFILES_DIR``.
        language: Language name passed through to the model.

    Returns:
        A streamed ``audio/wav`` attachment on success. A JSON ``{"error": ...}``
        body with status 404 if the profile does not exist (or the id is not a
        plain file name), or status 500 if the model produced no audio.
    """
    # profile_id is client-supplied and becomes part of a file path that is
    # unpickled below, so only a plain file name inside PROFILES_DIR is accepted.
    is_plain_name = (
        profile_id not in ("", ".", "..")
        and "\\" not in profile_id
        and os.path.basename(profile_id) == profile_id
    )
    profile_path = os.path.join(PROFILES_DIR, f"{profile_id}.pkl")
    if not is_plain_name or not os.path.exists(profile_path):
        # An explicit JSONResponse carries the status code; a Flask-style
        # "(body, status)" tuple would be serialized as a JSON array with 200.
        return JSONResponse(
            status_code=404,
            content={"error": f"Profile '{profile_id}' not found"},
        )

    m = get_model()

    # NOTE(security): pickle.load can execute arbitrary code. Profile files
    # must only ever be ones written by this server's create_profile endpoint.
    with open(profile_path, "rb") as f:
        prompt = pickle.load(f)

    print(f"Generating with profile '{profile_id}', text='{text[:50]}...', language={language}")
    wavs, sr = m.generate_voice_clone(
        text=text,
        language=language,
        voice_clone_prompt=prompt,
    )
    print(f"Generated: wavs={len(wavs)}, samples={len(wavs[0]) if len(wavs) > 0 else 0}, sr={sr}")

    if len(wavs) == 0 or len(wavs[0]) == 0:
        return JSONResponse(status_code=500, content={"error": "Empty audio generated"})

    audio_data = np.array(wavs[0], dtype=np.float32)
    buf = io.BytesIO()
    sf.write(buf, audio_data, sr, format="WAV")
    buf.seek(0)
    return StreamingResponse(
        buf,
        media_type="audio/wav",
        headers={"Content-Disposition": "attachment; filename=tts_output.wav"},
    )
if __name__ == "__main__":
    import uvicorn

    # Load the model before the server starts accepting requests.
    get_model()

    # Listen on all interfaces, port 8090.
    bind_host, bind_port = "0.0.0.0", 8090
    uvicorn.run(app, host=bind_host, port=bind_port)