
Commit b3dbf7c ("up")
Parent: bd5be9d

File tree: 11 files changed, +345 -179 lines

app/api/transcribe/route.ts (19 additions, 24 deletions)
@@ -4,34 +4,29 @@ import Groq from "groq-sdk";
 // Ensure this route runs on the Node.js runtime
 export const runtime = "nodejs";
 
-const groq = new Groq({ apiKey: process.env.GROQ_API_KEY });
-
 export async function POST(req: NextRequest) {
-  try {
-    const contentType = req.headers.get("content-type") || "";
-    if (!contentType.includes("multipart/form-data")) {
-      return NextResponse.json({ error: "Expected multipart/form-data" }, { status: 400 });
-    }
+  const form = await req.formData();
+  const file = form.get("file");
+  const apiKey = form.get("apiKey") as string;
+
+  if (!file || !(file instanceof File)) {
+    return NextResponse.json({ error: "Missing file" }, { status: 400 });
+  }
+
+  if (!apiKey) {
+    return NextResponse.json({ error: "API key required" }, { status: 400 });
+  }
 
-    const form = await req.formData();
-    const file = form.get("file");
-    if (!file || !(file instanceof File)) {
-      return NextResponse.json({ error: "Missing file" }, { status: 400 });
-    }
+  const groq = new Groq({ apiKey });
 
-    const transcription = await groq.audio.transcriptions.create({
-      // Pass the web-standard File directly (supported by groq-sdk in Next.js routes)
-      file,
-      model: "whisper-large-v3",
-      temperature: 0,
-      response_format: "verbose_json",
-    } as any);
+    const transcription = await groq.audio.transcriptions.create({
+      file: file,
+      model: "whisper-large-v3",
+      temperature: 0,
+      response_format: "verbose_json",
+    });
 
-    return NextResponse.json({ text: transcription.text ?? "" });
-  } catch (error: any) {
-    console.error("Transcription error", error);
-    return NextResponse.json({ error: error?.message || "Failed to transcribe" }, { status: 500 });
-  }
+  return NextResponse.json({ text: transcription.text ?? "" });
 }
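The route now takes both the recording and the caller's Groq API key in a single multipart body, and constructs the SDK client per request instead of once at module scope from the `GROQ_API_KEY` environment variable. A minimal sketch of the new request contract from the browser side (the `transcribe` helper name and the `res.ok` check are illustrative, not part of this commit):

```ts
// Sketch: calling the updated endpoint with a recorded audio/webm Blob.
// `apiKey` is whatever the user saved in settings.
async function transcribe(blob: Blob, apiKey: string): Promise<string> {
  const form = new FormData()
  form.append("file", blob, `record_${Date.now()}.webm`)
  form.append("apiKey", apiKey)

  const res = await fetch("/api/transcribe", { method: "POST", body: form })
  const data = await res.json()
  if (!res.ok) throw new Error(data.error ?? "Failed to transcribe")
  return data.text ?? ""
}
```

Note that the handler no longer wraps the Groq call in try/catch, so an SDK failure now surfaces as a generic Next.js 500 rather than the structured `{ error }` JSON the old handler returned.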
app/page.tsx (127 additions, 141 deletions)
@@ -6,6 +6,7 @@ import { TranscriptionCard } from "@/components/transcription-card"
 import { SettingsModal } from "@/components/settings-modal"
 import { FloatingVoiceWidget } from "@/components/floating-voice-widget"
 import { useToast } from "@/hooks/use-toast"
+import { SettingsStore } from "@/lib/settings-store"
 
 interface Transcript {
   id: number
@@ -47,160 +48,154 @@ export default function Home() {
     },
   ])
 
-  // Handle Alt + M push-to-talk recording
-  useEffect(() => {
-    const ensureFloatingWindow = async () => {
-      const tauri = (window as any).__TAURI__
-      if (!tauri?.window) return
+  const ensureFloatingWindow = async () => {
+    const tauri = (window as any).__TAURI__
+    if (!tauri?.window) return
+
+    try {
       const existing = await tauri.window.getAll?.()
       const found = existing?.find((w: any) => w.label === "floating")
+
       if (!found) {
-        new tauri.window.WebviewWindow("floating", {
+        const { WebviewWindow } = tauri.window
+        const floatingWindow = new WebviewWindow("floating", {
           url: "/floating",
           title: "Voice Widget",
           decorations: false,
           transparent: true,
           alwaysOnTop: true,
-          width: 96,
-          height: 96,
+          width: 190,
+          height: 64,
+          resizable: false,
+          skipTaskbar: true,
         })
+
+        // Position to right side of screen after window is created
+        setTimeout(async () => {
+          try {
+            const mon = await tauri.window.currentMonitor?.()
+            if (mon?.size) {
+              const x = Math.max(0, mon.size.width - 200) // 190 width + margin
+              const y = Math.max(0, Math.floor(mon.size.height / 2 - 32))
+              await floatingWindow.setPosition({ x, y })
+            }
+          } catch {}
+        }, 100)
       } else {
         await found.show?.()
         await found.setFocus?.()
       }
-      // Position to right side of the primary screen
-      try {
-        const { currentMonitor } = tauri.window
-        const mon = await currentMonitor?.()
-        if (mon?.size) {
-          const x = Math.max(0, mon.size.width - 112) // 96 width + margin
-          const y = Math.max(0, Math.floor(mon.size.height / 2 - 48))
-          const win = await tauri.window.getWindow?.("floating")
-          await win?.setPosition({ x, y })
-        }
-      } catch {}
-    }
+    } catch {}
+  }
 
-    const startRecording = async () => {
-      if (mediaRecorderRef.current?.state === "recording") return
-      try {
-        const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
-        const mediaRecorder = new MediaRecorder(stream, { mimeType: "audio/webm" })
-        mediaRecorderRef.current = mediaRecorder
-        audioChunksRef.current = []
+  const startRecording = async () => {
+    if (mediaRecorderRef.current?.state === "recording") return
+
+    const tauri = (window as any).__TAURI__
+    if (tauri?.invoke) {
+      await tauri.invoke('request_microphone_permission')
+    }
+
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
+    const mediaRecorder = new MediaRecorder(stream, { mimeType: "audio/webm" })
+    mediaRecorderRef.current = mediaRecorder
+    audioChunksRef.current = []
 
-        mediaRecorder.ondataavailable = (event) => {
-          if (event.data.size > 0) audioChunksRef.current.push(event.data)
-        }
+    mediaRecorder.ondataavailable = (event) => {
+      if (event.data.size > 0) audioChunksRef.current.push(event.data)
+    }
 
-        mediaRecorder.onstop = async () => {
-          const blob = new Blob(audioChunksRef.current, { type: "audio/webm" })
-          setIsProcessing(true)
-          try {
-            const form = new FormData()
-            form.append("file", blob, `record_${Date.now()}.webm`)
-            const res = await fetch("/api/transcribe", { method: "POST", body: form })
-            if (!res.ok) throw new Error("Transcription failed")
-            const data = await res.json()
-            // Auto-copy text to clipboard once received
-            try {
-              if (data.text) {
-                await navigator.clipboard.writeText(data.text)
-              }
-            } catch {
-              // ignore clipboard failures
-            }
-            const newTranscript: Transcript = {
-              id: Date.now(),
-              file: `record_${String(transcripts.length + 1).padStart(3, "0")}.webm`,
-              text: data.text || "",
-              date: new Date().toLocaleDateString("en-US", {
-                month: "short",
-                day: "numeric",
-                year: "numeric",
-              }),
-            }
-            setTranscripts((prev) => [newTranscript, ...prev])
-            toast({ title: "Transcribed & copied", description: "Text copied to clipboard." })
-          } catch (err: any) {
-            toast({ title: "Error", description: err?.message || "Failed to transcribe" })
-          } finally {
-            setIsProcessing(false)
-          }
-        }
+    mediaRecorder.onstop = async () => {
+      const blob = new Blob(audioChunksRef.current, { type: "audio/webm" })
+      setIsProcessing(true)
+
+      const store = SettingsStore.getInstance()
+      const apiKey = store.getApiKey()
+
+      if (!apiKey) {
+        toast({ title: "API Key Required", description: "Please set your Whisper API key in settings." })
+        setIsProcessing(false)
+        return
+      }
+
+      const form = new FormData()
+      form.append("file", blob, `record_${Date.now()}.webm`)
+      form.append("apiKey", apiKey)
+      const res = await fetch("/api/transcribe", { method: "POST", body: form })
+      const data = await res.json()
+
+      if (data.text) {
+        await navigator.clipboard.writeText(data.text)
+      }
+
+      const newTranscript: Transcript = {
+        id: Date.now(),
+        file: `record_${String(transcripts.length + 1).padStart(3, "0")}.webm`,
+        text: data.text || "",
+        date: new Date().toLocaleDateString("en-US", {
+          month: "short",
+          day: "numeric",
+          year: "numeric",
+        }),
+      }
+      setTranscripts((prev) => [newTranscript, ...prev])
+      toast({ title: "Transcribed & copied", description: "Text copied to clipboard." })
+      setIsProcessing(false)
+    }
 
-        mediaRecorder.start(100)
-        setIsListening(true)
-        await ensureFloatingWindow()
-        try { (window as any).__TAURI__?.event?.emit("voice:start") } catch {}
+    mediaRecorder.start(100)
+    setIsListening(true)
+    await ensureFloatingWindow()
+    try { (window as any).__TAURI__?.event?.emit("voice:start") } catch {}
 
-        // Setup analyser for audio level visualization
-        const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)()
-        audioContextRef.current = audioContext
-        const source = audioContext.createMediaStreamSource(stream)
-        const analyser = audioContext.createAnalyser()
-        analyser.fftSize = 256
-        analyserRef.current = analyser
-        source.connect(analyser)
+    // Setup analyser for audio level visualization
+    const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)()
+    audioContextRef.current = audioContext
+    const source = audioContext.createMediaStreamSource(stream)
+    const analyser = audioContext.createAnalyser()
+    analyser.fftSize = 256
+    analyserRef.current = analyser
+    source.connect(analyser)
 
-        const dataArray = new Uint8Array(analyser.frequencyBinCount)
-        const updateLevel = () => {
-          analyser.getByteTimeDomainData(dataArray)
-          let sum = 0
-          for (let i = 0; i < dataArray.length; i++) {
-            const v = (dataArray[i] - 128) / 128
-            sum += v * v
-          }
-          const rms = Math.sqrt(sum / dataArray.length)
-          const level = Math.min(1, Math.max(0.1, rms * 2))
-          setAudioLevel(level)
-          try { (window as any).__TAURI__?.event?.emit("voice:level", level) } catch {}
-          animationFrameRef.current = requestAnimationFrame(updateLevel)
-        }
-        updateLevel()
-      } catch (err: any) {
-        toast({ title: "Mic permission error", description: err?.message || "Unable to record" })
-        // If permission is blocked, attempt to reset WebView permissions by clearing browsing data
-        ;(async () => {
-          try {
-            const tauri = (window as any).__TAURI__
-            if (tauri?.window?.getAll) {
-              const wins = await tauri.window.getAll()
-              for (const w of wins) {
-                await w.clearAllBrowsingData?.()
-              }
-            } else if (tauri?.window?.appWindow?.clearAllBrowsingData) {
-              await tauri.window.appWindow.clearAllBrowsingData()
-            }
-            toast({
-              title: "Permissions reset",
-              description: "Cache cleared. Try Alt+M again or restart the app.",
-            })
-          } catch {}
-        })()
+    const dataArray = new Uint8Array(analyser.frequencyBinCount)
+    const updateLevel = () => {
+      analyser.getByteTimeDomainData(dataArray)
+      let sum = 0
+      for (let i = 0; i < dataArray.length; i++) {
+        const v = (dataArray[i] - 128) / 128
+        sum += v * v
       }
+      const rms = Math.sqrt(sum / dataArray.length)
+      const level = Math.min(1, Math.max(0.1, rms * 2))
+      setAudioLevel(level)
+      try { (window as any).__TAURI__?.event?.emit("voice:level", level) } catch {}
+      animationFrameRef.current = requestAnimationFrame(updateLevel)
     }
+    updateLevel()
+  }
 
-    const stopRecording = () => {
-      if (mediaRecorderRef.current && mediaRecorderRef.current.state !== "inactive") {
-        mediaRecorderRef.current.stop()
-      }
-      if (animationFrameRef.current) cancelAnimationFrame(animationFrameRef.current)
-      if (audioContextRef.current) {
-        audioContextRef.current.close()
-        audioContextRef.current = null
-      }
-      setIsListening(false)
-      try { (window as any).__TAURI__?.event?.emit("voice:stop") } catch {}
-      ;(async () => {
-        try {
-          const tauri = (window as any).__TAURI__
-          const win = await tauri?.window?.getWindow?.("floating")
-          await win?.hide?.()
-        } catch {}
-      })()
+  const stopRecording = () => {
+    if (mediaRecorderRef.current && mediaRecorderRef.current.state !== "inactive") {
+      mediaRecorderRef.current.stop()
     }
+    if (animationFrameRef.current) cancelAnimationFrame(animationFrameRef.current)
+    if (audioContextRef.current) {
+      audioContextRef.current.close()
+      audioContextRef.current = null
+    }
+    setIsListening(false)
+    try { (window as any).__TAURI__?.event?.emit("voice:stop") } catch {}
+    ;(async () => {
+      try {
+        const tauri = (window as any).__TAURI__
+        const win = await tauri?.window?.getWindow?.("floating")
+        await win?.hide?.()
+      } catch {}
+    })()
+  }
 
+  // Handle Alt + M push-to-talk recording
+  useEffect(() => {
     const handleKeyDown = (e: KeyboardEvent) => {
       if (e.altKey && (e.key === "m" || e.key === "M")) {
         e.preventDefault()
@@ -213,7 +208,7 @@ export default function Home() {
     }
 
     const handleKeyUp = (e: KeyboardEvent) => {
-      if ((e.key === "Alt" || e.key === "m" || e.key === "M") && isListening) {
+      if (e.key === "Alt" && isListening) {
         e.preventDefault()
         stopRecording()
       }
@@ -226,7 +221,7 @@ export default function Home() {
       window.removeEventListener("keydown", handleKeyDown)
       window.removeEventListener("keyup", handleKeyUp)
     }
-  }, [isListening, isProcessing, transcripts.length, toast])
+  }, [isListening, isProcessing])
 
   // audioLevel is updated from analyser during recording
 
@@ -270,15 +265,6 @@ export default function Home() {
       </main>
 
       {showSettings && <SettingsModal onClose={() => setShowSettings(false)} />}
-
-      {(isListening || isProcessing) && (
-        <FloatingVoiceWidget
-          isListening={isListening}
-          isProcessing={isProcessing}
-          audioLevel={audioLevel}
-          onCancel={handleCancel}
-        />
-      )}
     </div>
   )
 }
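The new `onstop` handler reads the key through `SettingsStore.getInstance().getApiKey()`. `lib/settings-store.ts` is among the 11 changed files but is not shown in this excerpt; a hypothetical sketch consistent with those two calls, assuming a localStorage-backed singleton (the `"whisper-api-key"` storage key is invented):

```ts
// Hypothetical lib/settings-store.ts, inferred only from the calls made in
// app/page.tsx (SettingsStore.getInstance(), store.getApiKey()). The real
// file in this commit may differ; the storage key name is an assumption.
export class SettingsStore {
  private static instance: SettingsStore | null = null

  static getInstance(): SettingsStore {
    if (!SettingsStore.instance) {
      SettingsStore.instance = new SettingsStore()
    }
    return SettingsStore.instance
  }

  // Returns an empty string when no key has been saved; the caller treats
  // that as "missing" via its `if (!apiKey)` check.
  getApiKey(): string {
    return localStorage.getItem("whisper-api-key") ?? ""
  }

  setApiKey(key: string): void {
    localStorage.setItem("whisper-api-key", key)
  }
}
```

With this change the key travels in the multipart form to the server route, which forwards it to Groq; the old env-based `GROQ_API_KEY` path is gone.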
