Skip to content

Commit 67c6b1f

Browse files
authored
Merge pull request transformerlab#749 from transformerlab/add/voice-audio
Add/voice audio
2 parents 4e1c81e + bd034c8 commit 67c6b1f

File tree

3 files changed

+78
-1
lines changed

3 files changed

+78
-1
lines changed

src/renderer/components/Experiment/Audio/Audio.tsx

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ export async function sendAndReceiveAudioPath(
3737
sample_rate: number,
3838
temperature: number,
3939
speed: number,
40+
voice?: string,
4041
audioPath?: string,
4142
) {
4243
const data: any = {
@@ -50,6 +51,10 @@ export async function sendAndReceiveAudioPath(
5051
speed: speed,
5152
};
5253

54+
if (voice) {
55+
data.voice = voice;
56+
}
57+
5358
// Add audio path if provided
5459
if (audioPath) {
5560
data.audio_path = audioPath;
@@ -128,6 +133,23 @@ export default function Audio() {
128133
experimentInfo?.config?.foundation_model_architecture;
129134
const adaptor = experimentInfo?.config?.adaptor || '';
130135

136+
// Fetch model config from gallery
137+
const processedModelId = currentModel
138+
? currentModel.replace(/\//g, '~~~')
139+
: null;
140+
141+
const { data: modelData } = useAPI(
142+
'models',
143+
['getModelDetailsFromGallery'],
144+
{
145+
modelId: processedModelId,
146+
},
147+
{
148+
enabled: !!currentModel,
149+
},
150+
);
151+
const modelConfigVoices = modelData?.model_config?.voices || {};
152+
131153
const { data: audioHistory, mutate: mutateHistory } = useAPI(
132154
'conversations',
133155
['getAudioHistory'],
@@ -145,6 +167,8 @@ export default function Audio() {
145167
const [filePrefix, setFilePrefix] = React.useState('output_audio');
146168
const [sampleRate, setSampleRate] = React.useState(24000);
147169
const [temperature, setTemperature] = React.useState(0.7);
170+
const [selectedLanguage, setSelectedLanguage] = React.useState('');
171+
const [selectedVoice, setSelectedVoice] = React.useState('');
148172

149173
const [showSettingsModal, setShowSettingsModal] = React.useState(false);
150174

@@ -173,6 +197,7 @@ export default function Audio() {
173197
sampleRate,
174198
temperature,
175199
speed,
200+
selectedVoice || undefined,
176201
uploadedAudioPath || undefined,
177202
);
178203

@@ -270,6 +295,48 @@ export default function Audio() {
270295
Generation Settings:
271296
</Typography>
272297
<Stack spacing={3} sx={{ py: 2 }}>
298+
{/* Voice Selection */}
299+
{Object.keys(modelConfigVoices).length > 0 && (
300+
<>
301+
<FormControl>
302+
<FormLabel>Language</FormLabel>
303+
<Select
304+
value={selectedLanguage}
305+
onChange={(_, v) => {
306+
setSelectedLanguage(v as string);
307+
setSelectedVoice(''); // Reset voice when language changes
308+
}}
309+
placeholder="Select language..."
310+
>
311+
{Object.keys(modelConfigVoices).map((language) => (
312+
<Option key={language} value={language}>
313+
{language}
314+
</Option>
315+
))}
316+
</Select>
317+
</FormControl>
318+
319+
{selectedLanguage && modelConfigVoices[selectedLanguage] && (
320+
<FormControl>
321+
<FormLabel>Voice</FormLabel>
322+
<Select
323+
value={selectedVoice}
324+
onChange={(_, v) => setSelectedVoice(v as string)}
325+
placeholder="Select voice..."
326+
>
327+
{modelConfigVoices[selectedLanguage].map(
328+
(voice: string) => (
329+
<Option key={voice} value={voice}>
330+
{voice}
331+
</Option>
332+
),
333+
)}
334+
</Select>
335+
</FormControl>
336+
)}
337+
</>
338+
)}
339+
273340
{/* Sample Rate */}
274341
<FormControl>
275342
<FormLabel>Sample Rate</FormLabel>

src/renderer/components/Experiment/Audio/AudioHistory.tsx

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ import { getAPIFullPath } from 'renderer/lib/transformerlab-api-sdk';
1313
import AudioPlayer from '../../Data/AudioPlayer';
1414

1515
interface AudioHistoryItem {
16-
id: string; // Added id property
16+
id: string;
1717
type: string;
1818
text: string;
1919
filename: string;
@@ -23,6 +23,7 @@ interface AudioHistoryItem {
2323
audio_format: string;
2424
sample_rate: number;
2525
temperature: number;
26+
voice?: string;
2627
audio_data_url?: string; // Add audio data URL for the AudioPlayer
2728
}
2829

@@ -103,6 +104,11 @@ const AudioHistory = React.forwardRef<HTMLDivElement, AudioHistoryProps>(
103104
<> + {item.adaptor.split('/').pop()}</>
104105
)}
105106
</Chip>
107+
{item.voice && (
108+
<Chip size="sm" variant="soft" color="neutral">
109+
Voice: {item.voice}
110+
</Chip>
111+
)}
106112
<Chip size="sm" variant="soft" color="neutral">
107113
{item.audio_format.toUpperCase()}
108114
</Chip>

src/renderer/lib/api-client/allEndpoints.json

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,10 @@
8181
"pipeline_tag": {
8282
"method": "GET",
8383
"path": "model/pipeline_tag?model_name={modelName}"
84+
},
85+
"getModelDetailsFromGallery": {
86+
"method": "GET",
87+
"path": "model/gallery/{modelId}"
8488
}
8589
},
8690
"jobs": {

0 commit comments

Comments
 (0)