@@ -37,6 +37,7 @@ export async function sendAndReceiveAudioPath(
3737 sample_rate : number ,
3838 temperature : number ,
3939 speed : number ,
40+ voice ?: string ,
4041 audioPath ?: string ,
4142) {
4243 const data : any = {
@@ -50,6 +51,10 @@ export async function sendAndReceiveAudioPath(
5051 speed : speed ,
5152 } ;
5253
54+ if ( voice ) {
55+ data . voice = voice ;
56+ }
57+
5358 // Add audio path if provided
5459 if ( audioPath ) {
5560 data . audio_path = audioPath ;
@@ -128,6 +133,23 @@ export default function Audio() {
128133 experimentInfo ?. config ?. foundation_model_architecture ;
129134 const adaptor = experimentInfo ?. config ?. adaptor || '' ;
130135
136+ // Fetch model config from gallery
137+ const processedModelId = currentModel
138+ ? currentModel . replace ( / \/ / g, '~~~' )
139+ : null ;
140+
141+ const { data : modelData } = useAPI (
142+ 'models' ,
143+ [ 'getModelDetailsFromGallery' ] ,
144+ {
145+ modelId : processedModelId ,
146+ } ,
147+ {
148+ enabled : ! ! currentModel ,
149+ } ,
150+ ) ;
151+ const modelConfigVoices = modelData ?. model_config ?. voices || { } ;
152+
131153 const { data : audioHistory , mutate : mutateHistory } = useAPI (
132154 'conversations' ,
133155 [ 'getAudioHistory' ] ,
@@ -145,6 +167,8 @@ export default function Audio() {
145167 const [ filePrefix , setFilePrefix ] = React . useState ( 'output_audio' ) ;
146168 const [ sampleRate , setSampleRate ] = React . useState ( 24000 ) ;
147169 const [ temperature , setTemperature ] = React . useState ( 0.7 ) ;
170+ const [ selectedLanguage , setSelectedLanguage ] = React . useState ( '' ) ;
171+ const [ selectedVoice , setSelectedVoice ] = React . useState ( '' ) ;
148172
149173 const [ showSettingsModal , setShowSettingsModal ] = React . useState ( false ) ;
150174
@@ -173,6 +197,7 @@ export default function Audio() {
173197 sampleRate ,
174198 temperature ,
175199 speed ,
200+ selectedVoice || undefined ,
176201 uploadedAudioPath || undefined ,
177202 ) ;
178203
@@ -270,6 +295,48 @@ export default function Audio() {
270295 Generation Settings:
271296 </ Typography >
272297 < Stack spacing = { 3 } sx = { { py : 2 } } >
298+ { /* Voice Selection */ }
299+ { Object . keys ( modelConfigVoices ) . length > 0 && (
300+ < >
301+ < FormControl >
302+ < FormLabel > Language</ FormLabel >
303+ < Select
304+ value = { selectedLanguage }
305+ onChange = { ( _ , v ) => {
306+ setSelectedLanguage ( v as string ) ;
307+ setSelectedVoice ( '' ) ; // Reset voice when language changes
308+ } }
309+ placeholder = "Select language..."
310+ >
311+ { Object . keys ( modelConfigVoices ) . map ( ( language ) => (
312+ < Option key = { language } value = { language } >
313+ { language }
314+ </ Option >
315+ ) ) }
316+ </ Select >
317+ </ FormControl >
318+
319+ { selectedLanguage && modelConfigVoices [ selectedLanguage ] && (
320+ < FormControl >
321+ < FormLabel > Voice</ FormLabel >
322+ < Select
323+ value = { selectedVoice }
324+ onChange = { ( _ , v ) => setSelectedVoice ( v as string ) }
325+ placeholder = "Select voice..."
326+ >
327+ { modelConfigVoices [ selectedLanguage ] . map (
328+ ( voice : string ) => (
329+ < Option key = { voice } value = { voice } >
330+ { voice }
331+ </ Option >
332+ ) ,
333+ ) }
334+ </ Select >
335+ </ FormControl >
336+ ) }
337+ </ >
338+ ) }
339+
273340 { /* Sample Rate */ }
274341 < FormControl >
275342 < FormLabel > Sample Rate</ FormLabel >
0 commit comments