Skip to content

Commit b43a999

Browse files
authored
Add a parameter to modify the generated audio length
Add a parameter to modify the generated audio length
1 parent 8e5130f commit b43a999

File tree

1 file changed

+4 −3 lines changed

1 file changed

+4
-3
lines changed

app.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -113,15 +113,15 @@ def adjust_f0_semitones(f0_sequence, n_semitones):
113113

114114
@torch.no_grad()
115115
@torch.inference_mode()
116-
def voice_conversion(source, target, diffusion_steps, length_adjust, inference_cfg_rate, n_quantizers, f0_condition, auto_f0_adjust, pitch_shift, concat_prompt):
116+
def voice_conversion(source, target, diffusion_steps, length_adjust, inference_cfg_rate, n_quantizers, f0_condition, auto_f0_adjust, pitch_shift, concat_prompt,seconds=30):
117117
inference_module = model if not f0_condition else model_f0
118118
# Load audio
119119
source_audio = librosa.load(source, sr=sr)[0]
120120
ref_audio = librosa.load(target, sr=sr)[0]
121121

122122
# Process audio
123-
source_audio = torch.tensor(source_audio[:sr * 30]).unsqueeze(0).float().to(device)
124-
ref_audio = torch.tensor(ref_audio[:sr * 30]).unsqueeze(0).float().to(device)
123+
source_audio = torch.tensor(source_audio[:sr * seconds]).unsqueeze(0).float().to(device)
124+
ref_audio = torch.tensor(ref_audio[:sr * seconds]).unsqueeze(0).float().to(device)
125125

126126
# Resample
127127
source_waves_16k = torchaudio.functional.resample(source_audio, sr, 16000)
@@ -244,6 +244,7 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
244244
gr.Slider(label='Pitch shift', minimum=-24, maximum=24, step=1, value=0, info='Pitch shift in semitones, only works when F0 conditioned model is used'),
245245
gr.Checkbox(label="Concat Prompt", value=True,
246246
info="Concat original speech as prompt"),
247+
gr.Slider(minimum=30, maximum=120, value=30, step=1, label="Generated audio length, in seconds", info="Generated audio length, in seconds"),
247248
]
248249

249250
examples = [["examples/source/yae_0.wav", "examples/reference/dingzhen_0.wav", 25, 1.0, 0.7, 1, False, True, 0, True],

0 commit comments

Comments
 (0)