Skip to content

Commit 800af65

Browse files
committed
Merge branch 'main' of https://github.com/Plachtaa/seed-vc
2 parents 99b572b + 09d0b5c commit 800af65

File tree

6 files changed, with 130 additions and 97 deletions.

app_svc.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
from pydub import AudioSegment
1212
import argparse
1313
# Load model and configuration
14-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1514

1615
fp16 = False
16+
device = None
1717
def load_models(args):
1818
global sr, hop_length, fp16
1919
fp16 = args.fp16
@@ -433,5 +433,8 @@ def main(args):
433433
parser.add_argument("--config-path", type=str, help="Path to the config file", default=None)
434434
parser.add_argument("--share", type=str2bool, nargs="?", const=True, default=False, help="Whether to share the app")
435435
parser.add_argument("--fp16", type=str2bool, nargs="?", const=True, help="Whether to use fp16", default=True)
436+
parser.add_argument("--gpu", type=int, help="Which GPU id to use", default=0)
436437
args = parser.parse_args()
438+
cuda_target = f"cuda:{args.gpu}" if args.gpu else "cuda"
439+
device = torch.device(cuda_target if torch.cuda.is_available() else "cpu")
437440
main(args)

app_vc.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
import argparse
1313

1414
# Load model and configuration
15-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1615
fp16 = False
16+
device = None
1717
def load_models(args):
1818
global sr, hop_length, fp16
1919
fp16 = args.fp16
@@ -386,5 +386,8 @@ def main(args):
386386
parser.add_argument("--config-path", type=str, help="Path to the config file", default=None)
387387
parser.add_argument("--share", type=str2bool, nargs="?", const=True, default=False, help="Whether to share the app")
388388
parser.add_argument("--fp16", type=str2bool, nargs="?", const=True, help="Whether to use fp16", default=True)
389+
parser.add_argument("--gpu", type=int, help="Which GPU id to use", default=0)
389390
args = parser.parse_args()
390-
main(args)
391+
cuda_target = f"cuda:{args.gpu}" if args.gpu else "cuda"
392+
device = torch.device(cuda_target if torch.cuda.is_available() else "cpu")
393+
main(args)

data/ft_dataset.py

Lines changed: 20 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -12,27 +12,26 @@
1212
"max": 30.0,
1313
}
1414
# assume single speaker
15+
def to_mel_fn(wave, mel_fn_args):
16+
return mel_spectrogram(wave, **mel_fn_args)
17+
1518
class FT_Dataset(torch.utils.data.Dataset):
16-
def __init__(self,
17-
data_path,
18-
spect_params,
19-
sr=22050,
20-
batch_size=1,
21-
):
19+
def __init__(
20+
self,
21+
data_path,
22+
spect_params,
23+
sr=22050,
24+
batch_size=1,
25+
):
2226
self.data_path = data_path
23-
# recursively find all files in data_path
2427
self.data = []
2528
for root, _, files in os.walk(data_path):
2629
for file in files:
27-
if (file.endswith(".wav") or
28-
file.endswith(".mp3") or
29-
file.endswith(".flac") or
30-
file.endswith(".ogg") or
31-
file.endswith(".m4a") or
32-
file.endswith(".opus")):
30+
if file.endswith((".wav", ".mp3", ".flac", ".ogg", ".m4a", ".opus")):
3331
self.data.append(os.path.join(root, file))
3432

35-
mel_fn_args = {
33+
self.sr = sr
34+
self.mel_fn_args = {
3635
"n_fft": spect_params['n_fft'],
3736
"win_size": spect_params['win_length'],
3837
"hop_size": spect_params['hop_length'],
@@ -42,11 +41,8 @@ def __init__(self,
4241
"fmax": None if spect_params['fmax'] == "None" else spect_params['fmax'],
4342
"center": False
4443
}
45-
self.to_mel = lambda x: mel_spectrogram(x, **mel_fn_args)
46-
self.sr = sr
4744

4845
assert len(self.data) != 0
49-
# if dataset length is less than batch size, repeat the dataset
5046
while len(self.data) < batch_size:
5147
self.data += self.data
5248

@@ -64,17 +60,14 @@ def __getitem__(self, idx):
6460
if len(speech) < self.sr * duration_setting["min"] or len(speech) > self.sr * duration_setting["max"]:
6561
print(f"Audio {wav_path} is too short or too long, skipping")
6662
return self.__getitem__(random.randint(0, len(self)))
67-
return_dict = {
68-
'audio': speech,
69-
'sr': orig_sr
70-
}
71-
wave, orig_sr = return_dict['audio'], return_dict['sr']
7263
if orig_sr != self.sr:
73-
wave = librosa.resample(wave, orig_sr, self.sr)
74-
wave = torch.from_numpy(wave).float()
75-
mel = self.to_mel(wave.unsqueeze(0)).squeeze(0)
64+
speech = librosa.resample(speech, orig_sr, self.sr)
65+
66+
wave = torch.from_numpy(speech).float().unsqueeze(0)
67+
mel = to_mel_fn(wave, self.mel_fn_args).squeeze(0)
68+
69+
return wave.squeeze(0), mel
7670

77-
return wave, mel
7871

7972
def build_ft_dataloader(data_path, spect_params, sr, batch_size=1, num_workers=0):
8073
dataset = FT_Dataset(data_path, spect_params, sr, batch_size)
@@ -130,4 +123,4 @@ def collate(batch):
130123
wave, mel, wave_lengths, mel_lengths = batch
131124
print(wave.shape, mel.shape)
132125
if idx == 10:
133-
break
126+
break

real-time-gui.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
import torch
3131
from modules.commons import str2bool
3232
# Load model and configuration
33-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
33+
device = None
3434

3535
flag_vc = False
3636

@@ -328,7 +328,7 @@ def printt(strr, *args):
328328

329329
class Config:
330330
def __init__(self):
331-
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
331+
self.device = device
332332

333333

334334
if __name__ == "__main__":
@@ -1144,5 +1144,8 @@ def get_device_channels(self):
11441144
parser.add_argument("--checkpoint-path", type=str, default=None, help="Path to the model checkpoint")
11451145
parser.add_argument("--config-path", type=str, default=None, help="Path to the vocoder checkpoint")
11461146
parser.add_argument("--fp16", type=str2bool, nargs="?", const=True, help="Whether to use fp16", default=True)
1147+
parser.add_argument("--gpu", type=int, help="Which GPU id to use", default=0)
11471148
args = parser.parse_args()
1149+
cuda_target = f"cuda:{args.gpu}" if args.gpu else "cuda"
1150+
device = torch.device(cuda_target if torch.cuda.is_available() else "cpu")
11481151
gui = GUI(args)

requirements.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
--extra-index-url https://download.pytorch.org/whl/cu113
2-
torch==2.1.0
3-
torchvision==0.16.0
4-
torchaudio==2.1.0
1+
--extra-index-url https://download.pytorch.org/whl/cu121
2+
torch==2.4.0
3+
torchvision==0.19.0
4+
torchaudio==2.4.0
55
scipy==1.13.1
66
librosa==0.10.2
77
huggingface-hub==0.23.4
@@ -17,4 +17,4 @@ FreeSimpleGUI==5.1.1
1717
soundfile==0.12.1
1818
sounddevice==0.5.0
1919
modelscope==1.18.1
20-
funasr==1.1.5
20+
funasr==1.1.5

0 commit comments