noblebarkrr committed on
Commit
65f2f6a
·
verified ·
1 Parent(s): 954e865

Google Drive support added + minifixes

Browse files
Files changed (6) hide show
  1. app.py +321 -18
  2. args_parser.py +630 -630
  3. extra_utils.py +51 -88
  4. i18n.py +30 -2
  5. inference.py +17 -10
  6. vbach_lib/infer.py +398 -397
app.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  import gradio as gr
2
  import sys
3
  import json
@@ -6,7 +9,7 @@ from urllib.parse import urlparse
6
  from pathlib import Path, PurePosixPath
7
  BASE_DIR = Path(__file__).resolve().parent
8
  sys.path.append(str(BASE_DIR))
9
- from extra_utils import tz, define_audio_with_size, update_audio_with_size, base_c_params, UserDirectory, InputFilesDatabase, OutputDir, one_element_list_to_value, dw_file, dw_yt_dlp
10
  from inference import Separator, add_params, add_params_list, ensemble_types, BASE_DIR
11
  from vbach_lib.infer import VbachConverter, stereo_modes
12
  from vbach_lib.f0_extractor import f0_methods, crepe_like_f0_methods, f0_extract_and_write
@@ -18,10 +21,9 @@ from i18n import _i18n
18
  from args_parser import parse_app_args
19
  import tempfile
20
  import shutil
 
21
  from copy import deepcopy
22
 
23
-
24
-
25
  def generate_add_params_component():
26
  add_params_components = []
27
  for tab, components in add_params.items():
@@ -36,10 +38,183 @@ def generate_add_params_component():
36
  add_params_components.append(gr.Checkbox(label=_i18n(component_name), value=params["default"], info=_i18n(params.get("info", "")), **base_c_params["base"]))
37
  return add_params_components
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  class History(UserDirectory):
40
  def __init__(self, name: str = "mvsepless"):
41
  super().__init__()
42
- self.history_dir_base = self.user_directory / "history"
43
  self.history_dir_base.mkdir(parents=True, exist_ok=True)
44
  self.history_dict_json = self.history_dir_base / f"{name}.json"
45
  self.history_dict = {}
@@ -67,6 +242,32 @@ class History(UserDirectory):
67
  self.history_dict = json.loads(self.history_dict_json.read_text("utf-8"))
68
  print(_i18n("history_loaded"))
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  def get_list(self, update_from_file=False):
71
  if update_from_file:
72
  self.load()
@@ -78,7 +279,7 @@ class History(UserDirectory):
78
  self.history_dict.update([(f"{timestamp} | {model_name}", deepcopy(state))])
79
 
80
  def get_from_history(self, key: str):
81
- return deepcopy(self.history_dict.get(key, []))
82
 
83
  class HistoryAutoEnsemble(History):
84
  def __init__(self):
@@ -97,6 +298,30 @@ class HistoryAutoEnsemble(History):
97
  results_ = func(self, *args, **kwargs)
98
  return results_
99
  return wrapper
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  @_write_decorator
102
  def add_to_history(self, etype: str, output: str, inverted_output: str, primary_stems_list: list = []):
@@ -124,6 +349,29 @@ class HistoryManualEnsemble(History):
124
  return results_
125
  return wrapper
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  @_write_decorator
128
  def add_to_history(self, etype: str, state: str):
129
  timestamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
@@ -150,6 +398,29 @@ class HistorySubtractor(History):
150
  return results_
151
  return wrapper
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  @_write_decorator
154
  def add_to_history(self, itype: str, state: str):
155
  timestamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
@@ -173,6 +444,29 @@ class HistoryVbach(History):
173
  return results_
174
  return wrapper
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  @_write_decorator
177
  def add_to_history(self, model_name: str, f0_method: str, pitch: int, output_files: list):
178
  timestamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
@@ -185,7 +479,7 @@ class AutoEnsembleApp(UserDirectory):
185
  def __init__(self):
186
  super().__init__()
187
  self.state = []
188
- self.ensemble_base = self.user_directory / "ensemble_flows"
189
  self.ensemble_base.mkdir(parents=True, exist_ok=True)
190
 
191
  def write_flow(self, name: str):
@@ -294,7 +588,7 @@ class VbachModelsDir(UserDirectory):
294
 
295
  def __init__(self):
296
  super().__init__()
297
- self.vbach_models_base = self.user_directory / "vbach_models"
298
  self.pth_models_dir = self.vbach_models_base / "pth"
299
  self.index_models_dir = self.vbach_models_base / "index"
300
  self.pth_models_dir.mkdir(parents=True, exist_ok=True)
@@ -408,7 +702,7 @@ class VbachModelsDir(UserDirectory):
408
  class F0GenerateOutPath(UserDirectory):
409
  def __init__(self):
410
  super().__init__()
411
- self.f0_curves_dir = self.user_directory / "f0_curves"
412
  self.f0_curves_dir.mkdir(parents=True, exist_ok=True)
413
 
414
  def generate_output_path(self, name: str, f0_method: str):
@@ -421,7 +715,7 @@ class CustomSeparationModelsDir(UserDirectory):
421
 
422
  def __init__(self):
423
  super().__init__()
424
- self.custom_models_base = self.user_directory / "custom_separation_models"
425
  self.checkpoints_dir = self.custom_models_base / "checkpoints"
426
  self.configs_dir = self.custom_models_base / "configs"
427
  self.checkpoints_dir.mkdir(parents=True, exist_ok=True)
@@ -617,14 +911,8 @@ class App(Separator):
617
  return gr.skip()
618
  return gr.update(choices=current_configs, value=value), current_configs
619
 
620
- def get_actual_custom_sep_history_list(self, value, state):
621
- """Get updated history list"""
622
- current_history = self.custom_sep_history.get_list()
623
- if current_history == state:
624
- return gr.skip()
625
- return gr.update(choices=current_history, value=value), current_history
626
-
627
  def UI(self, theme=None, hf_space_mode=False):
 
628
  all_models = self.get_all_models()
629
  default_model = all_models[0]
630
  stems_default = self.get_stems(default_model)
@@ -1472,7 +1760,7 @@ class App(Separator):
1472
  gr.Warning(_i18n("model_not_selected"))
1473
  return [], gr.skip()
1474
 
1475
- output_dir = self.output_dir.generate("vbach_output")
1476
  download_hubert(embedder_model, use_transformers)
1477
  results = self.vbach_converter.convert_audio(
1478
  audio_input=input_files,
@@ -1769,7 +2057,7 @@ class App(Separator):
1769
  gr.Warning(_i18n("no_f0_file_selected"))
1770
  return update_audio_with_size(label=_i18n("vbach_result"), value=None), gr.skip()
1771
 
1772
- output_dir = self.output_dir.generate("vbach_custom_output")
1773
  download_hubert(embedder_model, use_transformers)
1774
 
1775
  result = self.vbach_converter.convert_audio_custom_f0(
@@ -2072,6 +2360,21 @@ class App(Separator):
2072
  def upload_vbach_index_fn(files: list, progress=gr.Progress(track_tqdm=True)):
2073
  self.vbach_model_manager.upload_index_model(files)
2074
  return gr.update(value=[])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2075
 
2076
  return mvsepless_app
2077
 
 
1
+ import warnings
2
+ warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*show_api.*") # Предупреждения скрыты
3
+ warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*theme.*")
4
  import gradio as gr
5
  import sys
6
  import json
 
9
  from pathlib import Path, PurePosixPath
10
  BASE_DIR = Path(__file__).resolve().parent
11
  sys.path.append(str(BASE_DIR))
12
+ from extra_utils import tz, define_audio_with_size, update_audio_with_size, base_c_params, easy_check_is_colab, get_gdrive_dir, one_element_list_to_value, dw_file, dw_yt_dlp, get_disk_usage
13
  from inference import Separator, add_params, add_params_list, ensemble_types, BASE_DIR
14
  from vbach_lib.infer import VbachConverter, stereo_modes
15
  from vbach_lib.f0_extractor import f0_methods, crepe_like_f0_methods, f0_extract_and_write
 
21
  from args_parser import parse_app_args
22
  import tempfile
23
  import shutil
24
+ from tqdm import tqdm
25
  from copy import deepcopy
26
 
 
 
27
  def generate_add_params_component():
28
  add_params_components = []
29
  for tab, components in add_params.items():
 
38
  add_params_components.append(gr.Checkbox(label=_i18n(component_name), value=params["default"], info=_i18n(params.get("info", "")), **base_c_params["base"]))
39
  return add_params_components
40
 
41
+ USER_DIR = ""
42
+ GDRIVE_DIR = get_gdrive_dir()
43
def generate_user_dir_from_gdrive():
    """Create the app data folder on the mounted Google Drive.

    Returns:
        The POSIX path of ``<GDRIVE_DIR>/MyDrive/mvsepless-data`` as a string
        (the folder is created when missing), or ``None`` when ``GDRIVE_DIR``
        is falsy.
    """
    if not GDRIVE_DIR:
        return None
    drive_data_dir = Path(GDRIVE_DIR) / "MyDrive" / "mvsepless-data"
    drive_data_dir.mkdir(parents=True, exist_ok=True)
    return drive_data_dir.as_posix()
51
+ GDRIVE_USER_DIR = generate_user_dir_from_gdrive()
52
+
53
def get_default_user_dir():
    """Pick the directory used as the default user directory.

    Returns ``GDRIVE_USER_DIR`` (after printing the "gdrive_mount_found"
    message) when ``easy_check_is_colab()`` is truthy and ``GDRIVE_DIR`` is
    set; otherwise returns ``USER_DIR``.
    """
    if easy_check_is_colab() and GDRIVE_DIR:
        print(_i18n("gdrive_mount_found"))
        return GDRIVE_USER_DIR
    return USER_DIR
62
+
63
+ DEFAULT_USER_DIR = get_default_user_dir()
64
+
65
def rename_user_dir_path(path: str, mode=0):
    """Re-root a stored path between the local user dir and the Drive user dir.

    Args:
        path: Path to translate. A falsy value yields ``None``.
        mode: ``0`` maps a path under ``USER_DIR`` to the same relative
            location under ``GDRIVE_USER_DIR``; ``1`` maps the other way.
            Any other value yields ``None``.

    Returns:
        The translated path as a POSIX string. A path that does not lie under
        the source root (for example one that was already translated), or any
        path when one of the roots is ``None``, is returned unchanged in POSIX
        form instead of raising, so a single foreign entry cannot abort a
        whole history/input migration.
    """
    if not path:
        return None
    if mode == 0:
        source_root, target_root = USER_DIR, GDRIVE_USER_DIR
    elif mode == 1:
        source_root, target_root = GDRIVE_USER_DIR, USER_DIR
    else:
        return None

    original = PurePosixPath(path)
    # GDRIVE_USER_DIR is None when no Drive mount was found; nothing to map.
    if source_root is None or target_root is None:
        return original.as_posix()
    try:
        relative = original.relative_to(source_root)
    except ValueError:
        # Not located under the source root: leave the entry as it is.
        return original.as_posix()
    return (PurePosixPath(target_root) / relative).as_posix()
74
+
75
# Names of the sub-directories the app keeps under the user directory.
# NOTE: order matters - other code in this file selects entries by position,
# so append new names at the end instead of inserting or reordering.
base_names_app_dirs = (
    "input",  # [0] base folder of InputFilesDatabase
    "output_mvsepless",  # [1] default folder name of OutputDir
    "history",  # [2] base folder of History
    "ensemble_flows",  # [3] base folder of AutoEnsembleApp
    "vbach_models",  # [4] base folder of VbachModelsDir
    "f0_curves",  # [5] folder of F0GenerateOutPath
    "custom_separation_models",  # [6] base folder of CustomSeparationModelsDir
    "vbach_output"  # [7] passed to output_dir.generate() for Vbach conversions
)
85
+
86
def copy_to_gdrive():
    """Copy every existing app folder from ``USER_DIR`` into ``GDRIVE_USER_DIR``.

    Does nothing when ``GDRIVE_DIR`` is falsy. Folders listed in
    ``base_names_app_dirs`` that do not exist under ``USER_DIR`` are skipped;
    existing destination folders are merged into (``dirs_exist_ok=True``).
    Prints the number of copied folders and shows a Gradio info toast.
    """
    if not GDRIVE_DIR:
        return

    copied_count = 0
    sources = [(folder, Path(USER_DIR, folder)) for folder in base_names_app_dirs]
    for folder, source in tqdm(sources, desc=_i18n("copy_to_gdrive"), unit=_i18n("dirs")):
        if not source.exists():
            continue
        shutil.copytree(source, Path(GDRIVE_USER_DIR, folder), dirs_exist_ok=True)
        copied_count += 1

    print(_i18n("copied_dirs")+": "+str(copied_count))
    print(_i18n("copy_to_gdrive_done"))
    gr.Info(title=_i18n("copy_to_gdrive_done"), message="")
98
+
99
def copy_to_runtime():
    """Copy every existing app folder from ``GDRIVE_USER_DIR`` into ``USER_DIR``.

    Does nothing when ``GDRIVE_DIR`` is falsy. Folders listed in
    ``base_names_app_dirs`` that do not exist on the Drive side are skipped;
    existing destination folders are merged into (``dirs_exist_ok=True``).
    Prints the number of copied folders and shows a Gradio info toast.
    """
    if not GDRIVE_DIR:
        return

    copied_count = 0
    sources = [(folder, Path(GDRIVE_USER_DIR, folder)) for folder in base_names_app_dirs]
    for folder, source in tqdm(sources, desc=_i18n("copy_to_current_user_dir"), unit=_i18n("dirs")):
        if not source.exists():
            continue
        shutil.copytree(source, Path(USER_DIR, folder), dirs_exist_ok=True)
        copied_count += 1

    print(_i18n("copied_dirs")+": "+str(copied_count))
    # NOTE(review): the completion messages reuse the "copy_to_gdrive_done" key
    # although this copies *from* Drive - looks like a copy-paste; confirm
    # whether a dedicated i18n key exists before changing it.
    print(_i18n("copy_to_gdrive_done"))
    gr.Info(title=_i18n("copy_to_gdrive_done"), message="")
111
+
112
class UserDirectory:
    """Holds a root directory and creates time-stamped sub-folders."""

    def __init__(self, custom_dir=USER_DIR):
        # An empty/falsy custom_dir falls back to the module-wide default.
        self.user_directory = Path(custom_dir or DEFAULT_USER_DIR)

    def change_dir(self, dir: str):
        """Point this object at a different root directory."""
        self.user_directory = Path(dir)

    def generate(self, name: str):
        """Create and return ``<user_directory>/<name>/<timestamp>``."""
        stamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
        target = self.user_directory.joinpath(name, stamp)
        target.mkdir(parents=True, exist_ok=True)
        return target

    def generate_from_dir(self, dir: str):
        """Create and return ``<dir>/<timestamp>``."""
        stamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
        target = Path(dir, stamp)
        target.mkdir(parents=True, exist_ok=True)
        return target
130
+
131
class InputFilesDatabase(UserDirectory):
    """List of uploaded input files, persisted as ``inputs.json``.

    The JSON file lives in the ``base_names_app_dirs[0]`` folder of the user
    directory and is loaded on construction when it exists.
    """

    def __init__(self):
        super().__init__()
        self.input_dir_base = self.user_directory / base_names_app_dirs[0]
        self.input_dir_base.mkdir(parents=True, exist_ok=True)
        self.input_base_json = self.input_dir_base / "inputs.json"
        self.input_base = []
        self.load()

    def _write_decorator(func):
        # Decorator: run the wrapped method, then persist the list to disk.
        def wrapper(self, *args, **kwargs):
            outcome = func(self, *args, **kwargs)
            self.write()
            return outcome
        return wrapper

    def _load_decorator(func):
        # Decorator: reload the list from disk before running the wrapped method.
        def wrapper(self, *args, **kwargs):
            self.load()
            return func(self, *args, **kwargs)
        return wrapper

    @_write_decorator
    def update_data(self, mode: int):
        """Merge re-rooted paths from ``inputs.json`` into the in-memory list.

        Every path stored in the JSON file is passed through
        ``rename_user_dir_path`` with ``mode``; results not already present are
        appended after the current entries, with duplicates removed.
        """
        known_paths = deepcopy(self.input_base)
        stored_paths = []
        if self.input_base_json.exists():
            stored_paths = json.loads(self.input_base_json.read_text("utf-8"))

        remapped = [rename_user_dir_path(stored, mode=mode) for stored in stored_paths]
        additions = [candidate for candidate in remapped if candidate not in known_paths]

        # dict.fromkeys keeps first-seen order while dropping duplicates.
        self.input_base = list(dict.fromkeys(known_paths + additions))

    def write(self):
        """Write the in-memory list to ``inputs.json``."""
        payload = json.dumps(self.input_base, ensure_ascii=False, indent=4)
        self.input_base_json.write_text(payload, encoding="utf-8")

    def load(self):
        """Replace the in-memory list with the content of ``inputs.json``, if present."""
        if not self.input_base_json.exists():
            return
        self.input_base = json.loads(self.input_base_json.read_text("utf-8"))
        print(_i18n("input_base_loaded"))

    @_write_decorator
    def upload(self, files, copy=False):
        """Store the given audio files in a new time-stamped input folder.

        Args:
            files: Candidates, filtered through ``get_audio_files_from_list``.
            copy: Copy the files when true, move them otherwise.

        Returns:
            The destination paths of the stored files.
        """
        target_dir = self.generate_from_dir(self.input_dir_base)
        transfer = shutil.copy2 if copy else shutil.move
        stored = []
        for source in get_audio_files_from_list(files, only_files=True):
            destination = Namer.iter(target_dir / Path(source).name)
            transfer(source, destination)
            stored.append(destination)
        self.input_base.extend(stored)
        return stored

    @_write_decorator
    def clear(self):
        """Delete every listed file from disk and empty the list."""
        for entry in self.input_base:
            Path(entry).unlink(missing_ok=True)
        self.input_base.clear()
        print(_i18n("input_base_cleared"))

    def get_input_list(self):
        """Return the stored entries, most recently added first."""
        return self.input_base[::-1]
205
+
206
class OutputDir(UserDirectory):
    """User directory that hands out time-stamped output folders."""

    def __init__(self, dir: str = base_names_app_dirs[1]):
        super().__init__()
        # Name of the folder (under the user directory) that receives outputs.
        self.output_dir_name = dir

    def gen_output_dir(self):
        """Create and return a fresh time-stamped folder inside the output folder."""
        fresh_output_dir = self.generate(self.output_dir_name)
        return fresh_output_dir
213
+
214
  class History(UserDirectory):
215
  def __init__(self, name: str = "mvsepless"):
216
  super().__init__()
217
+ self.history_dir_base = self.user_directory / base_names_app_dirs[2]
218
  self.history_dir_base.mkdir(parents=True, exist_ok=True)
219
  self.history_dict_json = self.history_dir_base / f"{name}.json"
220
  self.history_dict = {}
 
242
  self.history_dict = json.loads(self.history_dict_json.read_text("utf-8"))
243
  print(_i18n("history_loaded"))
244
 
245
@_write_decorator
def update_data(self, mode: int):
    """Merge re-rooted entries from the history JSON file into ``history_dict``.

    Each stored state is read as a list of ``[basename, stems_list]`` pairs,
    where ``stems_list`` holds ``[stem_name, stem_path]`` pairs. Every
    ``stem_path`` is translated with ``rename_user_dir_path``. Entries whose
    key is already in memory are left untouched; new ones are added after the
    existing entries.

    Args:
        mode: Direction flag forwarded to ``rename_user_dir_path``.
    """
    current_data = deepcopy(self.history_dict)
    stored_data = {}
    if self.history_dict_json.exists():
        stored_data = json.loads(self.history_dict_json.read_text("utf-8"))

    merged = dict(current_data)
    for key, state in stored_data.items():
        if key in current_data:
            # In-memory entries win; nothing to translate for this key.
            continue
        # Keep every entry as a [basename, stems] pair. The previous code
        # extend()-ed the pair into the state, flattening it into a shape
        # this very loop can no longer unpack.
        merged[key] = [
            [
                basename,
                [
                    [stem_name, rename_user_dir_path(stem_path, mode=mode)]
                    for stem_name, stem_path in stems_list
                ],
            ]
            for basename, stems_list in state
        ]

    self.history_dict = merged
270
+
271
  def get_list(self, update_from_file=False):
272
  if update_from_file:
273
  self.load()
 
279
  self.history_dict.update([(f"{timestamp} | {model_name}", deepcopy(state))])
280
 
281
  def get_from_history(self, key: str):
282
+ return deepcopy(self.history_dict.get(key, None))
283
 
284
  class HistoryAutoEnsemble(History):
285
  def __init__(self):
 
298
  results_ = func(self, *args, **kwargs)
299
  return results_
300
  return wrapper
301
+
302
@_write_decorator
def update_data(self, mode: int):
    """Merge re-rooted entries from the history JSON file into ``history_dict``.

    A stored state is indexed as ``[result, inverted, primary_stems_list]``;
    all three parts are translated with ``rename_user_dir_path``. Only keys
    missing from the in-memory history are added.
    """
    current_data = deepcopy(self.history_dict)
    stored_data = {}
    if self.history_dict_json.exists():
        stored_data = json.loads(self.history_dict_json.read_text("utf-8"))

    remapped = {
        key: [
            rename_user_dir_path(state[0], mode=mode),  # result
            rename_user_dir_path(state[1], mode=mode),  # invert
            [rename_user_dir_path(stem_path, mode=mode) for stem_path in state[2]],  # primary_stems_list
        ]
        for key, state in stored_data.items()
    }
    additions = {
        key: state
        for key, state in remapped.items()
        if key not in current_data and state != current_data.get(key)
    }

    self.history_dict = {**current_data, **additions}
325
 
326
  @_write_decorator
327
  def add_to_history(self, etype: str, output: str, inverted_output: str, primary_stems_list: list = []):
 
349
  return results_
350
  return wrapper
351
 
352
@_write_decorator
def update_data(self, mode: int):
    """Merge re-rooted entries from the history JSON file into ``history_dict``.

    Each truthy stored state is passed to ``rename_user_dir_path``; falsy
    states become ``None``. Only keys missing from the in-memory history are
    added, and entries whose translated state is ``None`` are not added.
    """
    current_data = deepcopy(self.history_dict)
    stored_data = {}
    if self.history_dict_json.exists():
        stored_data = json.loads(self.history_dict_json.read_text("utf-8"))

    remapped = {
        key: (rename_user_dir_path(state, mode=mode) if state else None)
        for key, state in stored_data.items()
    }
    additions = {
        key: state
        for key, state in remapped.items()
        if key not in current_data and state != current_data.get(key)
    }

    self.history_dict = {**current_data, **additions}
374
+
375
  @_write_decorator
376
  def add_to_history(self, etype: str, state: str):
377
  timestamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
 
398
  return results_
399
  return wrapper
400
 
401
@_write_decorator
def update_data(self, mode: int):
    """Merge re-rooted entries from the history JSON file into ``history_dict``.

    Each truthy stored state is passed to ``rename_user_dir_path``; falsy
    states become ``None``. Only keys missing from the in-memory history are
    added, and entries whose translated state is ``None`` are not added.
    """
    current_data = deepcopy(self.history_dict)
    stored_data = {}
    if self.history_dict_json.exists():
        stored_data = json.loads(self.history_dict_json.read_text("utf-8"))

    remapped = {}
    for key, state in stored_data.items():
        remapped[key] = rename_user_dir_path(state, mode=mode) if state else None

    additions = {}
    for key, state in remapped.items():
        if key in current_data:
            continue
        if state != current_data.get(key):
            additions[key] = state

    self.history_dict = {**current_data, **additions}
423
+
424
  @_write_decorator
425
  def add_to_history(self, itype: str, state: str):
426
  timestamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
 
444
  return results_
445
  return wrapper
446
 
447
@_write_decorator
def update_data(self, mode: int):
    """Merge re-rooted entries from the history JSON file into ``history_dict``.

    A stored state is iterated as a list of file paths, each translated with
    ``rename_user_dir_path``; a falsy state becomes an empty list. Only keys
    missing from the in-memory history are added.
    """
    current_data = deepcopy(self.history_dict)
    stored_data = {}
    if self.history_dict_json.exists():
        stored_data = json.loads(self.history_dict_json.read_text("utf-8"))

    remapped = {
        key: ([rename_user_dir_path(file_path, mode=mode) for file_path in state] if state else [])
        for key, state in stored_data.items()
    }
    additions = {
        key: state
        for key, state in remapped.items()
        if key not in current_data and state != current_data.get(key)
    }

    self.history_dict = {**current_data, **additions}
469
+
470
  @_write_decorator
471
  def add_to_history(self, model_name: str, f0_method: str, pitch: int, output_files: list):
472
  timestamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
 
479
  def __init__(self):
480
  super().__init__()
481
  self.state = []
482
+ self.ensemble_base = self.user_directory / base_names_app_dirs[3]
483
  self.ensemble_base.mkdir(parents=True, exist_ok=True)
484
 
485
  def write_flow(self, name: str):
 
588
 
589
  def __init__(self):
590
  super().__init__()
591
+ self.vbach_models_base = self.user_directory / base_names_app_dirs[4]
592
  self.pth_models_dir = self.vbach_models_base / "pth"
593
  self.index_models_dir = self.vbach_models_base / "index"
594
  self.pth_models_dir.mkdir(parents=True, exist_ok=True)
 
702
  class F0GenerateOutPath(UserDirectory):
703
  def __init__(self):
704
  super().__init__()
705
+ self.f0_curves_dir = self.user_directory / base_names_app_dirs[5]
706
  self.f0_curves_dir.mkdir(parents=True, exist_ok=True)
707
 
708
  def generate_output_path(self, name: str, f0_method: str):
 
715
 
716
  def __init__(self):
717
  super().__init__()
718
+ self.custom_models_base = self.user_directory / base_names_app_dirs[6]
719
  self.checkpoints_dir = self.custom_models_base / "checkpoints"
720
  self.configs_dir = self.custom_models_base / "configs"
721
  self.checkpoints_dir.mkdir(parents=True, exist_ok=True)
 
911
  return gr.skip()
912
  return gr.update(choices=current_configs, value=value), current_configs
913
 
 
 
 
 
 
 
 
914
  def UI(self, theme=None, hf_space_mode=False):
915
+ global GDRIVE_DIR, IS_CUSTOM_DIR
916
  all_models = self.get_all_models()
917
  default_model = all_models[0]
918
  stems_default = self.get_stems(default_model)
 
1760
  gr.Warning(_i18n("model_not_selected"))
1761
  return [], gr.skip()
1762
 
1763
+ output_dir = self.output_dir.generate(base_names_app_dirs[7])
1764
  download_hubert(embedder_model, use_transformers)
1765
  results = self.vbach_converter.convert_audio(
1766
  audio_input=input_files,
 
2057
  gr.Warning(_i18n("no_f0_file_selected"))
2058
  return update_audio_with_size(label=_i18n("vbach_result"), value=None), gr.skip()
2059
 
2060
+ output_dir = self.output_dir.generate(base_names_app_dirs[7])
2061
  download_hubert(embedder_model, use_transformers)
2062
 
2063
  result = self.vbach_converter.convert_audio_custom_f0(
 
2360
  def upload_vbach_index_fn(files: list, progress=gr.Progress(track_tqdm=True)):
2361
  self.vbach_model_manager.upload_index_model(files)
2362
  return gr.update(value=[])
2363
+
2364
+ if GDRIVE_USER_DIR:
2365
+ with gr.Tab(_i18n("google_drive")):
2366
+ gdrive_info = gr.Textbox(lines=3, label=_i18n("status"), interactive=False)
2367
+ gr.Timer().tick(lambda: gr.update(value=get_disk_usage(GDRIVE_DIR)), outputs=gdrive_info)
2368
+ copy_to_gdrive_btn = gr.Button(_i18n("copy_from_current_user_dir_to_gdrive"), **base_c_params["base"])
2369
+ @copy_to_gdrive_btn.click()
2370
+ def copy_to_gdrive_fn():
2371
+ copy_to_gdrive()
2372
+ self.input_files.update_data(0)
2373
+ self.history.update_data(0)
2374
+ self.auto_ensemble_history_app.update_data(0)
2375
+ self.manual_ensemble_history_app.update_data(0)
2376
+ self.subtract_history_app.update_data(0)
2377
+ self.vbach_history_app.update_data(0)
2378
 
2379
  return mvsepless_app
2380
 
args_parser.py CHANGED
@@ -1,631 +1,631 @@
1
- import argparse
2
- from pathlib import Path
3
- from i18n import _i18n
4
- BASE_DIR = Path(__file__).resolve().parent
5
- from audio import output_formats
6
-
7
- def tobool(val: str | bool | int):
8
- if isinstance(val, int):
9
- return True if val >= 1 else False
10
- elif isinstance(val, str):
11
- if val in ["y", "yes", "Yes", "true", "True", "1"]:
12
- return True
13
- else:
14
- return False
15
- elif isinstance(val, bool):
16
- return val
17
-
18
class NestedAction(argparse.Action):
    """Store a parsed value inside a nested namespace.

    ``dest`` must have the form ``"group.attr"``; the value ends up at
    ``namespace.group.attr``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # Split dest at the first dot, e.g. "database.host".
        group_name, attr_name = self.dest.split('.', maxsplit=1)
        # Reuse the nested Namespace when it already exists, else create one.
        if hasattr(namespace, group_name):
            nested = getattr(namespace, group_name)
        else:
            nested = argparse.Namespace()
        # Put the value on the nested object and attach it to the main one.
        setattr(nested, attr_name, values)
        setattr(namespace, group_name, nested)
28
-
29
class NestedStoreTrue(argparse.Action):
    """Flag action that sets ``namespace.group.attr`` to ``True``.

    ``dest`` must have the form ``"group.attr"``. The option takes no value
    (``nargs=0``) and defaults to ``False``.
    """

    def __init__(self, option_strings, dest, default=False, help=None, **kwargs):
        # Register as a zero-argument option with the given default.
        super().__init__(
            option_strings=option_strings,
            dest=dest,
            nargs=0,
            default=default,
            help=help,
            **kwargs,
        )

    def __call__(self, parser, namespace, values, option_string=None):
        # The flag was passed: set the nested attribute to True.
        group_name, attr_name = self.dest.split('.', maxsplit=1)
        if hasattr(namespace, group_name):
            nested = getattr(namespace, group_name)
        else:
            nested = argparse.Namespace()
        setattr(nested, attr_name, True)
        setattr(namespace, group_name, nested)
40
-
41
- def parse_separator_args(add_params_args: dict = {}):
42
- parser = argparse.ArgumentParser(
43
- description=_i18n("arg_main_description"),
44
- epilog=_i18n("arg_main_epilog")
45
- )
46
- subparsers = parser.add_subparsers(
47
- title=_i18n("arg_subcommands_title"),
48
- dest="mode",
49
- description=_i18n("arg_subcommands_description"),
50
- help=_i18n("arg_subcommands_help")
51
- )
52
-
53
- # separate
54
- separate_parser = subparsers.add_parser(
55
- "separate",
56
- help=_i18n("arg_separate_help"),
57
- description=_i18n("arg_separate_description"),
58
- epilog=_i18n("arg_separate_epilog")
59
- )
60
-
61
- # custom_separate
62
- custom_separate_parser = subparsers.add_parser(
63
- "custom_separate",
64
- help=_i18n("arg_custom_separate_help"),
65
- description=_i18n("arg_custom_separate_description"),
66
- epilog=_i18n("arg_custom_separate_epilog")
67
- )
68
-
69
- # info
70
- info_parser = subparsers.add_parser(
71
- "info",
72
- help=_i18n("arg_info_help"),
73
- description=_i18n("arg_info_description"),
74
- epilog=_i18n("arg_info_epilog")
75
- )
76
-
77
- # auto_ensemble
78
- auto_ensemble_parser = subparsers.add_parser(
79
- "auto_ensemble",
80
- help=_i18n("arg_auto_ensemble_help"),
81
- description=_i18n("arg_auto_ensemble_description"),
82
- epilog=_i18n("arg_auto_ensemble_epilog")
83
- )
84
-
85
- # manual_ensemble
86
- manual_ensemble_parser = subparsers.add_parser(
87
- "manual_ensemble",
88
- help=_i18n("arg_manual_ensemble_help"),
89
- description=_i18n("arg_manual_ensemble_description"),
90
- epilog=_i18n("arg_manual_ensemble_epilog")
91
- )
92
-
93
- # subtract
94
- subtract_parser = subparsers.add_parser(
95
- "subtract",
96
- help=_i18n("arg_subtract_help"),
97
- description=_i18n("arg_subtract_description"),
98
- epilog=_i18n("arg_subtract_epilog")
99
- )
100
-
101
- # separate
102
- separate_parser.add_argument(
103
- "-i", "--i", "-input", "--input", "--input_files", "--input-files",
104
- nargs="+", dest="input",
105
- help=_i18n("arg_input_help")
106
- )
107
- separate_parser.add_argument(
108
- "-o", "-out", "-output", "--output", "--output_dir", "--output-dir",
109
- type=str, default=".", dest="output_dir",
110
- help=_i18n("arg_output_dir_help")
111
- )
112
- separate_parser.add_argument(
113
- "-of", "-output_fmt", "--output_format", "--output-format",
114
- type=str, choices=output_formats, default=output_formats[0], dest="output_format",
115
- help=_i18n("arg_output_format_help", formats=", ".join(output_formats), default=output_formats[0])
116
- )
117
- separate_parser.add_argument(
118
- "-tm", "-tmplt", "--template", type=str, default="NAME_STEM_MODEL", dest="template",
119
- help=_i18n("arg_template_help", keys=_i18n("template_keys_separate"), example="NAME_STEM_MODEL")
120
- )
121
- separate_parser.add_argument(
122
- "-mn", "-model", "--model_name", "--model-name",
123
- type=str, default="bs_6stem", dest="model_name",
124
- help=_i18n("arg_model_name_help")
125
- )
126
- separate_parser.add_argument(
127
- "-inst", "-ext_inst", "-ext-inst", "--extract_instrumental", "--extract-instrumental",
128
- action="store_true", dest="extract_instrumental",
129
- help=_i18n("arg_extract_instrumental_help")
130
- )
131
- separate_parser.add_argument(
132
- "-ispec", "-spec_invert", "-spec-invert", "--use_spec_invert", "--use-spec-invert",
133
- action="store_true", dest="use_spec_invert",
134
- help=_i18n("arg_use_spec_invert_help")
135
- )
136
- separate_parser.add_argument(
137
- "-st", "--st", "-stems", "--stems", "--selected_stems", "--selected-stems",
138
- nargs="*", metavar="STEM", dest="selected_stems",
139
- help=_i18n("arg_selected_stems_help")
140
- )
141
- for param_name, param_value in add_params_args.items():
142
- param_type = param_value.get("type")
143
- default = param_value.get("default")
144
- separate_parser.add_argument(
145
- f"--{param_name}",
146
- action=NestedStoreTrue if param_type == "bool" else NestedAction,
147
- type=None if param_type == "bool" else (int if param_type == "int" else (float if param_type == "float" else str)),
148
- default=default,
149
- dest=f"add_params.{param_name}",
150
- help=_i18n("arg_add_param_help")
151
- )
152
-
153
- # custom_separate
154
- custom_separate_parser.add_argument(
155
- "-i", "--i", "-input", "--input", "--input_files", "--input-files",
156
- nargs="+", dest="input",
157
- help=_i18n("arg_input_help")
158
- )
159
- custom_separate_parser.add_argument(
160
- "-o", "-out", "-output", "--output", "--output_dir", "--output-dir",
161
- type=str, default=".", dest="output_dir",
162
- help=_i18n("arg_output_dir_help")
163
- )
164
- custom_separate_parser.add_argument(
165
- "-of", "-output_fmt", "--output_format", "--output-format",
166
- type=str, choices=output_formats, default=output_formats[0], dest="output_format",
167
- help=_i18n("arg_output_format_help", formats=", ".join(output_formats), default=output_formats[0])
168
- )
169
- custom_separate_parser.add_argument(
170
- "-tm", "-tmplt", "--template", type=str, default="NAME_STEM_MODEL", dest="template",
171
- help=_i18n("arg_template_help", keys=_i18n("template_keys_separate"), example="NAME_STEM_MODEL")
172
- )
173
- custom_separate_parser.add_argument(
174
- "-mt", "-mtype", "--model_type", "--model-type",
175
- type=str, default="bs_roformer", dest="model_type",
176
- help=_i18n("arg_model_type_help")
177
- )
178
- custom_separate_parser.add_argument(
179
- "-ckpt", "--ckpt", "-checkpoint", "--checkpoint", "--checkpoint_path", "--checkpoint-path",
180
- type=str, required=True, dest="checkpoint_path",
181
- help=_i18n("arg_checkpoint_path_help")
182
- )
183
- custom_separate_parser.add_argument(
184
- "-conf", "--conf", "-config", "--config", "--config_path", "--config-path",
185
- type=str, required=True, dest="config_path",
186
- help=_i18n("arg_config_path_help")
187
- )
188
- custom_separate_parser.add_argument(
189
- "-inst", "-ext_inst", "-ext-inst", "--extract_instrumental", "--extract-instrumental",
190
- action="store_true", dest="extract_instrumental",
191
- help=_i18n("arg_extract_instrumental_help")
192
- )
193
- custom_separate_parser.add_argument(
194
- "-ispec", "-spec_invert", "-spec-invert", "--use_spec_invert", "--use-spec-invert",
195
- action="store_true", dest="use_spec_invert",
196
- help=_i18n("arg_use_spec_invert_help")
197
- )
198
- custom_separate_parser.add_argument(
199
- "-st", "--st", "-stems", "--stems", "--selected_stems", "--selected-stems",
200
- nargs="*", metavar="STEM", dest="selected_stems",
201
- help=_i18n("arg_selected_stems_help")
202
- )
203
- for param_name, param_value in add_params_args.items():
204
- param_type = param_value.get("type")
205
- default = param_value.get("default")
206
- custom_separate_parser.add_argument(
207
- f"--{param_name}",
208
- action=NestedStoreTrue if param_type == "bool" else NestedAction,
209
- type=None if param_type == "bool" else (int if param_type == "int" else (float if param_type == "float" else str)),
210
- default=default,
211
- dest=f"add_params.{param_name}",
212
- help=_i18n("arg_add_param_help")
213
- )
214
-
215
- # auto_ensemble
216
- auto_ensemble_parser.add_argument(
217
- "-i", "--i", "-input", "--input", "--input_file", "--input-file",
218
- type=str, required=True, dest="input",
219
- help=_i18n("arg_input_single_help")
220
- )
221
- auto_ensemble_parser.add_argument(
222
- "-o", "-out", "-output", "--output", "--output_dir", "--output-dir",
223
- type=str, default=".", dest="output_dir",
224
- help=_i18n("arg_output_dir_help")
225
- )
226
- auto_ensemble_parser.add_argument(
227
- "-of", "-output_fmt", "--output_format", "--output-format",
228
- type=str, choices=output_formats, default=output_formats[0], dest="output_format",
229
- help=_i18n("arg_output_format_help", formats=", ".join(output_formats), default=output_formats[0])
230
- )
231
- auto_ensemble_parser.add_argument(
232
- "-tm", "-tmplt", "--template", type=str, default="NAME_TYPE_COUNT", dest="template",
233
- help=_i18n("arg_template_help", keys=_i18n("template_keys_auto_ensemble"), example="NAME_COUNT_TYPE")
234
- )
235
- auto_ensemble_parser.add_argument(
236
- "-t", "-type", "-etype", "--ensemble_type", "--ensemble-type",
237
- type=str, default="avg_fft", dest="ensemble_type",
238
- help=_i18n("arg_ensemble_type_help")
239
- )
240
- auto_ensemble_parser.add_argument(
241
- "-ispec", "-spec_invert", "-spec-invert", "--use_spec_invert", "--use-spec-invert",
242
- action="store_true", dest="use_spec_invert",
243
- help=_i18n("arg_use_spec_invert_help")
244
- )
245
- auto_ensemble_parser.add_argument(
246
- "-save_stems", "-save-stems", "-save_primary_stems", "--save-primary-stems",
247
- action="store_true", dest="save_primary_stems",
248
- help=_i18n("arg_save_primary_stems_help")
249
- )
250
- auto_ensemble_flow_group = auto_ensemble_parser.add_mutually_exclusive_group(required=True)
251
- auto_ensemble_flow_group.add_argument(
252
- "-flow", "--flow", nargs="+", metavar="MODEL:PRIMARY_STEM:INVERT:WEIGHTS",
253
- dest="flow",
254
- help=_i18n("arg_flow_help")
255
- )
256
- auto_ensemble_flow_group.add_argument(
257
- "-json", "-preset", "-preset_json", "-preset-json", "--preset_json", "--preset-json",
258
- type=str, dest="preset",
259
- help=_i18n("arg_preset_json_help")
260
- )
261
-
262
- # manual_ensemble
263
- manual_ensemble_parser.add_argument(
264
- "-i", "--i", "-input", "--input", "--input_files", "--input-files",
265
- nargs="+", dest="input",
266
- help=_i18n("arg_input_help")
267
- )
268
- manual_ensemble_parser.add_argument(
269
- "-o", "-out", "-output", "--output", "--output_dir", "--output-dir",
270
- type=str, default=".", dest="output_dir",
271
- help=_i18n("arg_output_dir_help")
272
- )
273
- manual_ensemble_parser.add_argument(
274
- "-of", "-output_fmt", "--output_format", "--output-format",
275
- type=str, choices=output_formats, default=output_formats[0], dest="output_format",
276
- help=_i18n("arg_output_format_help", formats=", ".join(output_formats), default=output_formats[0])
277
- )
278
- manual_ensemble_parser.add_argument(
279
- "-tm", "-tmplt", "--template", type=str, default="NAME_TYPE", dest="template",
280
- help=_i18n("arg_template_help", keys=_i18n("template_keys_manual_ensemble"), example="NAME_TYPE")
281
- )
282
- manual_ensemble_parser.add_argument(
283
- "-t", "-type", "-etype", "--ensemble_type", "--ensemble-type",
284
- type=str, default="avg_fft", dest="ensemble_type",
285
- help=_i18n("arg_ensemble_type_help")
286
- )
287
- manual_ensemble_parser.add_argument(
288
- "-w", "-weights", "--weights", type=float, nargs="*", dest="weights",
289
- help=_i18n("arg_weights_help")
290
- )
291
-
292
- # subtract
293
- subtract_parser.add_argument(
294
- "-i1", "--i1", "-input1", "--input1", "--input_file1", "--input-file1",
295
- type=str, required=True, dest="input_1",
296
- help=_i18n("arg_input1_help")
297
- )
298
- subtract_parser.add_argument(
299
- "-i2", "--i2", "-input2", "--input2", "--input_file2", "--input-file2",
300
- type=str, required=True, dest="input_2",
301
- help=_i18n("arg_input2_help")
302
- )
303
- subtract_parser.add_argument(
304
- "-o", "-out", "-output", "--output", "--output_dir", "--output-dir",
305
- type=str, default=".", dest="output_dir",
306
- help=_i18n("arg_output_dir_help")
307
- )
308
- subtract_parser.add_argument(
309
- "-of", "-output_fmt", "--output_format", "--output-format",
310
- type=str, choices=output_formats, default=output_formats[0], dest="output_format",
311
- help=_i18n("arg_output_format_help", formats=", ".join(output_formats), default=output_formats[0])
312
- )
313
- subtract_parser.add_argument(
314
- "-tm", "-tmplt", "--template", type=str, default="NAME_TYPE", dest="template",
315
- help=_i18n("arg_template_help", keys=_i18n("template_keys_subtract"), example="NAME_TYPE")
316
- )
317
- subtract_parser.add_argument(
318
- "-ispec", "-spec_invert", "-spec-invert", "--use_spec_invert", "--use-spec-invert",
319
- action="store_true", dest="use_spec_invert",
320
- help=_i18n("arg_use_spec_invert_help")
321
- )
322
-
323
- # info
324
- info_parser.add_argument(
325
- "-u", "-update", "--update", action="store_true", dest="update",
326
- help=_i18n("arg_update_help")
327
- )
328
- info_parser.add_argument(
329
- "-clear", "-clear_cache", "-clear-cache", "--clear_cache", "--clear-cache",
330
- action="store_true", dest="clear_cache",
331
- help=_i18n("arg_clear_cache_help")
332
- )
333
- info_parser.add_argument(
334
- "-mn", "-model", "--model_name", "--model-name",
335
- type=str, default="bs_6stem", dest="model_name",
336
- help=_i18n("arg_model_name_help")
337
- )
338
- info_parser.add_argument(
339
- "-dw", "-download", "--download", action="store_true", dest="download",
340
- help=_i18n("arg_download_help")
341
- )
342
- info_parser.add_argument(
343
- "-l", "-limit", "--limit", type=int, default=None, dest="limit",
344
- help=_i18n("arg_limit_help")
345
- )
346
- info_parser.add_argument(
347
- "-s", "-stem", "--stem", type=str, default=None, dest="stem",
348
- help=_i18n("arg_stem_filter_help")
349
- )
350
- info_parser.add_argument(
351
- "-oi", "-installed", "--only_installed", "--only-installed",
352
- action="store_true", dest="only_installed",
353
- help=_i18n("arg_only_installed_help")
354
- )
355
-
356
- return parser.parse_args()
357
-
358
-
359
def _add_vbach_mode(subparsers, name):
    """Create the sub-command *name*; its texts use the i18n keys ``vbach_<name>_{help,description,epilog}``."""
    return subparsers.add_parser(
        name,
        help=_i18n(f"vbach_{name}_help"),
        description=_i18n(f"vbach_{name}_description"),
        epilog=_i18n(f"vbach_{name}_epilog")
    )


def _add_vbach_model_args(subparser):
    """Register the output-dir, model, index and pitch options shared by both infer modes."""
    subparser.add_argument(
        "-o", "-out", "-output", "--output", "--output_dir", "--output-dir",
        type=str, default=".", dest="output_dir",
        help=_i18n("arg_output_dir_help")
    )
    subparser.add_argument(
        "-m", "-model", "--model_path", "--model-path",
        type=str, required=True, dest="checkpoint_path",
        help=_i18n("vbach_model_path_help")
    )
    subparser.add_argument(
        "-idx", "-index", "--index_path", "--index-path",
        type=str, default="", dest="index_path",
        help=_i18n("vbach_index_path_help")
    )
    subparser.add_argument(
        "-p", "-pitch", "--pitch", type=int, default=0, dest="pitch",
        help=_i18n("vbach_pitch_help")
    )


def _add_vbach_blend_args(subparser):
    """Register the index-rate, volume-envelope and protect options shared by both infer modes."""
    subparser.add_argument(
        "-idxr", "-index_rate", "--index_rate", "--index-rate",
        type=float, default=0.75, dest="index_rate",
        help=_i18n("vbach_index_rate_help")
    )
    subparser.add_argument(
        "-ve", "-volume_envelope", "--volume_envelope", "--volume-envelope",
        type=float, default=0.25, dest="volume_envelope",
        help=_i18n("vbach_volume_envelope_help")
    )
    subparser.add_argument(
        "-pr", "-protect", "--protect", type=float, default=0.33, dest="protect",
        help=_i18n("vbach_protect_help")
    )


def _add_vbach_embedder_args(subparser):
    """Register the embedder and use-transformers options (used by all three sub-commands)."""
    subparser.add_argument(
        "-emb", "-embedder", "--embedder_model", "--embedder-model",
        type=str, default="hubert_base", dest="embedder",
        help=_i18n("vbach_embedder_help")
    )
    subparser.add_argument(
        "-tf", "-use_transformers", "--use_transformers", "--use-transformers",
        action="store_true", dest="use_transformers",
        help=_i18n("vbach_use_transformers_help")
    )


def _add_vbach_render_args(subparser):
    """Register output-format, stereo-mode, f0 range, chunk-duration and template options."""
    subparser.add_argument(
        "-of", "-output_fmt", "--output_format", "--output-format",
        type=str, choices=output_formats, default=output_formats[0], dest="output_format",
        help=_i18n("arg_output_format_help", formats=", ".join(output_formats), default=output_formats[0])
    )
    subparser.add_argument(
        "-stm", "-stereo_mode", "--stereo_mode", "--stereo-mode",
        type=str, choices=("mono", "left/right", "sim/dif"), default="mono", dest="stereo_mode",
        help=_i18n("vbach_stereo_mode_help")
    )
    subparser.add_argument(
        "-f0min", "--f0_min", "--f0-min", type=int, default=50, dest="f0_min",
        help=_i18n("vbach_f0_min_help")
    )
    subparser.add_argument(
        "-f0max", "--f0_max", "--f0-max", type=int, default=1100, dest="f0_max",
        help=_i18n("vbach_f0_max_help")
    )
    subparser.add_argument(
        "-chd", "-chunk_duration", "--chunk_duration", "--chunk-duration",
        type=int, default=7, dest="chunk_duration",
        help=_i18n("vbach_chunk_duration_help")
    )
    subparser.add_argument(
        "-tm", "-tmplt", "--template", type=str, default="NAME_F0METHOD_PITCH", dest="template",
        help=_i18n("arg_template_help", keys=_i18n("template_keys_vbach"), example="NAME_F0METHOD_PITCH")
    )


def parse_vbach_args():
    """Build the Vbach command-line parser and parse ``sys.argv``.

    Three sub-commands are registered; the chosen one is stored in ``mode``:

    * ``infer`` - one or more inputs, with an f0 method and hop length.
    * ``infer_custom_f0`` - a single input, with an f0 file instead of an
      f0 method and no hop length option.
    * ``download_hubert`` - only the embedder options.

    Options common to several sub-commands are registered through the
    private helpers above, in the same order as before, so ``--help``
    output and accepted flags are unchanged.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        description=_i18n("vbach_main_description"),
        epilog=_i18n("vbach_main_epilog")
    )
    subparsers = parser.add_subparsers(
        title=_i18n("arg_subcommands_title"),
        dest="mode",
        description=_i18n("arg_subcommands_description"),
        help=_i18n("arg_subcommands_help")
    )

    infer_parser = _add_vbach_mode(subparsers, "infer")
    infer_custom_f0_parser = _add_vbach_mode(subparsers, "infer_custom_f0")
    download_hubert_parser = _add_vbach_mode(subparsers, "download_hubert")

    # infer
    infer_parser.add_argument(
        "-i", "--i", "-input", "--input", "--input_files", "--input-files",
        nargs="+", dest="input",
        help=_i18n("arg_input_help")
    )
    _add_vbach_model_args(infer_parser)
    infer_parser.add_argument(
        "-f0m", "-f0_method", "--f0_method", "--f0-method",
        type=str, default="rmvpe+", dest="f0_method",
        help=_i18n("vbach_f0_method_help")
    )
    _add_vbach_blend_args(infer_parser)
    infer_parser.add_argument(
        "-hl", "-hop_length", "--hop_length", "--hop-length",
        type=int, default=128, dest="hop_length",
        help=_i18n("vbach_hop_length_help")
    )
    _add_vbach_embedder_args(infer_parser)
    _add_vbach_render_args(infer_parser)

    # infer_custom_f0
    infer_custom_f0_parser.add_argument(
        "-i", "--i", "-input", "--input", type=str, required=True, dest="input",
        help=_i18n("arg_input_single_help")
    )
    _add_vbach_model_args(infer_custom_f0_parser)
    infer_custom_f0_parser.add_argument(
        "-f0f", "-f0_file", "--f0_file", "--f0-file",
        type=str, dest="f0_file",
        help=_i18n("vbach_f0_file_help")
    )
    _add_vbach_blend_args(infer_custom_f0_parser)
    _add_vbach_embedder_args(infer_custom_f0_parser)
    _add_vbach_render_args(infer_custom_f0_parser)

    # download_hubert
    _add_vbach_embedder_args(download_hubert_parser)

    return parser.parse_args()
576
-
577
-
578
def parse_f0_extract():
    """Parse the command line of the f0 extraction tool.

    Registers a required single input, the f0 method (default ``"rmvpe+"``),
    the f0 range (defaults 50 and 1100) and an optional output path
    (default ``None``).

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    cli = argparse.ArgumentParser(
        description=_i18n("f0_extract_description"),
        epilog=_i18n("f0_extract_epilog")
    )
    # One (flags, settings) pair per option, registered in listing order.
    option_table = (
        (
            ("-i", "--i", "-input", "--input"),
            dict(type=str, required=True, dest="input",
                 help=_i18n("arg_input_single_help")),
        ),
        (
            ("-f0m", "-f0_method", "--f0_method", "--f0-method"),
            dict(type=str, default="rmvpe+", dest="f0_method",
                 help=_i18n("vbach_f0_method_help")),
        ),
        (
            ("-f0min", "--f0_min", "--f0-min"),
            dict(type=int, default=50, dest="f0_min",
                 help=_i18n("vbach_f0_min_help")),
        ),
        (
            ("-f0max", "--f0_max", "--f0-max"),
            dict(type=int, default=1100, dest="f0_max",
                 help=_i18n("vbach_f0_max_help")),
        ),
        (
            ("-o", "-out", "-output", "--output", "--output_path", "--output-path"),
            dict(type=str, default=None, dest="output_path",
                 help=_i18n("f0_extract_output_help")),
        ),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
609
-
610
-
611
def parse_app_args():
    """Parse the command line of the application launcher.

    Registers two boolean switches, stored as ``share`` and ``full``, and an
    integer ``port`` that defaults to ``None``.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    app_cli = argparse.ArgumentParser(
        description=_i18n("app_description"),
        epilog=_i18n("app_epilog")
    )

    share_flags = ("-s", "-share", "--share", "--public", "--gradio_share", "--gradio-share")
    app_cli.add_argument(
        *share_flags, dest="share", action="store_true",
        help=_i18n("app_share_help")
    )

    port_flags = ("-p", "-port", "--port", "--server_port", "--server-port")
    app_cli.add_argument(
        *port_flags, dest="port", type=int, default=None,
        help=_i18n("app_port_help")
    )

    full_flags = ("-f", "-full", "--full", "--no_hf_mode", "--no-hf-mode")
    app_cli.add_argument(
        *full_flags, dest="full", action="store_true",
        help=_i18n("app_full_help")
    )

    return app_cli.parse_args()
 
1
+ import argparse
2
+ from pathlib import Path
3
+ from i18n import _i18n
4
+ BASE_DIR = Path(__file__).resolve().parent
5
+ from audio import output_formats
6
+
7
+ def tobool(val: str | bool | int):
8
+ if isinstance(val, int):
9
+ return True if val >= 1 else False
10
+ elif isinstance(val, str):
11
+ if val in ["y", "yes", "Yes", "true", "True", "1"]:
12
+ return True
13
+ else:
14
+ return False
15
+ elif isinstance(val, bool):
16
+ return val
17
+
18
class NestedAction(argparse.Action):
    """Store a parsed value on a nested namespace addressed by a dotted ``dest``.

    With ``dest="group.name"`` the value ends up at ``namespace.group.name``;
    the ``group`` namespace is created on first use and reused afterwards.
    A ``dest`` without a dot is stored directly on *namespace* instead of
    raising ``ValueError``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # Split only on the first dot, e.g. "database.host" -> ("database", "host").
        group, dot, attr = self.dest.partition('.')
        if not dot:
            # Plain dest: behave like the built-in "store" action.
            setattr(namespace, self.dest, values)
            return
        # Fetch the nested namespace, creating it when absent or preset to None.
        groupspace = getattr(namespace, group, None)
        if groupspace is None:
            groupspace = argparse.Namespace()
        # Write the value into the nested object...
        setattr(groupspace, attr, values)
        # ...and (re)attach the nested object to the main namespace.
        setattr(namespace, group, groupspace)
28
+
29
class NestedStoreTrue(argparse.Action):
    """A ``store_true``-style flag that writes to a nested namespace.

    With ``dest="group.name"``, passing the flag sets
    ``namespace.group.name = True``. A ``dest`` without a dot is set directly
    on *namespace* instead of raising ``ValueError``.
    """

    def __init__(self, option_strings, dest, default=False, help=None, **kwargs):
        # nargs=0 makes the option consume no value. The nested namespace is
        # not prepared here; it is only created in __call__ when the flag is seen.
        super().__init__(option_strings=option_strings, dest=dest, nargs=0, default=default, help=help, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        # The flag was given: record True at the (possibly nested) destination.
        group, dot, attr = self.dest.partition('.')
        if not dot:
            setattr(namespace, self.dest, True)
            return
        # Fetch the nested namespace, creating it when absent or preset to None.
        groupspace = getattr(namespace, group, None)
        if groupspace is None:
            groupspace = argparse.Namespace()
        setattr(groupspace, attr, True)
        setattr(namespace, group, groupspace)
40
+
41
def _add_separator_mode(subparsers, name):
    """Create the sub-command *name*; its texts use the i18n keys ``arg_<name>_{help,description,epilog}``."""
    return subparsers.add_parser(
        name,
        help=_i18n(f"arg_{name}_help"),
        description=_i18n(f"arg_{name}_description"),
        epilog=_i18n(f"arg_{name}_epilog")
    )


def _add_multi_input_arg(subparser):
    """Register the multi-file ``input`` option (``nargs="+"``)."""
    subparser.add_argument(
        "-i", "--i", "-input", "--input", "--input_files", "--input-files",
        nargs="+", dest="input",
        help=_i18n("arg_input_help")
    )


def _add_output_args(subparser, template_default, template_keys, template_example):
    """Register the output-dir, output-format and template options, in that order."""
    subparser.add_argument(
        "-o", "-out", "-output", "--output", "--output_dir", "--output-dir",
        type=str, default=".", dest="output_dir",
        help=_i18n("arg_output_dir_help")
    )
    subparser.add_argument(
        "-of", "-output_fmt", "--output_format", "--output-format",
        type=str, choices=output_formats, default=output_formats[0], dest="output_format",
        help=_i18n("arg_output_format_help", formats=", ".join(output_formats), default=output_formats[0])
    )
    subparser.add_argument(
        "-tm", "-tmplt", "--template", type=str, default=template_default, dest="template",
        help=_i18n("arg_template_help", keys=_i18n(template_keys), example=template_example)
    )


def _add_spec_invert_arg(subparser):
    """Register the ``use_spec_invert`` switch."""
    subparser.add_argument(
        "-ispec", "-spec_invert", "-spec-invert", "--use_spec_invert", "--use-spec-invert",
        action="store_true", dest="use_spec_invert",
        help=_i18n("arg_use_spec_invert_help")
    )


def _add_ensemble_type_arg(subparser):
    """Register the ``ensemble_type`` option (default ``"avg_fft"``)."""
    subparser.add_argument(
        "-t", "-type", "-etype", "--ensemble_type", "--ensemble-type",
        type=str, default="avg_fft", dest="ensemble_type",
        help=_i18n("arg_ensemble_type_help")
    )


def _add_stem_and_extra_args(subparser, add_params_args):
    """Register instrumental/spec-invert/stem options plus one ``--<name>`` option per extra param."""
    subparser.add_argument(
        "-inst", "-ext_inst", "-ext-inst", "--extract_instrumental", "--extract-instrumental",
        action="store_true", dest="extract_instrumental",
        help=_i18n("arg_extract_instrumental_help")
    )
    _add_spec_invert_arg(subparser)
    subparser.add_argument(
        "-st", "--st", "-stems", "--stems", "--selected_stems", "--selected-stems",
        nargs="*", metavar="STEM", dest="selected_stems",
        help=_i18n("arg_selected_stems_help")
    )
    for param_name, param_value in add_params_args.items():
        param_type = param_value.get("type")
        if param_type == "bool":
            action, converter = NestedStoreTrue, None
        elif param_type == "int":
            action, converter = NestedAction, int
        elif param_type == "float":
            action, converter = NestedAction, float
        else:
            action, converter = NestedAction, str
        # NOTE(review): argparse stores the default under the literal attribute
        # name "add_params.<name>"; the nested ``add_params`` namespace only
        # exists once one of these options is actually passed - confirm callers
        # handle its absence.
        subparser.add_argument(
            f"--{param_name}",
            action=action,
            type=converter,
            default=param_value.get("default"),
            dest=f"add_params.{param_name}",
            help=_i18n("arg_add_param_help")
        )


def parse_separator_args(add_params_args: dict | None = None):
    """Build the separator command-line parser and parse ``sys.argv``.

    Sub-commands (stored in ``mode``): ``separate``, ``custom_separate``,
    ``info``, ``auto_ensemble``, ``manual_ensemble`` and ``subtract``.

    Args:
        add_params_args: Mapping of extra parameter name to a dict read with
            the keys ``"type"`` (``"bool"``, ``"int"``, ``"float"``; anything
            else is parsed as ``str``) and ``"default"``. Each entry becomes a
            ``--<name>`` option on ``separate`` and ``custom_separate`` with
            ``dest="add_params.<name>"``. ``None`` (the default) means no
            extra options; a ``None`` sentinel replaces the former shared
            mutable ``{}`` default.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    add_params_args = {} if add_params_args is None else add_params_args

    parser = argparse.ArgumentParser(
        description=_i18n("arg_main_description"),
        epilog=_i18n("arg_main_epilog")
    )
    subparsers = parser.add_subparsers(
        title=_i18n("arg_subcommands_title"),
        dest="mode",
        description=_i18n("arg_subcommands_description"),
        help=_i18n("arg_subcommands_help")
    )

    # Registration order determines the order shown in --help.
    separate_parser = _add_separator_mode(subparsers, "separate")
    custom_separate_parser = _add_separator_mode(subparsers, "custom_separate")
    info_parser = _add_separator_mode(subparsers, "info")
    auto_ensemble_parser = _add_separator_mode(subparsers, "auto_ensemble")
    manual_ensemble_parser = _add_separator_mode(subparsers, "manual_ensemble")
    subtract_parser = _add_separator_mode(subparsers, "subtract")

    # separate
    _add_multi_input_arg(separate_parser)
    _add_output_args(separate_parser, "NAME_STEM_MODEL", "template_keys_separate", "NAME_STEM_MODEL")
    separate_parser.add_argument(
        "-mn", "-model", "--model_name", "--model-name",
        type=str, default="bs_6stem", dest="model_name",
        help=_i18n("arg_model_name_help")
    )
    _add_stem_and_extra_args(separate_parser, add_params_args)

    # custom_separate
    _add_multi_input_arg(custom_separate_parser)
    _add_output_args(custom_separate_parser, "NAME_STEM_MODEL", "template_keys_separate", "NAME_STEM_MODEL")
    custom_separate_parser.add_argument(
        "-mt", "-mtype", "--model_type", "--model-type",
        type=str, default="bs_roformer", dest="model_type",
        help=_i18n("arg_model_type_help")
    )
    custom_separate_parser.add_argument(
        "-ckpt", "--ckpt", "-checkpoint", "--checkpoint", "--checkpoint_path", "--checkpoint-path",
        type=str, required=True, dest="checkpoint_path",
        help=_i18n("arg_checkpoint_path_help")
    )
    custom_separate_parser.add_argument(
        "-conf", "--conf", "-config", "--config", "--config_path", "--config-path",
        type=str, required=True, dest="config_path",
        help=_i18n("arg_config_path_help")
    )
    _add_stem_and_extra_args(custom_separate_parser, add_params_args)

    # auto_ensemble
    auto_ensemble_parser.add_argument(
        "-i", "--i", "-input", "--input", "--input_file", "--input-file",
        type=str, required=True, dest="input",
        help=_i18n("arg_input_single_help")
    )
    # The default template and the help example intentionally stay as in the
    # original ("NAME_TYPE_COUNT" vs "NAME_COUNT_TYPE").
    _add_output_args(auto_ensemble_parser, "NAME_TYPE_COUNT", "template_keys_auto_ensemble", "NAME_COUNT_TYPE")
    _add_ensemble_type_arg(auto_ensemble_parser)
    _add_spec_invert_arg(auto_ensemble_parser)
    auto_ensemble_parser.add_argument(
        "-save_stems", "-save-stems", "-save_primary_stems", "--save-primary-stems",
        action="store_true", dest="save_primary_stems",
        help=_i18n("arg_save_primary_stems_help")
    )
    # Exactly one of --flow / --preset_json must be supplied.
    auto_ensemble_flow_group = auto_ensemble_parser.add_mutually_exclusive_group(required=True)
    auto_ensemble_flow_group.add_argument(
        "-flow", "--flow", nargs="+", metavar="MODEL:PRIMARY_STEM:INVERT:WEIGHTS",
        dest="flow",
        help=_i18n("arg_flow_help")
    )
    auto_ensemble_flow_group.add_argument(
        "-json", "-preset", "-preset_json", "-preset-json", "--preset_json", "--preset-json",
        type=str, dest="preset",
        help=_i18n("arg_preset_json_help")
    )

    # manual_ensemble
    _add_multi_input_arg(manual_ensemble_parser)
    _add_output_args(manual_ensemble_parser, "NAME_TYPE", "template_keys_manual_ensemble", "NAME_TYPE")
    _add_ensemble_type_arg(manual_ensemble_parser)
    manual_ensemble_parser.add_argument(
        "-w", "-weights", "--weights", type=float, nargs="*", dest="weights",
        help=_i18n("arg_weights_help")
    )

    # subtract
    subtract_parser.add_argument(
        "-i1", "--i1", "-input1", "--input1", "--input_file1", "--input-file1",
        type=str, required=True, dest="input_1",
        help=_i18n("arg_input1_help")
    )
    subtract_parser.add_argument(
        "-i2", "--i2", "-input2", "--input2", "--input_file2", "--input-file2",
        type=str, required=True, dest="input_2",
        help=_i18n("arg_input2_help")
    )
    _add_output_args(subtract_parser, "NAME_TYPE", "template_keys_subtract", "NAME_TYPE")
    _add_spec_invert_arg(subtract_parser)

    # info
    info_parser.add_argument(
        "-u", "-update", "--update", action="store_true", dest="update",
        help=_i18n("arg_update_help")
    )
    info_parser.add_argument(
        "-clear", "-clear_cache", "-clear-cache", "--clear_cache", "--clear-cache",
        action="store_true", dest="clear_cache",
        help=_i18n("arg_clear_cache_help")
    )
    info_parser.add_argument(
        "-mn", "-model", "--model_name", "--model-name",
        type=str, default="bs_6stem", dest="model_name",
        help=_i18n("arg_model_name_help")
    )
    info_parser.add_argument(
        "-dw", "-download", "--download", action="store_true", dest="download",
        help=_i18n("arg_download_help")
    )
    info_parser.add_argument(
        "-l", "-limit", "--limit", type=int, default=None, dest="limit",
        help=_i18n("arg_limit_help")
    )
    info_parser.add_argument(
        "-s", "-stem", "--stem", type=str, default=None, dest="stem",
        help=_i18n("arg_stem_filter_help")
    )
    info_parser.add_argument(
        "-oi", "-installed", "--only_installed", "--only-installed",
        action="store_true", dest="only_installed",
        help=_i18n("arg_only_installed_help")
    )

    return parser.parse_args()
357
+
358
+
359
def _vbach_register(target, specs):
    """Add every ``(flags, options)`` pair from *specs* to *target*, in order."""
    for flags, options in specs:
        target.add_argument(*flags, **options)


def _vbach_model_specs():
    """Build the output directory, checkpoint, index file and pitch options."""
    return [
        (
            ("-o", "-out", "-output", "--output", "--output_dir", "--output-dir"),
            dict(type=str, default=".", dest="output_dir", help=_i18n("arg_output_dir_help")),
        ),
        (
            ("-m", "-model", "--model_path", "--model-path"),
            dict(type=str, required=True, dest="checkpoint_path", help=_i18n("vbach_model_path_help")),
        ),
        (
            ("-idx", "-index", "--index_path", "--index-path"),
            dict(type=str, default="", dest="index_path", help=_i18n("vbach_index_path_help")),
        ),
        (
            ("-p", "-pitch", "--pitch"),
            dict(type=int, default=0, dest="pitch", help=_i18n("vbach_pitch_help")),
        ),
    ]


def _vbach_blend_specs():
    """Build the index rate, volume envelope and protect options."""
    return [
        (
            ("-idxr", "-index_rate", "--index_rate", "--index-rate"),
            dict(type=float, default=0.75, dest="index_rate", help=_i18n("vbach_index_rate_help")),
        ),
        (
            ("-ve", "-volume_envelope", "--volume_envelope", "--volume-envelope"),
            dict(type=float, default=0.25, dest="volume_envelope", help=_i18n("vbach_volume_envelope_help")),
        ),
        (
            ("-pr", "-protect", "--protect"),
            dict(type=float, default=0.33, dest="protect", help=_i18n("vbach_protect_help")),
        ),
    ]


def _vbach_embedder_specs():
    """Build the embedder model and ``use_transformers`` options."""
    return [
        (
            ("-emb", "-embedder", "--embedder_model", "--embedder-model"),
            dict(type=str, default="hubert_base", dest="embedder", help=_i18n("vbach_embedder_help")),
        ),
        (
            ("-tf", "-use_transformers", "--use_transformers", "--use-transformers"),
            dict(action="store_true", dest="use_transformers", help=_i18n("vbach_use_transformers_help")),
        ),
    ]


def _vbach_output_specs():
    """Build the output format, stereo mode, f0 range, chunking and template options."""
    return [
        (
            ("-of", "-output_fmt", "--output_format", "--output-format"),
            dict(
                type=str, choices=output_formats, default=output_formats[0], dest="output_format",
                help=_i18n("arg_output_format_help", formats=", ".join(output_formats), default=output_formats[0]),
            ),
        ),
        (
            ("-stm", "-stereo_mode", "--stereo_mode", "--stereo-mode"),
            dict(
                type=str, choices=("mono", "left/right", "sim/dif"), default="mono", dest="stereo_mode",
                help=_i18n("vbach_stereo_mode_help"),
            ),
        ),
        (
            ("-f0min", "--f0_min", "--f0-min"),
            dict(type=int, default=50, dest="f0_min", help=_i18n("vbach_f0_min_help")),
        ),
        (
            ("-f0max", "--f0_max", "--f0-max"),
            dict(type=int, default=1100, dest="f0_max", help=_i18n("vbach_f0_max_help")),
        ),
        (
            ("-chd", "-chunk_duration", "--chunk_duration", "--chunk-duration"),
            dict(type=int, default=7, dest="chunk_duration", help=_i18n("vbach_chunk_duration_help")),
        ),
        (
            ("-tm", "-tmplt", "--template"),
            dict(
                type=str, default="NAME_F0METHOD_PITCH", dest="template",
                help=_i18n("arg_template_help", keys=_i18n("template_keys_vbach"), example="NAME_F0METHOD_PITCH"),
            ),
        ),
    ]


def parse_vbach_args():
    """Parse the command line of the Vbach tool.

    Three sub-commands are registered (the chosen one is stored in ``mode``):
    ``infer``, ``infer_custom_f0`` and ``download_hubert``. The option groups
    shared between them come from the ``_vbach_*_specs`` helpers; options are
    registered in the same order for each sub-command as before, so ``--help``
    output is unchanged.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    root = argparse.ArgumentParser(
        description=_i18n("vbach_main_description"),
        epilog=_i18n("vbach_main_epilog"),
    )
    commands = root.add_subparsers(
        title=_i18n("arg_subcommands_title"),
        dest="mode",
        description=_i18n("arg_subcommands_description"),
        help=_i18n("arg_subcommands_help"),
    )

    infer_cmd = commands.add_parser(
        "infer",
        help=_i18n("vbach_infer_help"),
        description=_i18n("vbach_infer_description"),
        epilog=_i18n("vbach_infer_epilog"),
    )
    custom_f0_cmd = commands.add_parser(
        "infer_custom_f0",
        help=_i18n("vbach_infer_custom_f0_help"),
        description=_i18n("vbach_infer_custom_f0_description"),
        epilog=_i18n("vbach_infer_custom_f0_epilog"),
    )
    hubert_cmd = commands.add_parser(
        "download_hubert",
        help=_i18n("vbach_download_hubert_help"),
        description=_i18n("vbach_download_hubert_description"),
        epilog=_i18n("vbach_download_hubert_epilog"),
    )

    # infer: accepts several inputs and extracts f0 with the chosen method.
    _vbach_register(infer_cmd, [
        (
            ("-i", "--i", "-input", "--input", "--input_files", "--input-files"),
            dict(nargs="+", dest="input", help=_i18n("arg_input_help")),
        ),
        *_vbach_model_specs(),
        (
            ("-f0m", "-f0_method", "--f0_method", "--f0-method"),
            dict(type=str, default="rmvpe+", dest="f0_method", help=_i18n("vbach_f0_method_help")),
        ),
        *_vbach_blend_specs(),
        (
            ("-hl", "-hop_length", "--hop_length", "--hop-length"),
            dict(type=int, default=128, dest="hop_length", help=_i18n("vbach_hop_length_help")),
        ),
        *_vbach_embedder_specs(),
        *_vbach_output_specs(),
    ])

    # infer_custom_f0: a single required input plus an f0 file option.
    _vbach_register(custom_f0_cmd, [
        (
            ("-i", "--i", "-input", "--input"),
            dict(type=str, required=True, dest="input", help=_i18n("arg_input_single_help")),
        ),
        *_vbach_model_specs(),
        (
            ("-f0f", "-f0_file", "--f0_file", "--f0-file"),
            dict(type=str, dest="f0_file", help=_i18n("vbach_f0_file_help")),
        ),
        *_vbach_blend_specs(),
        *_vbach_embedder_specs(),
        *_vbach_output_specs(),
    ])

    # download_hubert: only needs to know which embedder to fetch.
    _vbach_register(hubert_cmd, _vbach_embedder_specs())

    return root.parse_args()
576
+
577
+
578
def parse_f0_extract():
    """Parse the command line of the f0 extraction tool.

    Registers the input file (required), the f0 method, the f0 frequency
    bounds and an optional output path, then parses ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` with ``input``, ``f0_method``, ``f0_min``,
        ``f0_max`` and ``output_path``.
    """
    cli = argparse.ArgumentParser(
        description=_i18n("f0_extract_description"),
        epilog=_i18n("f0_extract_epilog"),
    )
    # (flags, add_argument keyword options), registered in this order.
    option_table = (
        (
            ("-i", "--i", "-input", "--input"),
            {"type": str, "required": True, "dest": "input", "help": _i18n("arg_input_single_help")},
        ),
        (
            ("-f0m", "-f0_method", "--f0_method", "--f0-method"),
            {"type": str, "default": "rmvpe+", "dest": "f0_method", "help": _i18n("vbach_f0_method_help")},
        ),
        (
            ("-f0min", "--f0_min", "--f0-min"),
            {"type": int, "default": 50, "dest": "f0_min", "help": _i18n("vbach_f0_min_help")},
        ),
        (
            ("-f0max", "--f0_max", "--f0-max"),
            {"type": int, "default": 1100, "dest": "f0_max", "help": _i18n("vbach_f0_max_help")},
        ),
        (
            ("-o", "-out", "-output", "--output", "--output_path", "--output-path"),
            {"type": str, "default": None, "dest": "output_path", "help": _i18n("f0_extract_output_help")},
        ),
    )
    for flags, options in option_table:
        cli.add_argument(*flags, **options)
    return cli.parse_args()
609
+
610
+
611
def parse_app_args():
    """Parse the command line of the web application.

    Registers three options: ``share`` (flag), ``port`` (int, default
    ``None``) and ``full`` (flag), then parses ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` with ``share``, ``port`` and ``full``.
    """
    cli = argparse.ArgumentParser(
        description=_i18n("app_description"),
        epilog=_i18n("app_epilog"),
    )
    # (flags, add_argument keyword options), registered in this order.
    option_table = (
        (
            ("-s", "-share", "--share", "--public", "--gradio_share", "--gradio-share"),
            {"action": "store_true", "dest": "share", "help": _i18n("app_share_help")},
        ),
        (
            ("-p", "-port", "--port", "--server_port", "--server-port"),
            {"type": int, "default": None, "dest": "port", "help": _i18n("app_port_help")},
        ),
        (
            ("-f", "-full", "--full", "--no_hf_mode", "--no-hf-mode"),
            {"action": "store_true", "dest": "full", "help": _i18n("app_full_help")},
        ),
    )
    for flags, options in option_table:
        cli.add_argument(*flags, **options)
    return cli.parse_args()
extra_utils.py CHANGED
@@ -20,7 +20,7 @@ import ctypes
20
  import platform
21
  import numpy as np
22
  import yt_dlp
23
- import hashlib
24
 
25
  try:
26
  import spaces
@@ -47,7 +47,39 @@ if spaces is not None:
47
  import torch
48
  tz = timezone(timedelta(hours=3))
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
class DownloadError(Exception):
    """Exception type for download failures.

    NOTE(review): the raise sites are outside this view; confirm usage there.
    """
53
 
@@ -80,9 +112,6 @@ base_c_params = {
80
  }
81
  }
82
 
83
def get_info():
    """Placeholder with no behaviour; always returns ``None``."""
    return None
85
-
86
  def size_readable(size_bytes: int):
87
  if size_bytes == 0:
88
  return f"0 {_i18n('bytes')}"
@@ -95,6 +124,23 @@ def size_readable(size_bytes: int):
95
  i += 1
96
  return f"{size_bytes:.2f} {units[i]}"
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  def define_audio_with_size(basename: bool = False, **kwargs):
99
  path = kwargs.get("value", None)
100
  if not path:
@@ -510,87 +556,4 @@ def extra_clear_torch_cache():
510
  if hasattr(torch._C, "_jit_pass_onnx_clear_scope_records"):
511
  try:
512
  torch._C._jit_pass_onnx_clear_scope_records()
513
- except Exception: pass
514
-
515
class UserDirectory:
    """Create timestamped working folders below a configurable user directory."""

    def __init__(self):
        # Starts at the current working directory until change_dir() is called.
        self.user_directory = Path('.')

    def change_dir(self, dir: str):
        """Point the user directory at *dir* (the folder is not created here)."""
        self.user_directory = Path(dir)

    @staticmethod
    def _make_stamped(parent):
        """Create and return ``parent/<YYYY-MM-DD_HH-MM-SS>``.

        The timestamp is taken in the module-level ``tz`` timezone. Missing
        parents are created and an existing folder is reused.
        """
        stamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
        target = Path(parent) / stamp
        target.mkdir(parents=True, exist_ok=True)
        return target

    def generate(self, name: str):
        """Create a timestamped folder under ``<user_directory>/<name>`` and return it."""
        return self._make_stamped(self.user_directory / name)

    def generate_from_dir(self, dir: str):
        """Create a timestamped folder directly under *dir* and return it."""
        return self._make_stamped(dir)
533
-
534
class InputFilesDatabase(UserDirectory):
    """Keep uploaded input files under ``<user_directory>/input`` with a JSON index.

    The index lives in ``inputs.json`` inside that folder and mirrors
    ``self.input_base``, the list of stored file paths in upload order.
    """

    def __init__(self):
        super().__init__()
        self.input_dir_base = self.user_directory / "input"
        self.input_dir_base.mkdir(parents=True, exist_ok=True)
        self.input_base_json = self.input_dir_base / "inputs.json"
        self.input_base = []
        # Pick up an index left behind by a previous run, if any.
        self.load()

    def _write_decorator(func):
        """Wrap *func* so the JSON index is written after it returns."""
        def wrapper(self, *args, **kwargs):
            outcome = func(self, *args, **kwargs)
            self.write()
            return outcome
        return wrapper

    def _load_decorator(func):
        """Wrap *func* so the JSON index is reloaded before it runs."""
        def wrapper(self, *args, **kwargs):
            self.load()
            return func(self, *args, **kwargs)
        return wrapper

    def write(self):
        """Serialise ``self.input_base`` to the JSON index file (UTF-8)."""
        # NOTE(review): json.dumps fails on pathlib.Path entries; this relies on
        # Namer.iter returning strings -- confirm against Namer.
        payload = json.dumps(self.input_base, ensure_ascii=False, indent=4)
        self.input_base_json.write_text(payload, encoding="utf-8")

    def load(self):
        """Replace ``self.input_base`` with the index file's content, if it exists."""
        if not self.input_base_json.exists():
            return
        self.input_base = json.loads(self.input_base_json.read_text("utf-8"))
        print(_i18n("input_base_loaded"))

    @_write_decorator
    def upload(self, files, copy=False):
        """Store the audio files from *files* in a fresh timestamped folder.

        Args:
            files: Candidate paths, filtered through ``get_audio_files_from_list``.
            copy: Copy the files when true, otherwise move them.

        Returns:
            The list of destination paths created by this call.
        """
        destination = self.generate_from_dir(self.input_dir_base)
        transfer = shutil.copy2 if copy else shutil.move
        stored = []
        for source in get_audio_files_from_list(files, only_files=True):
            target = Namer.iter(destination / Path(source).name)
            transfer(source, target)
            stored.append(target)
        self.input_base.extend(stored)
        return stored

    @_write_decorator
    def clear(self):
        """Delete every indexed file from disk and empty the index."""
        for stored_path in self.input_base:
            Path(stored_path).unlink(missing_ok=True)
        self.input_base.clear()
        print(_i18n("input_base_cleared"))

    def get_input_list(self):
        """Return a new list of the indexed paths, most recent upload first."""
        return self.input_base[::-1]
589
-
590
class OutputDir(UserDirectory):
    """Create timestamped output folders under a fixed sub-folder name."""

    def __init__(self, dir: str = "output_mvsepless"):
        super().__init__()
        # Name of the sub-folder (below the user directory) that holds outputs.
        self.output_dir_name = dir

    def gen_output_dir(self):
        """Create and return a new timestamped folder for one run's outputs."""
        folder_name = self.output_dir_name
        return self.generate(folder_name)
 
20
  import platform
21
  import numpy as np
22
  import yt_dlp
23
+ import subprocess
24
 
25
  try:
26
  import spaces
 
47
  import torch
48
  tz = timezone(timedelta(hours=3))
49
 
50
+ def get_gdrive_dir():
51
+ try:
52
+ result = subprocess.run(['/bin/mount'], capture_output=True, text=True)
53
+ for line in result.stdout.strip().split('\n'):
54
+ if 'type fuse.drive' in line:
55
+ parts = line.split(' type ')
56
+ if len(parts) >= 2:
57
+ source_mount = parts[0]
58
+ source, mount_point = source_mount.split(' on ')
59
+ return mount_point
60
+ except:
61
+ pass
62
+ return None
63
 
64
def easy_check_is_colab() -> bool:
    """Check whether the code is running in Google Colab.

    The check passes only on x86_64 Linux, when ``google.colab`` can be
    imported and its ``__init__.py`` sits under
    ``/usr/local/lib/python*/dist-packages/google/colab/``.

    Returns:
        ``True`` when running in Colab, otherwise ``False``.
    """
    if platform.machine() != "x86_64" or "Linux" not in platform.platform():
        return False
    try:
        import google.colab
        colab_init: str = google.colab.__file__
        in_system_python = colab_init.startswith("/usr/local/lib/python")
        is_dist_package = colab_init.endswith("/dist-packages/google/colab/__init__.py")
        return in_system_python and is_dist_package
    except ImportError:
        return False
83
 
84
class DownloadError(Exception):
    """Exception type for download failures.

    NOTE(review): the raise sites are outside this view; confirm usage there.
    """
85
 
 
112
  }
113
  }
114
 
 
 
 
115
  def size_readable(size_bytes: int):
116
  if size_bytes == 0:
117
  return f"0 {_i18n('bytes')}"
 
124
  i += 1
125
  return f"{size_bytes:.2f} {units[i]}"
126
 
127
+ def get_size_folder(folder: str | Path):
128
+ folder_path = Path(folder)
129
+ return sum([file.stat().st_size for file in folder_path.rglob('*') if file.is_file()])
130
+
131
def get_disk_usage(path="/content/drive/MyDrive", user_dir="", user_gdrive_dir="", list_subdirs=None):
    """Describe total, used and free space of the disk that holds *path*.

    Args:
        path: Any path on the filesystem to inspect.
        user_dir: Accepted for interface compatibility; not used here.
        user_gdrive_dir: Accepted for interface compatibility; not used here.
        list_subdirs: Accepted for interface compatibility; not used here.
            Defaults to ``None`` rather than a shared mutable list.

    Returns:
        Three lines ("<label>: <size>") for total, used and free space, with
        translated labels and human-readable sizes, or an empty string when
        the usage cannot be determined.
    """
    try:
        usage = shutil.disk_usage(path)
        report = (
            f"{_i18n('all_space')}: {size_readable(usage.total)}",
            f"{_i18n('used_space')}: {size_readable(usage.used)}",
            f"{_i18n('free_space')}: {size_readable(usage.free)}",
        )
        return "\n".join(report)
    except Exception:
        # Deliberately best-effort: callers get an empty string on any failure.
        return ""
143
+
144
  def define_audio_with_size(basename: bool = False, **kwargs):
145
  path = kwargs.get("value", None)
146
  if not path:
 
556
  if hasattr(torch._C, "_jit_pass_onnx_clear_scope_records"):
557
  try:
558
  torch._C._jit_pass_onnx_clear_scope_records()
559
+ except Exception: pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
i18n.py CHANGED
@@ -551,7 +551,22 @@ TRANSLATIONS: Dict[Language, Dict[str, str]] = {
551
  "no": "Нет",
552
  "zerogpu=true": "Среда выполнения - ZeroGPU",
553
  "ensemble_processing": "Создание ансамбля",
554
- "tracks": "треков"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
555
  },
556
  "en": {
557
  "bytes": "B",
@@ -1087,7 +1102,20 @@ Example: `NAME_F0METHOD_PITCH` → `Song_custom_0`
1087
  "no": "No",
1088
  "zerogpu=true": "Runtime is ZeroGPU",
1089
  "ensemble_processing": "Creating ensemble",
1090
- "tracks": "tracks"
 
 
 
 
 
 
 
 
 
 
 
 
 
1091
  }
1092
  }
1093
 
 
551
  "no": "Нет",
552
  "zerogpu=true": "Среда выполнения - ZeroGPU",
553
  "ensemble_processing": "Создание ансамбля",
554
+ "tracks": "треков",
555
+ "app_user_dir_help": "Путь к директории для хранения пользовательских файлов",
556
+ "gdrive_mount_found": "Обнаружен привязанный Google Диск",
557
+ "copy_to_gdrive": "Копирование данных на Google Диск",
558
+ "dirs": "директорий",
559
+ "copy_to_gdrive_done": "Копирование завершено",
560
+ "copied_dirs": "Скопировано директорий",
561
+ "copy_from_current_user_dir_to_gdrive": "Копировать все пользовательские данные на Google Диск",
562
+ "google_drive": "Google Диск",
563
+ "copy_from_gdrive_to_current_user_dir": "Копировать все пользовательские данные с Google Диска в среду выполнения",
564
+ "copy_to_current_user_dir": "Копирование данных в среду выполнения",
565
+ "free_space": "Свободно",
566
+ "used_space": "Использовано",
567
+ "all_space": "Всего",
568
+ "used_space_data_local": "Обьем пользовательских данных в среде выполнения",
569
+ "used_space_data_gdrive": "Обьем пользовательских данных на Google Диске"
570
  },
571
  "en": {
572
  "bytes": "B",
 
1102
  "no": "No",
1103
  "zerogpu=true": "Runtime is ZeroGPU",
1104
  "ensemble_processing": "Creating ensemble",
1105
+ "tracks": "tracks",
1106
+ "app_user_dir_help": "Path to directories for storing user files",
1107
+ "gdrive_mount_found": "Detected mounted Google Drive",
1108
+ "copy_to_gdrive": "Copying data to Google Drive",
1109
+ "dirs": "directories",
1110
+ "copy_to_gdrive_done": "Copy complete",
1111
+ "copied_dirs": "Directories copied",
1112
+ "copy_from_current_user_dir_to_gdrive": "Copy all user data to Google Drive",
1113
+ "google_drive": "Google Drive",
1114
+ "free_space": "Free",
1115
+ "used_space": "Used",
1116
+ "all_space": "All",
1117
+ "used_space_data_local": "User data space in runtime",
1118
+ "used_space_data_gdrive": "User data space on Google Drive"
1119
  }
1120
  }
1121
 
inference.py CHANGED
@@ -273,24 +273,30 @@ class MSSI: # Music Source Separation Inference
273
  def set_add_params(self, **kwargs):
274
  self.add_params = kwargs
275
 
276
- def load_config(self, model_type: str, conf: str):
277
- conf_ = Path(conf)
 
 
 
 
 
278
  if model_type not in self.model_types:
279
  raise UnknownModelType(_i18n("unknown_model_type", model_type=model_type))
280
  self.model_type = model_type
281
  try:
282
  if self.model_type == "htdemucs":
283
- self.config = OmegaConf.load(conf_)
284
  self.sample_rate = self.config.training.samplerate
285
  else:
286
- with conf_.open("r", encoding="utf-8") as f:
287
  self.config = ConfigDict(yaml.load(f, Loader=yaml.FullLoader))
288
  self.sample_rate = self.config.audio.sample_rate
289
  self.target_instrument = self.config.training.target_instrument
290
  self.instruments = self.config.training.instruments
291
- print(_i18n("config_loaded")+": "+conf.name)
292
  except FileNotFoundError:
293
  self.config = None
 
294
  self.model_type = None
295
  self.target_instrument = None
296
  self.instruments = []
@@ -298,6 +304,7 @@ class MSSI: # Music Source Separation Inference
298
  raise FileNotFoundError(_i18n("config_not_found", path=conf)) from e
299
  except Exception as e:
300
  self.config = None
 
301
  self.model_type = None
302
  self.target_instrument = None
303
  self.instruments = []
@@ -508,13 +515,13 @@ class MSSI: # Music Source Separation Inference
508
  def load_checkpoint(self, ckpt: str | Path):
509
  if not ckpt:
510
  raise PathNotSpecified(_i18n("path_not_specified"))
511
- ckpt_ = Path(ckpt)
512
- if not ckpt_.exists():
 
513
  raise PathNotExist(_i18n("path_not_exist"))
514
  if not self.model:
 
515
  raise ModelNotLoaded(_i18n("model_not_loaded"))
516
-
517
- self.ckpt_path = ckpt_
518
 
519
  if self.model_type == "mdxnet":
520
  try:
@@ -1312,7 +1319,7 @@ class MSSI: # Music Source Separation Inference
1312
  result = self.output_arrays[primary_stem]
1313
  return result, self.sample_rate
1314
 
1315
- def load_model(self, model_type: str, ckpt: str, conf: str):
1316
  self.clear_model()
1317
  self.load_config(model_type=model_type, conf=conf)
1318
  self.load_model_instance()
 
273
  def set_add_params(self, **kwargs):
274
  self.add_params = kwargs
275
 
276
+ def load_config(self, model_type: str, conf: str | Path):
277
+ if not conf:
278
+ raise PathNotSpecified(_i18n("path_not_specified"))
279
+ self.conf_path = Path(conf)
280
+ if not self.conf_path.exists():
281
+ self.conf_path = None
282
+ raise PathNotExist(_i18n("path_not_exist"))
283
  if model_type not in self.model_types:
284
  raise UnknownModelType(_i18n("unknown_model_type", model_type=model_type))
285
  self.model_type = model_type
286
  try:
287
  if self.model_type == "htdemucs":
288
+ self.config = OmegaConf.load(self.conf_path)
289
  self.sample_rate = self.config.training.samplerate
290
  else:
291
+ with self.conf_path.open("r", encoding="utf-8") as f:
292
  self.config = ConfigDict(yaml.load(f, Loader=yaml.FullLoader))
293
  self.sample_rate = self.config.audio.sample_rate
294
  self.target_instrument = self.config.training.target_instrument
295
  self.instruments = self.config.training.instruments
296
+ print(_i18n("config_loaded")+": "+self.conf_path.name)
297
  except FileNotFoundError:
298
  self.config = None
299
+ self.conf_path = None
300
  self.model_type = None
301
  self.target_instrument = None
302
  self.instruments = []
 
304
  raise FileNotFoundError(_i18n("config_not_found", path=conf)) from e
305
  except Exception as e:
306
  self.config = None
307
+ self.conf_path = None
308
  self.model_type = None
309
  self.target_instrument = None
310
  self.instruments = []
 
515
  def load_checkpoint(self, ckpt: str | Path):
516
  if not ckpt:
517
  raise PathNotSpecified(_i18n("path_not_specified"))
518
+ self.ckpt_path = Path(ckpt)
519
+ if not self.ckpt_path.exists():
520
+ self.ckpt_path = None
521
  raise PathNotExist(_i18n("path_not_exist"))
522
  if not self.model:
523
+ self.ckpt_path = None
524
  raise ModelNotLoaded(_i18n("model_not_loaded"))
 
 
525
 
526
  if self.model_type == "mdxnet":
527
  try:
 
1319
  result = self.output_arrays[primary_stem]
1320
  return result, self.sample_rate
1321
 
1322
+ def load_model(self, model_type: str, ckpt: str | Path, conf: str | Path):
1323
  self.clear_model()
1324
  self.load_config(model_type=model_type, conf=conf)
1325
  self.load_model_instance()
vbach_lib/infer.py CHANGED
@@ -1,398 +1,399 @@
1
- from pathlib import Path
2
- import sys
3
- SCRIPT_DIR = Path(__file__).resolve().parent
4
- sys.path.append(str(SCRIPT_DIR.parent))
5
- from extra_utils import hf_spaces_gpu, extra_clear_torch_cache, nuclear_clear_model, emergency_ram_clear
6
- if __package__:
7
- from .hubert_manager import get_hubert, download_hubert, huberts_fairseq
8
- from .pipeline import VC
9
- from .config import Config
10
- from .fairseq import load_model
11
- from .algorithm.synthesizers import Synthesizer
12
- else:
13
- from vbach_lib.hubert_manager import get_hubert, download_hubert, huberts_fairseq
14
- from vbach_lib.pipeline import VC
15
- from vbach_lib.config import Config
16
- from vbach_lib.fairseq import load_model
17
- from vbach_lib.algorithm.synthesizers import Synthesizer
18
- from transformers import HubertModel
19
- from pathlib import Path
20
- import traceback
21
- from audio import read, write, split_channels, split_mid_side, multi_channel_array_from_arrays, output_formats, stereo_to_mono, reshape, mix_arrays, get_audio_files_from_list, check
22
- from inference import PathsNotSpecified, PathNotExist, PathNotSpecified, FileIsNotAudio
23
- from i18n import _i18n
24
- from namer import Namer
25
- from args_parser import parse_vbach_args
26
- import numpy as np
27
- import torch
28
- from torch import nn
29
- import gc
30
-
31
class VbachModelNotFound(Exception):
    """Raised when no voice model path is supplied for conversion."""
32
-
33
- stereo_modes = ("mono", "left/right", "sim/dif")
34
-
35
class HubertModelWithFinalProj(HubertModel):
    """Hubert model with a final projection layer."""

    def __init__(self, config):
        super().__init__(config)
        # Linear layer mapping hidden_size -> classifier_proj_size.
        in_features = config.hidden_size
        out_features = config.classifier_proj_size
        self.final_proj = nn.Linear(in_features, out_features)
41
-
42
def load_audio(path: str | Path, sr: int, stereo_mode: str = stereo_modes[0]):
    """Read an audio file and split it into the signals to convert separately.

    Args:
        path: Audio file to read.
        sr: Sample rate passed to ``read``.
        stereo_mode: ``"mono"`` (one flattened mono signal), ``"left/right"``
            (the result of ``split_channels``) or ``"sim/dif"`` (mono
            mid signal plus the two channels of the side signal).

    Returns:
        ``(mixtures, add_text)``: the list of signals and a parallel list of
        progress labels (``None`` for mono).

    Raises:
        ValueError: If *stereo_mode* is not one of the supported modes.
            Previously this returned two empty lists and failed later.
    """
    if stereo_mode == "mono":
        mix, _ = read(path, sr, mono=True, flatten=True)
        return [mix], [None]

    if stereo_mode == "left/right":
        mix, _ = read(path, sr, mono=False)
        return list(split_channels(mix)), ["[L]", "[R]"]

    if stereo_mode == "sim/dif":
        mix, _ = read(path, sr, mono=False)
        center, stereo_base = split_mid_side(mix, var=3, sr=sr)
        phantom_center = stereo_to_mono(center, to_flatten=True)
        stereo_base_left, stereo_base_right = split_channels(stereo_base)
        return (
            [phantom_center, stereo_base_left, stereo_base_right],
            ["[Sim]", "[Dif-L]", "[Dif-R]"],
        )

    raise ValueError(
        f"unknown stereo mode {stereo_mode!r}; expected one of {tuple(stereo_modes)}"
    )
61
-
62
def post_process_audio(mixtures: list[np.ndarray], target_sr: int, stereo_mode: str = stereo_modes[0]):
    """Recombine the converted signals of one file into a single output array.

    Args:
        mixtures: Converted signals, in the order ``load_audio`` produced them.
        target_sr: Sample rate of the converted signals; only used by the
            ``"sim/dif"`` mix-down.
        stereo_mode: The mode that was used to split the input.

    Returns:
        The array to write: the single signal reshaped to
        ``("channels", "samples")`` for mono, the signals stacked as channels
        for left/right, or the mix of the duplicated mid signal and the side
        channels for sim/dif.

    Raises:
        ValueError: If *stereo_mode* is not one of the supported modes.
            Previously this returned ``None`` implicitly.
    """
    if stereo_mode == "mono":
        return reshape(mixtures[0], ("channels", "samples"))

    if stereo_mode == "left/right":
        return multi_channel_array_from_arrays(*mixtures, index=1, dtype=mixtures[0].dtype)

    if stereo_mode == "sim/dif":
        sim, dif_l, dif_r = mixtures
        dtype = sim.dtype
        # The mid signal is duplicated onto both channels before mixing.
        sim_channel = multi_channel_array_from_arrays(sim, sim, index=1, dtype=dtype)
        dif_channel = multi_channel_array_from_arrays(dif_l, dif_r, index=1, dtype=dtype)
        mixed = mix_arrays([sim_channel, dif_channel], [target_sr, target_sr], target_sr, index=1, dtype=dtype)
        return mixed[0]

    raise ValueError(
        f"unknown stereo mode {stereo_mode!r}; expected one of {tuple(stereo_modes)}"
    )
74
-
75
- class VbachConverter:
76
- def __init__(self):
77
- self.config = Config()
78
- self.hubert_model = None
79
- self.cpt = self.version = self.net_g = self.tgt_sr = self.vc = self.use_f0 = self.vocoder = self.emb_weight_shape = self.required_keys = self.missing_keys = self.text_enc_hidden_dim = None
80
- def load_hubert(self, name: str, use_transformers: bool):
81
- if use_transformers:
82
- model_path = get_hubert(name, True)
83
- self.hubert_model = HubertModelWithFinalProj.from_pretrained(model_path)
84
- self.hubert_model = self.hubert_model.to(self.config.device)
85
- else:
86
- model_path = get_hubert(name, False)
87
- self.hubert_model = load_model(model_path)
88
- self.hubert_model = self.hubert_model.to(self.config.device)
89
- self.hubert_model = self.hubert_model.half() if self.config.is_half else self.hubert_model.float()
90
- self.hubert_model.eval()
91
- print(_i18n("hubert_checkpoint_loaded")+": "+name)
92
-
93
- def unload_hubert(self):
94
- self.hubert_model = self.hubert_model.cpu()
95
- self.hubert_model = None
96
- gc.collect()
97
- extra_clear_torch_cache()
98
- nuclear_clear_model()
99
- emergency_ram_clear()
100
-
101
- def unload_model(self):
102
- self.net_g = self.net_g.cpu()
103
- del self.cpt, self.version, self.net_g, self.tgt_sr, self.vc, self.use_f0, self.vocoder, self.emb_weight_shape, self.required_keys, self.missing_keys, self.text_enc_hidden_dim
104
- self.cpt = self.version = self.net_g = self.tgt_sr = self.vc = self.use_f0 = self.vocoder = self.emb_weight_shape = self.required_keys = self.missing_keys = self.text_enc_hidden_dim = None
105
- extra_clear_torch_cache()
106
- nuclear_clear_model()
107
- emergency_ram_clear()
108
-
109
- def clear_gpu_cache(self):
110
- gc.collect()
111
- torch.clear_autocast_cache()
112
- if self.config.device.type == "mps":
113
- torch.mps.empty_cache()
114
- if self.config.device.type == "cuda":
115
- torch.cuda.synchronize()
116
- torch.cuda.ipc_collect()
117
- torch.cuda.empty_cache()
118
-
119
- def get_vc(self, model_path: str | Path, use_transformers: bool):
120
- self.cpt = torch.load(model_path, map_location="cpu", weights_only=True)
121
- self.required_keys = ["config", "weight"]
122
- self.missing_keys = [key for key in self.required_keys if key not in self.cpt]
123
-
124
- self.tgt_sr = self.cpt["config"][-1]
125
-
126
- self.emb_weight_shape = self.cpt["weight"]["emb_g.weight"].shape
127
- self.cpt["config"][-3] = self.emb_weight_shape[0]
128
-
129
- self.use_f0 = self.cpt.get("f0", 1)
130
- self.version = self.cpt.get("version", "v1")
131
- self.vocoder = self.cpt.get("vocoder", "HiFi-GAN")
132
-
133
- self.text_enc_hidden_dim = 768 if self.version == "v2" else 256
134
-
135
- self.net_g = Synthesizer(
136
- *self.cpt["config"],
137
- use_f0=self.use_f0,
138
- text_enc_hidden_dim=self.text_enc_hidden_dim,
139
- vocoder=self.vocoder,
140
- )
141
-
142
- if hasattr(self.net_g, "enc_q"):
143
- del self.net_g.enc_q
144
- else:
145
- pass
146
-
147
- self.net_g.load_state_dict(
148
- self.cpt["weight"], strict=False
149
- )
150
- self.net_g.eval()
151
-
152
- self.net_g = self.net_g.to(self.config.device)
153
- if self.config.is_half:
154
- self.net_g = self.net_g.half()
155
- else:
156
- self.net_g = self.net_g.float()
157
-
158
- self.vc = VC(self.tgt_sr, self.config, use_transformers)
159
- print(_i18n("checkpoint_loaded")+": "+Path(model_path).name)
160
-
161
@hf_spaces_gpu
def convert_audio(
    self,
    audio_input: str | Path | list[str | Path],
    output_dir: str | Path,
    model_path: str,
    index_path: str,
    pitch: int = 0,
    f0_method: str = "rmvpe+",
    index_rate: float = 0.75,
    volume_envelope: float = 0.25,
    protect: float = 0.33,
    hop_length: int = 128,
    embedder_model: str = "hubert_base",
    use_transformers: bool = False,
    output_format: str = output_formats[0],
    stereo_mode: str = stereo_modes[0],
    f0_min: int = 50,
    f0_max: int = 1100,
    chunk_duration: int = 7,
    template: str = "NAME_F0METHOD_PITCH",
    **kwargs,
):
    """Convert one or more audio files with the voice model at ``model_path``.

    The checkpoint and (if not already loaded) the HuBERT embedder are
    loaded, every valid input file is run through ``self.vc.pipeline`` once
    per channel produced by ``load_audio``, and the result is written into
    ``output_dir`` under a name built from ``template``.

    Args:
        audio_input: A path or list of paths handed to
            ``get_audio_files_from_list``.
        output_dir: Destination directory; a falsy value becomes ``""``.
        model_path: Checkpoint path passed to ``get_vc``.
        index_path: Forwarded to the pipeline as ``file_index``.
        template: Output-name template using NAME, F0METHOD and PITCH.
        **kwargs: Accepted and ignored.
        (All remaining parameters are forwarded to the pipeline unchanged.)

    Returns:
        A list with the value returned by ``write`` for every file that was
        converted successfully.

    Raises:
        VbachModelNotFound: If ``model_path`` is falsy.
        PathsNotSpecified: If no valid input file was found.
    """
    template = Namer.sanitize(template)
    template = Namer.dedup_template(template, keys=["NAME", "F0METHOD", "PITCH"])
    template = Namer.short(template, length=40)

    if not model_path:
        raise VbachModelNotFound()

    processed_audios = []

    try:
        self.get_vc(model_path, use_transformers)

        # Compare against None explicitly: truthiness of a model object is
        # not a reliable "is it loaded" signal.
        if self.hubert_model is None:
            self.load_hubert(embedder_model, use_transformers)

        output_dir = Path(output_dir or "")

        input_valid_files = get_audio_files_from_list(audio_input, only_files=False)
        if not input_valid_files:
            raise PathsNotSpecified(_i18n("paths_not_specified"))

        total = len(input_valid_files)

        print(_i18n("f0_method")+": "+f0_method)

        for i, audio_input_path in enumerate(input_valid_files, start=1):
            # Best-effort batch: a failing file is reported and skipped so
            # the remaining files are still converted.
            try:
                input_file_name = Path(audio_input_path).stem
                mixtures, add_text = load_audio(audio_input_path, 16000, stereo_mode)
                print(_i18n("loaded_mix")+": "+input_file_name)
                converted_mixtures = []

                for mix, add_text_progress in zip(mixtures, add_text):
                    # Scale down in place so the peak does not exceed 0.95.
                    audio_max = np.abs(mix).max() / 0.95
                    if audio_max > 1:
                        mix /= audio_max
                    audio_opt = self.vc.pipeline(
                        model=self.hubert_model,
                        net_g=self.net_g,
                        sid=0,
                        audio=mix,
                        pitch=pitch,
                        f0_method=f0_method,
                        hop_length=hop_length,
                        file_index=index_path,
                        index_rate=index_rate,
                        pitch_guidance=self.use_f0,
                        volume_envelope=volume_envelope,
                        version=self.version,
                        protect=protect,
                        tgt_sr=self.tgt_sr,
                        f0_min=f0_min,
                        f0_max=f0_max,
                        chunk_duration=chunk_duration,
                        add_text_channel=add_text_progress,
                        add_text_custom=f"{i}/{total} {_i18n('files')}",
                    )
                    converted_mixtures.append(audio_opt)
                custom_name = Namer.template(
                    template,
                    PITCH=pitch,
                    F0METHOD=f0_method,
                    NAME=Namer.short_input_name_template(template, PITCH=pitch, F0METHOD=f0_method, NAME=input_file_name)
                )
                processed_audios.append(write(Namer.iter(output_dir / f"{custom_name}.{output_format}"), post_process_audio(converted_mixtures, self.tgt_sr, stereo_mode), self.tgt_sr))
            except Exception:
                traceback.print_exc()
    finally:
        # Release the models even when setup or validation raised, so a
        # failed call does not leave them loaded.
        if getattr(self, "net_g", None) is not None:
            self.unload_model()
        if getattr(self, "hubert_model", None) is not None:
            self.unload_hubert()

    return processed_audios
258
-
259
@hf_spaces_gpu
def convert_audio_custom_f0(
    self,
    audio_input: str | Path,
    output_dir: str | Path,
    model_path: str,
    index_path: str,
    pitch: int = 0,
    f0_file: str | Path = None,
    index_rate: float = 0.75,
    volume_envelope: float = 0.25,
    protect: float = 0.33,
    embedder_model: str = "hubert_base",
    use_transformers: bool = False,
    output_format: str = output_formats[0],
    f0_min: int = 50,
    f0_max: int = 1100,
    chunk_duration: int = 7,
    template: str = "NAME_F0METHOD_PITCH",
    **kwargs,
):
    """Convert a single audio file using a user-supplied F0 file.

    The input is read as mono at 16000 Hz, passed through
    ``self.vc.pipeline_custom_f0`` and written into ``output_dir`` under a
    name built from ``template`` with F0METHOD fixed to ``"custom"``.

    Args:
        audio_input: Path of the file to convert.
        output_dir: Destination directory; a falsy value becomes ``""``.
        model_path: Checkpoint path passed to ``get_vc``.
        index_path: Forwarded to the pipeline as ``file_index``.
        f0_file: Forwarded to the pipeline as ``f0_file``.
        template: Output-name template using NAME, F0METHOD and PITCH.
        **kwargs: Accepted and ignored.
        (All remaining parameters are forwarded to the pipeline unchanged.)

    Returns:
        The value returned by ``write`` on success, or ``None`` when the
        conversion failed (the traceback is printed instead of raised).

    Raises:
        VbachModelNotFound: If ``model_path`` is falsy.
    """
    template = Namer.sanitize(template)
    template = Namer.dedup_template(template, keys=["NAME", "F0METHOD", "PITCH"])
    template = Namer.short(template, length=40)

    if not model_path:
        raise VbachModelNotFound()

    output_path = None

    try:
        self.get_vc(model_path, use_transformers)

        # Compare against None explicitly: truthiness of a model object is
        # not a reliable "is it loaded" signal.
        if self.hubert_model is None:
            self.load_hubert(embedder_model, use_transformers)

        output_dir = Path(output_dir or "")

        print(_i18n("f0_method")+": "+"custom")

        # Input problems are reported through the printed traceback rather
        # than propagated; the caller receives None in that case.
        try:
            if not audio_input:
                raise PathNotSpecified(_i18n("path_not_specified"))
            audio_input = Path(audio_input)
            if not audio_input.exists():
                raise PathNotExist(_i18n("path_not_exist"))
            if not check(audio_input):
                raise FileIsNotAudio(_i18n("file_is_not_audio", path=audio_input))

            input_file_name = audio_input.stem
            mix, _sr = read(audio_input, sr=16000, mono=True, flatten=True)
            print(_i18n("loaded_mix")+": "+input_file_name)

            # Scale down in place so the peak does not exceed 0.95.
            audio_max = np.abs(mix).max() / 0.95
            if audio_max > 1:
                mix /= audio_max
            audio_opt = self.vc.pipeline_custom_f0(
                model=self.hubert_model,
                net_g=self.net_g,
                sid=0,
                audio=mix,
                pitch=pitch,
                f0_file=f0_file,
                file_index=index_path,
                index_rate=index_rate,
                pitch_guidance=self.use_f0,
                volume_envelope=volume_envelope,
                version=self.version,
                protect=protect,
                tgt_sr=self.tgt_sr,
                f0_min=f0_min,
                f0_max=f0_max,
                chunk_duration=chunk_duration,
                add_text_channel="",
                add_text_custom=f"{_i18n('custom_f0')}",
            )
            custom_name = Namer.template(
                template,
                PITCH=pitch,
                F0METHOD="custom",
                NAME=Namer.short_input_name_template(template, PITCH=pitch, F0METHOD="custom", NAME=input_file_name)
            )
            output_path = write(Namer.iter(output_dir / f"{custom_name}.{output_format}"), audio_opt, self.tgt_sr)
        except Exception:
            traceback.print_exc()
    finally:
        # Release the models even when loading raised, so a failed call
        # does not leave them loaded.
        if getattr(self, "net_g", None) is not None:
            self.unload_model()
        if getattr(self, "hubert_model", None) is not None:
            self.unload_hubert()

    return output_path
350
-
351
# Command-line entry point: dispatch on the mode chosen by parse_vbach_args.
if __name__ == "__main__":
    vbach = VbachConverter()
    args = parse_vbach_args()
    mode = args.mode

    # Every supported mode starts by making sure the embedder is downloaded.
    if mode in ("infer", "infer_custom_f0", "download_hubert"):
        download_hubert(args.embedder, args.use_transformers)

    if mode in ("infer", "infer_custom_f0"):
        # Arguments shared by both inference entry points.
        shared_options = dict(
            audio_input=args.input,
            output_dir=args.output_dir,
            model_path=args.checkpoint_path,
            index_path=args.index_path,
            pitch=args.pitch,
            index_rate=args.index_rate,
            volume_envelope=args.volume_envelope,
            protect=args.protect,
            embedder_model=args.embedder,
            use_transformers=args.use_transformers,
            output_format=args.output_format,
            stereo_mode=args.stereo_mode,
            f0_min=args.f0_min,
            f0_max=args.f0_max,
            chunk_duration=args.chunk_duration,
            template=args.template,
        )
        if mode == "infer":
            vbach.convert_audio(
                f0_method=args.f0_method,
                hop_length=args.hop_length,
                **shared_options,
            )
        else:
            vbach.convert_audio_custom_f0(
                f0_file=args.f0_file,
                **shared_options,
            )
 
1
+ from pathlib import Path
2
+ import sys
3
+ SCRIPT_DIR = Path(__file__).resolve().parent
4
+ sys.path.append(str(SCRIPT_DIR.parent))
5
+ from extra_utils import hf_spaces_gpu, extra_clear_torch_cache, nuclear_clear_model, emergency_ram_clear
6
+ if __package__:
7
+ from .hubert_manager import get_hubert, download_hubert, huberts_fairseq
8
+ from .pipeline import VC
9
+ from .config import Config
10
+ from .fairseq import load_model
11
+ from .algorithm.synthesizers import Synthesizer
12
+ else:
13
+ from vbach_lib.hubert_manager import get_hubert, download_hubert, huberts_fairseq
14
+ from vbach_lib.pipeline import VC
15
+ from vbach_lib.config import Config
16
+ from vbach_lib.fairseq import load_model
17
+ from vbach_lib.algorithm.synthesizers import Synthesizer
18
+ from transformers import HubertModel
19
+ from pathlib import Path
20
+ import traceback
21
+ from audio import read, write, split_channels, split_mid_side, multi_channel_array_from_arrays, output_formats, stereo_to_mono, reshape, mix_arrays, get_audio_files_from_list, check
22
+ from inference import PathsNotSpecified, PathNotExist, PathNotSpecified, FileIsNotAudio
23
+ from i18n import _i18n
24
+ from namer import Namer
25
+ from args_parser import parse_vbach_args
26
+ import numpy as np
27
+ import torch
28
+ from torch import nn
29
+ import gc
30
+
31
class VbachModelNotFound(Exception):
    """Raised when no voice-model checkpoint path is supplied."""
32
+
33
# Channel-handling modes understood by load_audio / post_process_audio;
# the first entry is used as the default.
stereo_modes = ("mono", "left/right", "sim/dif")
34
+
35
class HubertModelWithFinalProj(HubertModel):
    """Hubert model with a final projection layer."""

    def __init__(self, config):
        super().__init__(config)
        # Extra linear layer mapping hidden_size features to classifier_proj_size.
        self.final_proj = nn.Linear(config.hidden_size, config.classifier_proj_size)
41
+
42
def load_audio(path: str | Path, sr: int, stereo_mode: str = stereo_modes[0]):
    """Read an audio file and split it into the arrays to convert separately.

    Args:
        path: File passed to ``read``.
        sr: Sample rate passed to ``read`` (and to ``split_mid_side``).
        stereo_mode: One of ``stereo_modes``:
            ``"mono"`` - a single array read with ``mono=True, flatten=True``;
            ``"left/right"`` - the channels returned by ``split_channels``;
            ``"sim/dif"`` - the mono-folded centre from ``split_mid_side``
            followed by the two channels of the remaining stereo base.

    Returns:
        ``(mixtures, add_text)``: the list of arrays and a parallel list of
        progress labels (``None`` for mono).

    Raises:
        ValueError: If ``stereo_mode`` is not a supported mode. Previously
            this case silently produced two empty lists.
    """
    if stereo_mode == "mono":
        mono, _ = read(path, sr, mono=True, flatten=True)
        return [mono], [None]

    if stereo_mode == "left/right":
        stereo, _ = read(path, sr, mono=False)
        return list(split_channels(stereo)), ["[L]", "[R]"]

    if stereo_mode == "sim/dif":
        stereo, _ = read(path, sr, mono=False)
        center, stereo_base = split_mid_side(stereo, var=3, sr=sr)
        phantom_center = stereo_to_mono(center, to_flatten=True)
        base_left, base_right = split_channels(stereo_base)
        return (
            [phantom_center, base_left, base_right],
            ["[Sim]", "[Dif-L]", "[Dif-R]"],
        )

    raise ValueError(
        f"Unsupported stereo_mode: {stereo_mode!r}; expected one of {stereo_modes}"
    )
61
+
62
def post_process_audio(mixtures: list[np.ndarray], target_sr: int, stereo_mode: str = stereo_modes[0]):
    """Reassemble converted per-channel arrays into one output array.

    Args:
        mixtures: Converted arrays in the order produced by ``load_audio``
            for the same ``stereo_mode``.
        target_sr: Sample rate handed to ``mix_arrays`` (``"sim/dif"`` only).
        stereo_mode: One of ``stereo_modes``.

    Returns:
        ``"mono"``: the first array reshaped to ``("channels", "samples")``.
        ``"left/right"``: all arrays combined by
        ``multi_channel_array_from_arrays``.
        ``"sim/dif"``: the first element of ``mix_arrays`` applied to the
        duplicated "sim" pair and the "dif" left/right pair.

    Raises:
        ValueError: If ``stereo_mode`` is not a supported mode. Previously
            this case implicitly returned ``None``.
    """
    if stereo_mode == "mono":
        return reshape(mixtures[0], ("channels", "samples"))

    if stereo_mode == "left/right":
        return multi_channel_array_from_arrays(*mixtures, index=1, dtype=mixtures[0].dtype)

    if stereo_mode == "sim/dif":
        sim, dif_l, dif_r = mixtures
        dtype = sim.dtype
        # The shared ("sim") signal feeds both output channels.
        sim_channel = multi_channel_array_from_arrays(sim, sim, index=1, dtype=dtype)
        dif_channel = multi_channel_array_from_arrays(dif_l, dif_r, index=1, dtype=dtype)
        mixed = mix_arrays([sim_channel, dif_channel], [target_sr, target_sr], target_sr, index=1, dtype=dtype)
        return mixed[0]

    raise ValueError(
        f"Unsupported stereo_mode: {stereo_mode!r}; expected one of {stereo_modes}"
    )
74
+
75
class VbachConverter:
    """Load a voice-model checkpoint plus a HuBERT embedder and convert audio.

    ``convert_audio`` handles one or many files with automatic F0 extraction;
    ``convert_audio_custom_f0`` handles a single file with a supplied F0 file.
    Both load the models on entry and unload them before returning or raising.
    """

    def __init__(self):
        self.config = Config()
        self.hubert_model = None
        self._reset_model_state()

    def _reset_model_state(self):
        """Set every checkpoint-derived attribute back to ``None``."""
        self.cpt = None
        self.version = None
        self.net_g = None
        self.tgt_sr = None
        self.vc = None
        self.use_f0 = None
        self.vocoder = None
        self.emb_weight_shape = None
        self.required_keys = None
        self.missing_keys = None
        self.text_enc_hidden_dim = None

    def load_hubert(self, name: str, use_transformers: bool):
        """Load the embedder ``name`` onto the configured device.

        With ``use_transformers`` the model is built through
        ``HubertModelWithFinalProj.from_pretrained``; otherwise through the
        project's ``load_model``. ``self.hubert_model`` is assigned only once
        the model is fully prepared, so a failed load leaves it ``None``.
        """
        if use_transformers:
            model = HubertModelWithFinalProj.from_pretrained(get_hubert(name, True))
        else:
            model = load_model(get_hubert(name, False))
        model = model.to(self.config.device)
        model = model.half() if self.config.is_half else model.float()
        model.eval()
        self.hubert_model = model
        print(_i18n("hubert_checkpoint_loaded")+": "+name)

    def unload_hubert(self):
        """Drop the embedder and run the memory-cleanup helpers.

        Safe to call when no embedder is loaded.
        """
        if self.hubert_model is not None:
            self.hubert_model = self.hubert_model.cpu()
        self.hubert_model = None
        gc.collect()
        extra_clear_torch_cache()
        nuclear_clear_model()
        emergency_ram_clear()

    def unload_model(self):
        """Drop the synthesizer and all checkpoint state, then clean up memory.

        Safe to call when no checkpoint is loaded.
        """
        if self.net_g is not None:
            self.net_g = self.net_g.cpu()
        self._reset_model_state()
        extra_clear_torch_cache()
        nuclear_clear_model()
        emergency_ram_clear()

    def clear_gpu_cache(self):
        """Collect garbage and empty the cache of the configured device type."""
        gc.collect()
        torch.clear_autocast_cache()
        if self.config.device.type == "mps":
            torch.mps.empty_cache()
        if self.config.device.type == "cuda":
            torch.cuda.synchronize()
            torch.cuda.ipc_collect()
            torch.cuda.empty_cache()

    def get_vc(self, model_path: str | Path, use_transformers: bool):
        """Load the checkpoint at ``model_path`` and build synthesizer + pipeline.

        Populates ``cpt``, ``tgt_sr``, ``use_f0``, ``version``, ``vocoder``,
        ``net_g`` and ``vc`` on the instance.

        Raises:
            KeyError: If the checkpoint lacks the ``"config"`` or ``"weight"``
                entry (same exception type as before, with a clearer message).
        """
        self.cpt = torch.load(model_path, map_location="cpu", weights_only=True)
        self.required_keys = ["config", "weight"]
        self.missing_keys = [key for key in self.required_keys if key not in self.cpt]
        if self.missing_keys:
            raise KeyError(
                f"Checkpoint {Path(model_path).name} is missing required keys: "
                + ", ".join(self.missing_keys)
            )

        # The target sample rate is stored as the last config entry.
        self.tgt_sr = self.cpt["config"][-1]

        # Overwrite the third-from-last config entry with the first dimension
        # of the stored "emb_g.weight" tensor.
        self.emb_weight_shape = self.cpt["weight"]["emb_g.weight"].shape
        self.cpt["config"][-3] = self.emb_weight_shape[0]

        self.use_f0 = self.cpt.get("f0", 1)
        self.version = self.cpt.get("version", "v1")
        self.vocoder = self.cpt.get("vocoder", "HiFi-GAN")

        self.text_enc_hidden_dim = 768 if self.version == "v2" else 256

        self.net_g = Synthesizer(
            *self.cpt["config"],
            use_f0=self.use_f0,
            text_enc_hidden_dim=self.text_enc_hidden_dim,
            vocoder=self.vocoder,
        )

        # Remove the enc_q submodule when the synthesizer has one.
        if hasattr(self.net_g, "enc_q"):
            del self.net_g.enc_q

        self.net_g.load_state_dict(self.cpt["weight"], strict=False)
        self.net_g.eval()

        self.net_g = self.net_g.to(self.config.device)
        self.net_g = self.net_g.half() if self.config.is_half else self.net_g.float()

        self.vc = VC(self.tgt_sr, self.config, use_transformers)
        print(_i18n("checkpoint_loaded")+": "+Path(model_path).name)

    @staticmethod
    def _normalize_template(template: str) -> str:
        """Sanitize, de-duplicate and shorten an output-name template."""
        template = Namer.sanitize(template)
        template = Namer.dedup_template(template, keys=["NAME", "F0METHOD", "PITCH"])
        return Namer.short(template, length=40)

    @staticmethod
    def _limit_peak(mix):
        """Scale ``mix`` in place so its absolute peak does not exceed 0.95."""
        audio_max = np.abs(mix).max() / 0.95
        if audio_max > 1:
            mix /= audio_max
        return mix

    @staticmethod
    def _output_name(template: str, pitch: int, f0_method: str, input_file_name: str) -> str:
        """Fill ``template`` with the pitch, F0 method and shortened input name."""
        return Namer.template(
            template,
            PITCH=pitch,
            F0METHOD=f0_method,
            NAME=Namer.short_input_name_template(template, PITCH=pitch, F0METHOD=f0_method, NAME=input_file_name)
        )

    def _load_models(self, model_path, embedder_model: str, use_transformers: bool):
        """Load the checkpoint and, if none is loaded yet, the embedder."""
        self.get_vc(model_path, use_transformers)
        # Compare against None explicitly: truthiness of a model object is
        # not a reliable "is it loaded" signal.
        if self.hubert_model is None:
            self.load_hubert(embedder_model, use_transformers)

    def _unload_models(self):
        """Unload the synthesizer and the embedder."""
        self.unload_model()
        self.unload_hubert()

    @hf_spaces_gpu  # (duration=120) for a LongQuota Space / long quota on HuggingFace ZeroGPU (default is 60 seconds)
    def convert_audio(
        self,
        audio_input: str | Path | list[str | Path],
        output_dir: str | Path,
        model_path: str,
        index_path: str,
        pitch: int = 0,
        f0_method: str = "rmvpe+",
        index_rate: float = 0.75,
        volume_envelope: float = 0.25,
        protect: float = 0.33,
        hop_length: int = 128,
        embedder_model: str = "hubert_base",
        use_transformers: bool = False,
        output_format: str = output_formats[0],
        stereo_mode: str = stereo_modes[0],
        f0_min: int = 50,
        f0_max: int = 1100,
        chunk_duration: int = 7,
        template: str = "NAME_F0METHOD_PITCH",
        **kwargs,
    ):
        """Convert one or more audio files with the voice model at ``model_path``.

        Every valid input file is run through ``self.vc.pipeline`` once per
        channel produced by ``load_audio`` and written into ``output_dir``
        under a name built from ``template``.

        Args:
            audio_input: A path or list of paths handed to
                ``get_audio_files_from_list``.
            output_dir: Destination directory; a falsy value becomes ``""``.
            model_path: Checkpoint path passed to ``get_vc``.
            index_path: Forwarded to the pipeline as ``file_index``.
            template: Output-name template using NAME, F0METHOD and PITCH.
            **kwargs: Accepted and ignored.
            (All remaining parameters are forwarded to the pipeline unchanged.)

        Returns:
            A list with the value returned by ``write`` for every file that
            was converted successfully.

        Raises:
            VbachModelNotFound: If ``model_path`` is falsy.
            PathsNotSpecified: If no valid input file was found.
        """
        template = self._normalize_template(template)

        if not model_path:
            raise VbachModelNotFound()

        processed_audios = []

        try:
            self._load_models(model_path, embedder_model, use_transformers)

            output_dir = Path(output_dir or "")

            input_valid_files = get_audio_files_from_list(audio_input, only_files=False)
            if not input_valid_files:
                raise PathsNotSpecified(_i18n("paths_not_specified"))

            total = len(input_valid_files)

            print(_i18n("f0_method")+": "+f0_method)

            for i, audio_input_path in enumerate(input_valid_files, start=1):
                # Best-effort batch: a failing file is reported and skipped
                # so the remaining files are still converted.
                try:
                    input_file_name = Path(audio_input_path).stem

                    mixtures, add_text = load_audio(audio_input_path, 16000, stereo_mode)
                    print(_i18n("loaded_mix")+": "+Path(audio_input_path).name)
                    converted_mixtures = []

                    for mix, add_text_progress in zip(mixtures, add_text):
                        self._limit_peak(mix)
                        audio_opt = self.vc.pipeline(
                            model=self.hubert_model,
                            net_g=self.net_g,
                            sid=0,
                            audio=mix,
                            pitch=pitch,
                            f0_method=f0_method,
                            hop_length=hop_length,
                            file_index=index_path,
                            index_rate=index_rate,
                            pitch_guidance=self.use_f0,
                            volume_envelope=volume_envelope,
                            version=self.version,
                            protect=protect,
                            tgt_sr=self.tgt_sr,
                            f0_min=f0_min,
                            f0_max=f0_max,
                            chunk_duration=chunk_duration,
                            add_text_channel=add_text_progress,
                            add_text_custom=f"{i}/{total} {_i18n('files')}",
                        )
                        converted_mixtures.append(audio_opt)
                    custom_name = self._output_name(template, pitch, f0_method, input_file_name)
                    processed_audios.append(write(Namer.iter(output_dir / f"{custom_name}.{output_format}"), post_process_audio(converted_mixtures, self.tgt_sr, stereo_mode), self.tgt_sr))
                except Exception:
                    traceback.print_exc()
        finally:
            # Release the models even when setup or validation raised, so a
            # failed call does not leave them loaded.
            self._unload_models()

        return processed_audios

    @hf_spaces_gpu  # (duration=120) for a LongQuota Space / long quota on HuggingFace ZeroGPU (default is 60 seconds)
    def convert_audio_custom_f0(
        self,
        audio_input: str | Path,
        output_dir: str | Path,
        model_path: str,
        index_path: str,
        pitch: int = 0,
        f0_file: str | Path = None,
        index_rate: float = 0.75,
        volume_envelope: float = 0.25,
        protect: float = 0.33,
        embedder_model: str = "hubert_base",
        use_transformers: bool = False,
        output_format: str = output_formats[0],
        f0_min: int = 50,
        f0_max: int = 1100,
        chunk_duration: int = 7,
        template: str = "NAME_F0METHOD_PITCH",
        **kwargs,
    ):
        """Convert a single audio file using a user-supplied F0 file.

        The input is read as mono at 16000 Hz, passed through
        ``self.vc.pipeline_custom_f0`` and written into ``output_dir`` under
        a name built from ``template`` with F0METHOD fixed to ``"custom"``.

        Args:
            audio_input: Path of the file to convert.
            output_dir: Destination directory; a falsy value becomes ``""``.
            model_path: Checkpoint path passed to ``get_vc``.
            index_path: Forwarded to the pipeline as ``file_index``.
            f0_file: Forwarded to the pipeline as ``f0_file``.
            template: Output-name template using NAME, F0METHOD and PITCH.
            **kwargs: Accepted and ignored.
            (All remaining parameters are forwarded to the pipeline unchanged.)

        Returns:
            The value returned by ``write`` on success, or ``None`` when the
            conversion failed (the traceback is printed instead of raised).

        Raises:
            VbachModelNotFound: If ``model_path`` is falsy.
        """
        template = self._normalize_template(template)

        if not model_path:
            raise VbachModelNotFound()

        output_path = None

        try:
            self._load_models(model_path, embedder_model, use_transformers)

            output_dir = Path(output_dir or "")

            print(_i18n("f0_method")+": "+"custom")

            # Input problems are reported through the printed traceback
            # rather than propagated; the caller receives None in that case.
            try:
                if not audio_input:
                    raise PathNotSpecified(_i18n("path_not_specified"))
                audio_input = Path(audio_input)
                if not audio_input.exists():
                    raise PathNotExist(_i18n("path_not_exist"))
                if not check(audio_input):
                    raise FileIsNotAudio(_i18n("file_is_not_audio", path=audio_input))

                input_file_name = audio_input.stem
                mix, _sr = read(audio_input, sr=16000, mono=True, flatten=True)
                print(_i18n("loaded_mix")+": "+audio_input.name)

                self._limit_peak(mix)
                audio_opt = self.vc.pipeline_custom_f0(
                    model=self.hubert_model,
                    net_g=self.net_g,
                    sid=0,
                    audio=mix,
                    pitch=pitch,
                    f0_file=f0_file,
                    file_index=index_path,
                    index_rate=index_rate,
                    pitch_guidance=self.use_f0,
                    volume_envelope=volume_envelope,
                    version=self.version,
                    protect=protect,
                    tgt_sr=self.tgt_sr,
                    f0_min=f0_min,
                    f0_max=f0_max,
                    chunk_duration=chunk_duration,
                    add_text_channel="",
                    add_text_custom=f"{_i18n('custom_f0')}",
                )
                custom_name = self._output_name(template, pitch, "custom", input_file_name)
                output_path = write(Namer.iter(output_dir / f"{custom_name}.{output_format}"), audio_opt, self.tgt_sr)
            except Exception:
                traceback.print_exc()
        finally:
            # Release the models even when loading raised, so a failed call
            # does not leave them loaded.
            self._unload_models()

        return output_path
351
+
352
# Command-line entry point: dispatch on the mode chosen by parse_vbach_args.
if __name__ == "__main__":
    vbach = VbachConverter()
    args = parse_vbach_args()
    mode = args.mode

    # Every supported mode starts by making sure the embedder is downloaded.
    if mode in ("infer", "infer_custom_f0", "download_hubert"):
        download_hubert(args.embedder, args.use_transformers)

    if mode in ("infer", "infer_custom_f0"):
        # Arguments shared by both inference entry points.
        shared_options = dict(
            audio_input=args.input,
            output_dir=args.output_dir,
            model_path=args.checkpoint_path,
            index_path=args.index_path,
            pitch=args.pitch,
            index_rate=args.index_rate,
            volume_envelope=args.volume_envelope,
            protect=args.protect,
            embedder_model=args.embedder,
            use_transformers=args.use_transformers,
            output_format=args.output_format,
            stereo_mode=args.stereo_mode,
            f0_min=args.f0_min,
            f0_max=args.f0_max,
            chunk_duration=args.chunk_duration,
            template=args.template,
        )
        if mode == "infer":
            vbach.convert_audio(
                f0_method=args.f0_method,
                hop_length=args.hop_length,
                **shared_options,
            )
        else:
            vbach.convert_audio_custom_f0(
                f0_file=args.f0_file,
                **shared_options,
            )