Google Drive support added + minor fixes
Browse files- app.py +321 -18
- args_parser.py +630 -630
- extra_utils.py +51 -88
- i18n.py +30 -2
- inference.py +17 -10
- vbach_lib/infer.py +398 -397
app.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import sys
|
| 3 |
import json
|
|
@@ -6,7 +9,7 @@ from urllib.parse import urlparse
|
|
| 6 |
from pathlib import Path, PurePosixPath
|
| 7 |
BASE_DIR = Path(__file__).resolve().parent
|
| 8 |
sys.path.append(str(BASE_DIR))
|
| 9 |
-
from extra_utils import tz, define_audio_with_size, update_audio_with_size, base_c_params,
|
| 10 |
from inference import Separator, add_params, add_params_list, ensemble_types, BASE_DIR
|
| 11 |
from vbach_lib.infer import VbachConverter, stereo_modes
|
| 12 |
from vbach_lib.f0_extractor import f0_methods, crepe_like_f0_methods, f0_extract_and_write
|
|
@@ -18,10 +21,9 @@ from i18n import _i18n
|
|
| 18 |
from args_parser import parse_app_args
|
| 19 |
import tempfile
|
| 20 |
import shutil
|
|
|
|
| 21 |
from copy import deepcopy
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
def generate_add_params_component():
|
| 26 |
add_params_components = []
|
| 27 |
for tab, components in add_params.items():
|
|
@@ -36,10 +38,183 @@ def generate_add_params_component():
|
|
| 36 |
add_params_components.append(gr.Checkbox(label=_i18n(component_name), value=params["default"], info=_i18n(params.get("info", "")), **base_c_params["base"]))
|
| 37 |
return add_params_components
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
class History(UserDirectory):
|
| 40 |
def __init__(self, name: str = "mvsepless"):
|
| 41 |
super().__init__()
|
| 42 |
-
self.history_dir_base = self.user_directory /
|
| 43 |
self.history_dir_base.mkdir(parents=True, exist_ok=True)
|
| 44 |
self.history_dict_json = self.history_dir_base / f"{name}.json"
|
| 45 |
self.history_dict = {}
|
|
@@ -67,6 +242,32 @@ class History(UserDirectory):
|
|
| 67 |
self.history_dict = json.loads(self.history_dict_json.read_text("utf-8"))
|
| 68 |
print(_i18n("history_loaded"))
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
def get_list(self, update_from_file=False):
|
| 71 |
if update_from_file:
|
| 72 |
self.load()
|
|
@@ -78,7 +279,7 @@ class History(UserDirectory):
|
|
| 78 |
self.history_dict.update([(f"{timestamp} | {model_name}", deepcopy(state))])
|
| 79 |
|
| 80 |
def get_from_history(self, key: str):
|
| 81 |
-
return deepcopy(self.history_dict.get(key,
|
| 82 |
|
| 83 |
class HistoryAutoEnsemble(History):
|
| 84 |
def __init__(self):
|
|
@@ -97,6 +298,30 @@ class HistoryAutoEnsemble(History):
|
|
| 97 |
results_ = func(self, *args, **kwargs)
|
| 98 |
return results_
|
| 99 |
return wrapper
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
@_write_decorator
|
| 102 |
def add_to_history(self, etype: str, output: str, inverted_output: str, primary_stems_list: list = []):
|
|
@@ -124,6 +349,29 @@ class HistoryManualEnsemble(History):
|
|
| 124 |
return results_
|
| 125 |
return wrapper
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
@_write_decorator
|
| 128 |
def add_to_history(self, etype: str, state: str):
|
| 129 |
timestamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
|
|
@@ -150,6 +398,29 @@ class HistorySubtractor(History):
|
|
| 150 |
return results_
|
| 151 |
return wrapper
|
| 152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
@_write_decorator
|
| 154 |
def add_to_history(self, itype: str, state: str):
|
| 155 |
timestamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
|
|
@@ -173,6 +444,29 @@ class HistoryVbach(History):
|
|
| 173 |
return results_
|
| 174 |
return wrapper
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
@_write_decorator
|
| 177 |
def add_to_history(self, model_name: str, f0_method: str, pitch: int, output_files: list):
|
| 178 |
timestamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
|
|
@@ -185,7 +479,7 @@ class AutoEnsembleApp(UserDirectory):
|
|
| 185 |
def __init__(self):
|
| 186 |
super().__init__()
|
| 187 |
self.state = []
|
| 188 |
-
self.ensemble_base = self.user_directory /
|
| 189 |
self.ensemble_base.mkdir(parents=True, exist_ok=True)
|
| 190 |
|
| 191 |
def write_flow(self, name: str):
|
|
@@ -294,7 +588,7 @@ class VbachModelsDir(UserDirectory):
|
|
| 294 |
|
| 295 |
def __init__(self):
|
| 296 |
super().__init__()
|
| 297 |
-
self.vbach_models_base = self.user_directory /
|
| 298 |
self.pth_models_dir = self.vbach_models_base / "pth"
|
| 299 |
self.index_models_dir = self.vbach_models_base / "index"
|
| 300 |
self.pth_models_dir.mkdir(parents=True, exist_ok=True)
|
|
@@ -408,7 +702,7 @@ class VbachModelsDir(UserDirectory):
|
|
| 408 |
class F0GenerateOutPath(UserDirectory):
|
| 409 |
def __init__(self):
|
| 410 |
super().__init__()
|
| 411 |
-
self.f0_curves_dir = self.user_directory /
|
| 412 |
self.f0_curves_dir.mkdir(parents=True, exist_ok=True)
|
| 413 |
|
| 414 |
def generate_output_path(self, name: str, f0_method: str):
|
|
@@ -421,7 +715,7 @@ class CustomSeparationModelsDir(UserDirectory):
|
|
| 421 |
|
| 422 |
def __init__(self):
|
| 423 |
super().__init__()
|
| 424 |
-
self.custom_models_base = self.user_directory /
|
| 425 |
self.checkpoints_dir = self.custom_models_base / "checkpoints"
|
| 426 |
self.configs_dir = self.custom_models_base / "configs"
|
| 427 |
self.checkpoints_dir.mkdir(parents=True, exist_ok=True)
|
|
@@ -617,14 +911,8 @@ class App(Separator):
|
|
| 617 |
return gr.skip()
|
| 618 |
return gr.update(choices=current_configs, value=value), current_configs
|
| 619 |
|
| 620 |
-
def get_actual_custom_sep_history_list(self, value, state):
    """Refresh the history dropdown only when the stored history changed.

    Args:
        value: Selection to keep on the dropdown when it is refreshed.
        state: History list the UI last saw.

    Returns:
        ``gr.skip()`` when the current history equals ``state``; otherwise a
        ``(gr.update(...), current_history)`` pair carrying the new choices
        and the list to remember as the next ``state``.
    """
    latest_history = self.custom_sep_history.get_list()
    if latest_history != state:
        return gr.update(choices=latest_history, value=value), latest_history
    # Nothing changed since the last poll: leave the outputs untouched.
    return gr.skip()
|
| 626 |
-
|
| 627 |
def UI(self, theme=None, hf_space_mode=False):
|
|
|
|
| 628 |
all_models = self.get_all_models()
|
| 629 |
default_model = all_models[0]
|
| 630 |
stems_default = self.get_stems(default_model)
|
|
@@ -1472,7 +1760,7 @@ class App(Separator):
|
|
| 1472 |
gr.Warning(_i18n("model_not_selected"))
|
| 1473 |
return [], gr.skip()
|
| 1474 |
|
| 1475 |
-
output_dir = self.output_dir.generate(
|
| 1476 |
download_hubert(embedder_model, use_transformers)
|
| 1477 |
results = self.vbach_converter.convert_audio(
|
| 1478 |
audio_input=input_files,
|
|
@@ -1769,7 +2057,7 @@ class App(Separator):
|
|
| 1769 |
gr.Warning(_i18n("no_f0_file_selected"))
|
| 1770 |
return update_audio_with_size(label=_i18n("vbach_result"), value=None), gr.skip()
|
| 1771 |
|
| 1772 |
-
output_dir = self.output_dir.generate(
|
| 1773 |
download_hubert(embedder_model, use_transformers)
|
| 1774 |
|
| 1775 |
result = self.vbach_converter.convert_audio_custom_f0(
|
|
@@ -2072,6 +2360,21 @@ class App(Separator):
|
|
| 2072 |
def upload_vbach_index_fn(files: list, progress=gr.Progress(track_tqdm=True)):
|
| 2073 |
self.vbach_model_manager.upload_index_model(files)
|
| 2074 |
return gr.update(value=[])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2075 |
|
| 2076 |
return mvsepless_app
|
| 2077 |
|
|
|
|
| 1 |
+
import warnings
|
| 2 |
+
warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*show_api.*") # Предупреждения скрыты
|
| 3 |
+
warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*theme.*")
|
| 4 |
import gradio as gr
|
| 5 |
import sys
|
| 6 |
import json
|
|
|
|
| 9 |
from pathlib import Path, PurePosixPath
|
| 10 |
BASE_DIR = Path(__file__).resolve().parent
|
| 11 |
sys.path.append(str(BASE_DIR))
|
| 12 |
+
from extra_utils import tz, define_audio_with_size, update_audio_with_size, base_c_params, easy_check_is_colab, get_gdrive_dir, one_element_list_to_value, dw_file, dw_yt_dlp, get_disk_usage
|
| 13 |
from inference import Separator, add_params, add_params_list, ensemble_types, BASE_DIR
|
| 14 |
from vbach_lib.infer import VbachConverter, stereo_modes
|
| 15 |
from vbach_lib.f0_extractor import f0_methods, crepe_like_f0_methods, f0_extract_and_write
|
|
|
|
| 21 |
from args_parser import parse_app_args
|
| 22 |
import tempfile
|
| 23 |
import shutil
|
| 24 |
+
from tqdm import tqdm
|
| 25 |
from copy import deepcopy
|
| 26 |
|
|
|
|
|
|
|
| 27 |
def generate_add_params_component():
|
| 28 |
add_params_components = []
|
| 29 |
for tab, components in add_params.items():
|
|
|
|
| 38 |
add_params_components.append(gr.Checkbox(label=_i18n(component_name), value=params["default"], info=_i18n(params.get("info", "")), **base_c_params["base"]))
|
| 39 |
return add_params_components
|
| 40 |
|
| 41 |
+
USER_DIR = ""
|
| 42 |
+
GDRIVE_DIR = get_gdrive_dir()
|
| 43 |
+
def generate_user_dir_from_gdrive():
    """Create the app data directory on the mounted Google Drive.

    Returns:
        The POSIX path of ``<GDRIVE_DIR>/MyDrive/mvsepless-data`` (created if
        missing), or ``None`` when ``GDRIVE_DIR`` is falsy.
    """
    if not GDRIVE_DIR:
        return None
    gdrive_data_dir = Path(GDRIVE_DIR) / "MyDrive" / "mvsepless-data"
    gdrive_data_dir.mkdir(parents=True, exist_ok=True)
    return gdrive_data_dir.as_posix()
|
| 51 |
+
GDRIVE_USER_DIR = generate_user_dir_from_gdrive()
|
| 52 |
+
|
| 53 |
+
def get_default_user_dir():
    """Pick the directory used when no custom user directory is given.

    Returns:
        ``GDRIVE_USER_DIR`` when ``easy_check_is_colab()`` is truthy and a
        Drive directory was found (a notice is printed in that case);
        ``USER_DIR`` in every other situation.
    """
    if easy_check_is_colab() and GDRIVE_DIR:
        print(_i18n("gdrive_mount_found"))
        return GDRIVE_USER_DIR
    return USER_DIR
|
| 62 |
+
|
| 63 |
+
DEFAULT_USER_DIR = get_default_user_dir()
|
| 64 |
+
|
| 65 |
+
def rename_user_dir_path(path: str, mode=0):
    """Re-root *path* between the local user directory and the Drive one.

    Args:
        path: File path to translate. A falsy value yields ``None``.
        mode: ``0`` re-roots a path under ``USER_DIR`` onto
            ``GDRIVE_USER_DIR``; ``1`` does the reverse. Any other value
            yields ``None``.

    Returns:
        The translated path as a POSIX string. When the path does not live
        under the source root (for example because it was already translated
        by an earlier call), or when either root is unavailable, the path is
        returned unchanged (as a POSIX string) instead of raising, so that
        repeated synchronisations stay idempotent.
    """
    if not path:
        return None

    if mode == 0:
        source_root, target_root = USER_DIR, GDRIVE_USER_DIR
    elif mode == 1:
        source_root, target_root = GDRIVE_USER_DIR, USER_DIR
    else:
        return None

    candidate = PurePosixPath(path)
    # GDRIVE_USER_DIR is None when no Drive mount was found; there is nothing
    # to translate to/from in that case.
    if source_root is None or target_root is None:
        return candidate.as_posix()

    try:
        relative = candidate.relative_to(source_root)
    except ValueError:
        # Not located under the source root (e.g. an absolute Drive path
        # checked against a relative local root): keep it as it is.
        return candidate.as_posix()
    return (PurePosixPath(target_root) / relative).as_posix()
|
| 74 |
+
|
| 75 |
+
base_names_app_dirs = (
|
| 76 |
+
"input",
|
| 77 |
+
"output_mvsepless",
|
| 78 |
+
"history",
|
| 79 |
+
"ensemble_flows",
|
| 80 |
+
"vbach_models",
|
| 81 |
+
"f0_curves",
|
| 82 |
+
"custom_separation_models",
|
| 83 |
+
"vbach_output"
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
def _copy_app_dirs(source_root, target_root, desc):
    """Copy every existing app directory from *source_root* to *target_root*.

    Only the directory names listed in ``base_names_app_dirs`` are considered;
    missing source directories are skipped. Existing targets are merged into
    (``dirs_exist_ok=True``).

    Returns:
        The number of directories that were actually copied.
    """
    sources = [(name, Path(source_root, name)) for name in base_names_app_dirs]
    copied = 0
    for name, source in tqdm(sources, desc=desc, unit=_i18n("dirs")):
        if source.exists():
            shutil.copytree(source, Path(target_root, name), dirs_exist_ok=True)
            copied += 1
    return copied


def copy_to_gdrive():
    """Copy the app directories from ``USER_DIR`` to ``GDRIVE_USER_DIR``.

    Does nothing when ``GDRIVE_DIR`` is falsy. On completion the number of
    copied directories is printed and a Gradio info toast is raised.
    """
    if not GDRIVE_DIR:
        return
    copied = _copy_app_dirs(USER_DIR, GDRIVE_USER_DIR, _i18n("copy_to_gdrive"))
    print(_i18n("copied_dirs")+": "+str(copied))
    print(_i18n("copy_to_gdrive_done"))
    gr.Info(title=_i18n("copy_to_gdrive_done"), message="")


def copy_to_runtime():
    """Copy the app directories from ``GDRIVE_USER_DIR`` back to ``USER_DIR``.

    Does nothing when ``GDRIVE_DIR`` is falsy. On completion the number of
    copied directories is printed and a Gradio info toast is raised.
    """
    if not GDRIVE_DIR:
        return
    copied = _copy_app_dirs(GDRIVE_USER_DIR, USER_DIR, _i18n("copy_to_current_user_dir"))
    print(_i18n("copied_dirs")+": "+str(copied))
    # NOTE(review): this reuses the "copy_to_gdrive_done" message although the
    # copy goes Drive -> runtime; looks like a copy-paste leftover. Kept as is
    # because the available i18n keys are not visible here - confirm.
    print(_i18n("copy_to_gdrive_done"))
    gr.Info(title=_i18n("copy_to_gdrive_done"), message="")
|
| 111 |
+
|
| 112 |
+
class UserDirectory:
    """Base class holding the root directory under which app data is stored."""

    def __init__(self, custom_dir=USER_DIR):
        # An empty/falsy custom_dir falls back to the module-level default.
        self.user_directory = Path(custom_dir or DEFAULT_USER_DIR)

    def change_dir(self, dir: str):
        """Point this object at a different root directory."""
        self.user_directory = Path(dir)

    @staticmethod
    def _make_timestamped_dir(parent):
        """Create and return ``parent/<YYYY-mm-dd_HH-MM-SS>`` (in ``tz``)."""
        stamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
        target = Path(parent) / stamp
        # exist_ok: two calls within the same second share one directory.
        target.mkdir(parents=True, exist_ok=True)
        return target

    def generate(self, name: str):
        """Create a timestamped directory under ``user_directory / name``."""
        return self._make_timestamped_dir(self.user_directory / name)

    def generate_from_dir(self, dir: str):
        """Create a timestamped directory directly under *dir*."""
        return self._make_timestamped_dir(dir)
|
| 130 |
+
|
| 131 |
+
class InputFilesDatabase(UserDirectory):
    """Registry of uploaded input files, persisted as ``inputs.json``.

    The list lives in ``self.input_base`` and is mirrored to
    ``<user_directory>/<base_names_app_dirs[0]>/inputs.json``.
    """

    def __init__(self):
        super().__init__()
        self.input_dir_base = self.user_directory / base_names_app_dirs[0]
        self.input_dir_base.mkdir(parents=True, exist_ok=True)
        self.input_base_json = self.input_dir_base / "inputs.json"
        self.input_base = []
        self.load()

    def _write_decorator(func):
        """Wrap a method so the registry is written to disk after it returns."""
        def wrapper(self, *args, **kwargs):
            outcome = func(self, *args, **kwargs)
            self.write()
            return outcome
        return wrapper

    def _load_decorator(func):
        """Wrap a method so the registry is reloaded from disk before it runs."""
        def wrapper(self, *args, **kwargs):
            self.load()
            return func(self, *args, **kwargs)
        return wrapper

    @_write_decorator
    def update_data(self, mode: int):
        """Merge re-rooted copies of the stored paths into the registry.

        Every path found in ``inputs.json`` is passed through
        ``rename_user_dir_path`` with the given *mode*; results not already
        present in memory are appended, keeping order and dropping duplicates.
        """
        known_paths = deepcopy(self.input_base)
        stored_paths = []
        if self.input_base_json.exists():
            stored_paths = json.loads(self.input_base_json.read_text("utf-8"))

        translated = [rename_user_dir_path(entry, mode=mode) for entry in stored_paths]
        additions = [entry for entry in translated if entry not in known_paths]

        # dict.fromkeys keeps first occurrences in order -> stable de-duplication.
        self.input_base = list(dict.fromkeys([*known_paths, *additions]))

    def write(self):
        """Serialize the registry to ``inputs.json`` (UTF-8, indented)."""
        payload = json.dumps(self.input_base, ensure_ascii=False, indent=4)
        self.input_base_json.write_text(payload, encoding="utf-8")

    def load(self):
        """Replace the in-memory registry with ``inputs.json`` if it exists."""
        if not self.input_base_json.exists():
            return
        self.input_base = json.loads(self.input_base_json.read_text("utf-8"))
        print(_i18n("input_base_loaded"))

    @_write_decorator
    def upload(self, files, copy=False):
        """Store *files* in a fresh timestamped input directory.

        Args:
            files: Candidate paths; filtered by ``get_audio_files_from_list``.
            copy: Copy the files when true, move them otherwise.

        Returns:
            The list of destination paths that were registered.
        """
        input_dir = self.generate_from_dir(self.input_dir_base)
        transfer = shutil.copy2 if copy else shutil.move
        stored = []
        for source in get_audio_files_from_list(files, only_files=True):
            destination = Namer.iter(input_dir / Path(source).name)
            transfer(source, destination)
            stored.append(destination)
        # NOTE(review): entries are written to JSON later; this presumes
        # Namer.iter returns something json-serializable - confirm.
        self.input_base.extend(stored)
        return stored

    @_write_decorator
    def clear(self):
        """Delete every registered file from disk and empty the registry."""
        for registered in self.input_base:
            Path(registered).unlink(missing_ok=True)
        self.input_base.clear()
        print(_i18n("input_base_cleared"))

    def get_input_list(self):
        """Return the registered paths, most recently added first."""
        return self.input_base[::-1]
|
| 205 |
+
|
| 206 |
+
class OutputDir(UserDirectory):
    """User-directory helper bound to one named output sub-directory."""

    def __init__(self, dir: str = base_names_app_dirs[1]):
        super().__init__()
        # Name of the sub-directory (relative to user_directory) used by
        # gen_output_dir().
        self.output_dir_name = dir

    def gen_output_dir(self):
        """Create and return a new timestamped directory for this output."""
        output_name = self.output_dir_name
        return self.generate(output_name)
|
| 213 |
+
|
| 214 |
class History(UserDirectory):
|
| 215 |
def __init__(self, name: str = "mvsepless"):
|
| 216 |
super().__init__()
|
| 217 |
+
self.history_dir_base = self.user_directory / base_names_app_dirs[2]
|
| 218 |
self.history_dir_base.mkdir(parents=True, exist_ok=True)
|
| 219 |
self.history_dict_json = self.history_dir_base / f"{name}.json"
|
| 220 |
self.history_dict = {}
|
|
|
|
| 242 |
self.history_dict = json.loads(self.history_dict_json.read_text("utf-8"))
|
| 243 |
print(_i18n("history_loaded"))
|
| 244 |
|
| 245 |
+
@_write_decorator
def update_data(self, mode: int):
    """Merge re-rooted entries from the history file into memory.

    Each stored state is read as a sequence of ``[basename, stems_list]``
    pairs, where ``stems_list`` holds ``[stem_name, stem_path]`` pairs. Every
    ``stem_path`` is passed through ``rename_user_dir_path`` with *mode*, and
    the pair structure is preserved (the previous implementation flattened
    the pairs into one list, which no longer matched the shape this very
    loop unpacks).

    Only keys that are not already present in ``self.history_dict`` are
    merged; existing in-memory entries are left untouched.
    NOTE(review): because the file normally mirrors memory, this merge may
    rarely add anything - confirm the intended direction of the sync.

    Args:
        mode: Direction flag forwarded to ``rename_user_dir_path``.
    """
    current_data = deepcopy(self.history_dict)
    stored = {}
    if self.history_dict_json.exists():
        stored = json.loads(self.history_dict_json.read_text("utf-8"))

    translated = {}
    for key, state in stored.items():
        translated[key] = [
            [
                basename,
                [
                    [stem_name, rename_user_dir_path(stem_path, mode=mode)]
                    for stem_name, stem_path in stems_list
                ],
            ]
            for basename, stems_list in state
        ]

    additions = {
        key: state for key, state in translated.items() if key not in current_data
    }
    self.history_dict = {**current_data, **additions}
|
| 270 |
+
|
| 271 |
def get_list(self, update_from_file=False):
|
| 272 |
if update_from_file:
|
| 273 |
self.load()
|
|
|
|
| 279 |
self.history_dict.update([(f"{timestamp} | {model_name}", deepcopy(state))])
|
| 280 |
|
| 281 |
def get_from_history(self, key: str):
    """Return an independent deep copy of the entry stored under *key*.

    Returns ``None`` when *key* is not present in ``self.history_dict``.
    """
    entry = self.history_dict.get(key)
    return deepcopy(entry)
|
| 283 |
|
| 284 |
class HistoryAutoEnsemble(History):
|
| 285 |
def __init__(self):
|
|
|
|
| 298 |
results_ = func(self, *args, **kwargs)
|
| 299 |
return results_
|
| 300 |
return wrapper
|
| 301 |
+
|
| 302 |
+
@_write_decorator
def update_data(self, mode: int):
    """Merge re-rooted auto-ensemble entries from the history file.

    A stored state is indexed as ``[result, inverted, primary_stems_list]``;
    each path is passed through ``rename_user_dir_path`` with *mode*. Only
    keys missing from the in-memory history are merged.
    """
    current_data = deepcopy(self.history_dict)
    stored = {}
    if self.history_dict_json.exists():
        stored = json.loads(self.history_dict_json.read_text("utf-8"))

    for entry_key, entry in stored.items():
        stored[entry_key] = [
            rename_user_dir_path(entry[0], mode=mode),  # result
            rename_user_dir_path(entry[1], mode=mode),  # inverted result
            # primary stems
            [rename_user_dir_path(stem, mode=mode) for stem in entry[2]],
        ]

    additions = {
        entry_key: entry
        for entry_key, entry in stored.items()
        if entry_key not in current_data and entry != current_data.get(entry_key)
    }
    self.history_dict = {**current_data, **additions}
|
| 325 |
|
| 326 |
@_write_decorator
|
| 327 |
def add_to_history(self, etype: str, output: str, inverted_output: str, primary_stems_list: list = []):
|
|
|
|
| 349 |
return results_
|
| 350 |
return wrapper
|
| 351 |
|
| 352 |
+
@_write_decorator
def update_data(self, mode: int):
    """Merge re-rooted manual-ensemble entries from the history file.

    Each stored state is treated as a single path and passed through
    ``rename_user_dir_path`` with *mode*; a falsy state becomes ``None``.
    Only keys missing from the in-memory history (with a non-``None``
    translated state) are merged.
    """
    current_data = deepcopy(self.history_dict)
    stored = {}
    if self.history_dict_json.exists():
        stored = json.loads(self.history_dict_json.read_text("utf-8"))

    for entry_key, entry in stored.items():
        stored[entry_key] = rename_user_dir_path(entry, mode=mode) if entry else None

    additions = {
        entry_key: entry
        for entry_key, entry in stored.items()
        if entry_key not in current_data and entry != current_data.get(entry_key)
    }
    self.history_dict = {**current_data, **additions}
|
| 374 |
+
|
| 375 |
@_write_decorator
|
| 376 |
def add_to_history(self, etype: str, state: str):
|
| 377 |
timestamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
|
|
|
|
| 398 |
return results_
|
| 399 |
return wrapper
|
| 400 |
|
| 401 |
+
@_write_decorator
def update_data(self, mode: int):
    """Merge re-rooted subtractor entries from the history file.

    Each stored state is treated as a single path and passed through
    ``rename_user_dir_path`` with *mode*; a falsy state becomes ``None``.
    Only keys missing from the in-memory history (with a non-``None``
    translated state) are merged.
    """
    current_data = deepcopy(self.history_dict)
    stored = {}
    if self.history_dict_json.exists():
        stored = json.loads(self.history_dict_json.read_text("utf-8"))

    for entry_key, entry in stored.items():
        translated = None
        if entry:
            translated = rename_user_dir_path(entry, mode=mode)
        stored[entry_key] = translated

    additions = {}
    for entry_key, entry in stored.items():
        if entry_key in current_data:
            continue
        if entry != current_data.get(entry_key):
            additions[entry_key] = entry

    self.history_dict = {**current_data, **additions}
|
| 423 |
+
|
| 424 |
@_write_decorator
|
| 425 |
def add_to_history(self, itype: str, state: str):
|
| 426 |
timestamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
|
|
|
|
| 444 |
return results_
|
| 445 |
return wrapper
|
| 446 |
|
| 447 |
+
@_write_decorator
def update_data(self, mode: int):
    """Merge re-rooted Vbach entries from the history file.

    Each stored state is iterated as a list of file paths, every one passed
    through ``rename_user_dir_path`` with *mode*; a falsy state becomes an
    empty list. Only keys missing from the in-memory history are merged.
    """
    current_data = deepcopy(self.history_dict)
    stored = {}
    if self.history_dict_json.exists():
        stored = json.loads(self.history_dict_json.read_text("utf-8"))

    for entry_key, files in stored.items():
        stored[entry_key] = (
            [rename_user_dir_path(file_path, mode=mode) for file_path in files]
            if files
            else []
        )

    additions = {
        entry_key: files
        for entry_key, files in stored.items()
        if entry_key not in current_data and files != current_data.get(entry_key)
    }
    self.history_dict = {**current_data, **additions}
|
| 469 |
+
|
| 470 |
@_write_decorator
|
| 471 |
def add_to_history(self, model_name: str, f0_method: str, pitch: int, output_files: list):
|
| 472 |
timestamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
|
|
|
|
| 479 |
def __init__(self):
|
| 480 |
super().__init__()
|
| 481 |
self.state = []
|
| 482 |
+
self.ensemble_base = self.user_directory / base_names_app_dirs[3]
|
| 483 |
self.ensemble_base.mkdir(parents=True, exist_ok=True)
|
| 484 |
|
| 485 |
def write_flow(self, name: str):
|
|
|
|
| 588 |
|
| 589 |
def __init__(self):
|
| 590 |
super().__init__()
|
| 591 |
+
self.vbach_models_base = self.user_directory / base_names_app_dirs[4]
|
| 592 |
self.pth_models_dir = self.vbach_models_base / "pth"
|
| 593 |
self.index_models_dir = self.vbach_models_base / "index"
|
| 594 |
self.pth_models_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
| 702 |
class F0GenerateOutPath(UserDirectory):
|
| 703 |
def __init__(self):
|
| 704 |
super().__init__()
|
| 705 |
+
self.f0_curves_dir = self.user_directory / base_names_app_dirs[5]
|
| 706 |
self.f0_curves_dir.mkdir(parents=True, exist_ok=True)
|
| 707 |
|
| 708 |
def generate_output_path(self, name: str, f0_method: str):
|
|
|
|
| 715 |
|
| 716 |
def __init__(self):
|
| 717 |
super().__init__()
|
| 718 |
+
self.custom_models_base = self.user_directory / base_names_app_dirs[6]
|
| 719 |
self.checkpoints_dir = self.custom_models_base / "checkpoints"
|
| 720 |
self.configs_dir = self.custom_models_base / "configs"
|
| 721 |
self.checkpoints_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
| 911 |
return gr.skip()
|
| 912 |
return gr.update(choices=current_configs, value=value), current_configs
|
| 913 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 914 |
def UI(self, theme=None, hf_space_mode=False):
|
| 915 |
+
global GDRIVE_DIR, IS_CUSTOM_DIR
|
| 916 |
all_models = self.get_all_models()
|
| 917 |
default_model = all_models[0]
|
| 918 |
stems_default = self.get_stems(default_model)
|
|
|
|
| 1760 |
gr.Warning(_i18n("model_not_selected"))
|
| 1761 |
return [], gr.skip()
|
| 1762 |
|
| 1763 |
+
output_dir = self.output_dir.generate(base_names_app_dirs[7])
|
| 1764 |
download_hubert(embedder_model, use_transformers)
|
| 1765 |
results = self.vbach_converter.convert_audio(
|
| 1766 |
audio_input=input_files,
|
|
|
|
| 2057 |
gr.Warning(_i18n("no_f0_file_selected"))
|
| 2058 |
return update_audio_with_size(label=_i18n("vbach_result"), value=None), gr.skip()
|
| 2059 |
|
| 2060 |
+
output_dir = self.output_dir.generate(base_names_app_dirs[7])
|
| 2061 |
download_hubert(embedder_model, use_transformers)
|
| 2062 |
|
| 2063 |
result = self.vbach_converter.convert_audio_custom_f0(
|
|
|
|
| 2360 |
def upload_vbach_index_fn(files: list, progress=gr.Progress(track_tqdm=True)):
|
| 2361 |
self.vbach_model_manager.upload_index_model(files)
|
| 2362 |
return gr.update(value=[])
|
| 2363 |
+
|
| 2364 |
+
if GDRIVE_USER_DIR:
|
| 2365 |
+
with gr.Tab(_i18n("google_drive")):
|
| 2366 |
+
gdrive_info = gr.Textbox(lines=3, label=_i18n("status"), interactive=False)
|
| 2367 |
+
gr.Timer().tick(lambda: gr.update(value=get_disk_usage(GDRIVE_DIR)), outputs=gdrive_info)
|
| 2368 |
+
copy_to_gdrive_btn = gr.Button(_i18n("copy_from_current_user_dir_to_gdrive"), **base_c_params["base"])
|
| 2369 |
+
@copy_to_gdrive_btn.click()
|
| 2370 |
+
def copy_to_gdrive_fn():
|
| 2371 |
+
copy_to_gdrive()
|
| 2372 |
+
self.input_files.update_data(0)
|
| 2373 |
+
self.history.update_data(0)
|
| 2374 |
+
self.auto_ensemble_history_app.update_data(0)
|
| 2375 |
+
self.manual_ensemble_history_app.update_data(0)
|
| 2376 |
+
self.subtract_history_app.update_data(0)
|
| 2377 |
+
self.vbach_history_app.update_data(0)
|
| 2378 |
|
| 2379 |
return mvsepless_app
|
| 2380 |
|
args_parser.py
CHANGED
|
@@ -1,631 +1,631 @@
|
|
| 1 |
-
import argparse
|
| 2 |
-
from pathlib import Path
|
| 3 |
-
from i18n import _i18n
|
| 4 |
-
BASE_DIR = Path(__file__).resolve().parent
|
| 5 |
-
from audio import output_formats
|
| 6 |
-
|
| 7 |
-
def tobool(val: str | bool | int):
|
| 8 |
-
if isinstance(val, int):
|
| 9 |
-
return True if val >= 1 else False
|
| 10 |
-
elif isinstance(val, str):
|
| 11 |
-
if val in ["y", "yes", "Yes", "true", "True", "1"]:
|
| 12 |
-
return True
|
| 13 |
-
else:
|
| 14 |
-
return False
|
| 15 |
-
elif isinstance(val, bool):
|
| 16 |
-
return val
|
| 17 |
-
|
| 18 |
-
class NestedAction(argparse.Action):
    """Store an option value inside a nested ``argparse.Namespace``.

    A ``dest`` such as ``"database.host"`` stores the value at
    ``namespace.database.host``. A ``dest`` without a dot is stored directly
    on the namespace, like the default ``store`` action, instead of failing.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # Split dest at the first dot, e.g. 'database.host'.
        group, dot, dest = self.dest.partition('.')
        if not dot:
            # No group part: behave like a plain store.
            setattr(namespace, self.dest, values)
            return
        # Fetch the nested Namespace or create it on first use.
        groupspace = getattr(namespace, group, None)
        if groupspace is None:
            groupspace = argparse.Namespace()
        # Put the value on the nested object ...
        setattr(groupspace, dest, values)
        # ... and make sure the nested object is attached to the main one.
        setattr(namespace, group, groupspace)
|
| 28 |
-
|
| 29 |
-
class NestedStoreTrue(argparse.Action):
    """``store_true`` variant that writes into a nested ``argparse.Namespace``.

    With ``dest="group.attr"`` passing the flag sets
    ``namespace.group.attr = True``. A ``dest`` without a dot is set directly
    on the namespace instead of failing.

    Note that nothing is written into the nested namespace until the flag is
    actually passed: the *default* is registered by argparse under the
    literal dotted ``dest`` name only.
    """

    def __init__(self, option_strings, dest, default=False, help=None, **kwargs):
        # nargs=0 makes this a pure flag that consumes no command-line value.
        super().__init__(option_strings=option_strings, dest=dest, nargs=0, default=default, help=help, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        # The flag was passed: flip the value to True.
        group, dot, attr = self.dest.partition('.')
        if not dot:
            setattr(namespace, self.dest, True)
            return
        groupspace = getattr(namespace, group, None)
        if groupspace is None:
            groupspace = argparse.Namespace()
        setattr(groupspace, attr, True)
        setattr(namespace, group, groupspace)
|
| 40 |
-
|
| 41 |
-
def parse_separator_args(add_params_args: dict | None = None):
    """Build the separator command-line interface and parse ``sys.argv``.

    Sub-commands (stored in ``mode``): ``separate``, ``custom_separate``,
    ``info``, ``auto_ensemble``, ``manual_ensemble`` and ``subtract``.

    Args:
        add_params_args: Optional mapping ``name -> {"type": ..., "default": ...}``
            describing extra ``--name`` options registered on ``separate`` and
            ``custom_separate``. A ``"bool"`` type becomes a flag; ``"int"`` and
            ``"float"`` are converted accordingly; anything else is parsed as
            ``str``. Values supplied on the command line are stored under
            ``namespace.add_params.<name>``.

    Returns:
        The ``argparse.Namespace`` produced by ``parser.parse_args()``.
    """
    # Avoid a mutable default argument: normalise a missing mapping here.
    extra_params = {} if add_params_args is None else add_params_args

    parser = argparse.ArgumentParser(
        description=_i18n("arg_main_description"),
        epilog=_i18n("arg_main_epilog"),
    )
    subparsers = parser.add_subparsers(
        title=_i18n("arg_subcommands_title"),
        dest="mode",
        description=_i18n("arg_subcommands_description"),
        help=_i18n("arg_subcommands_help"),
    )

    # Sub-commands are created in this order so the help listing is unchanged.
    separate_parser = subparsers.add_parser(
        "separate",
        help=_i18n("arg_separate_help"),
        description=_i18n("arg_separate_description"),
        epilog=_i18n("arg_separate_epilog"),
    )
    custom_separate_parser = subparsers.add_parser(
        "custom_separate",
        help=_i18n("arg_custom_separate_help"),
        description=_i18n("arg_custom_separate_description"),
        epilog=_i18n("arg_custom_separate_epilog"),
    )
    info_parser = subparsers.add_parser(
        "info",
        help=_i18n("arg_info_help"),
        description=_i18n("arg_info_description"),
        epilog=_i18n("arg_info_epilog"),
    )
    auto_ensemble_parser = subparsers.add_parser(
        "auto_ensemble",
        help=_i18n("arg_auto_ensemble_help"),
        description=_i18n("arg_auto_ensemble_description"),
        epilog=_i18n("arg_auto_ensemble_epilog"),
    )
    manual_ensemble_parser = subparsers.add_parser(
        "manual_ensemble",
        help=_i18n("arg_manual_ensemble_help"),
        description=_i18n("arg_manual_ensemble_description"),
        epilog=_i18n("arg_manual_ensemble_epilog"),
    )
    subtract_parser = subparsers.add_parser(
        "subtract",
        help=_i18n("arg_subtract_help"),
        description=_i18n("arg_subtract_description"),
        epilog=_i18n("arg_subtract_epilog"),
    )

    # --- helpers: each registers one option group shared by several commands ---

    def add_input_files(sub):
        """Register the multi-file input option."""
        sub.add_argument(
            "-i", "--i", "-input", "--input", "--input_files", "--input-files",
            nargs="+", dest="input",
            help=_i18n("arg_input_help"),
        )

    def add_output_options(sub):
        """Register the output directory and output format options."""
        sub.add_argument(
            "-o", "-out", "-output", "--output", "--output_dir", "--output-dir",
            type=str, default=".", dest="output_dir",
            help=_i18n("arg_output_dir_help"),
        )
        sub.add_argument(
            "-of", "-output_fmt", "--output_format", "--output-format",
            type=str, choices=output_formats, default=output_formats[0], dest="output_format",
            help=_i18n("arg_output_format_help", formats=", ".join(output_formats), default=output_formats[0]),
        )

    def add_template(sub, default, keys_id, example):
        """Register the output file name template option."""
        sub.add_argument(
            "-tm", "-tmplt", "--template", type=str, default=default, dest="template",
            help=_i18n("arg_template_help", keys=_i18n(keys_id), example=example),
        )

    def add_model_name(sub):
        """Register the model name option."""
        sub.add_argument(
            "-mn", "-model", "--model_name", "--model-name",
            type=str, default="bs_6stem", dest="model_name",
            help=_i18n("arg_model_name_help"),
        )

    def add_extract_instrumental(sub):
        """Register the instrumental extraction flag."""
        sub.add_argument(
            "-inst", "-ext_inst", "-ext-inst", "--extract_instrumental", "--extract-instrumental",
            action="store_true", dest="extract_instrumental",
            help=_i18n("arg_extract_instrumental_help"),
        )

    def add_spec_invert(sub):
        """Register the spectral inversion flag."""
        sub.add_argument(
            "-ispec", "-spec_invert", "-spec-invert", "--use_spec_invert", "--use-spec-invert",
            action="store_true", dest="use_spec_invert",
            help=_i18n("arg_use_spec_invert_help"),
        )

    def add_selected_stems(sub):
        """Register the stem selection option."""
        sub.add_argument(
            "-st", "--st", "-stems", "--stems", "--selected_stems", "--selected-stems",
            nargs="*", metavar="STEM", dest="selected_stems",
            help=_i18n("arg_selected_stems_help"),
        )

    def add_ensemble_type(sub):
        """Register the ensemble type option."""
        sub.add_argument(
            "-t", "-type", "-etype", "--ensemble_type", "--ensemble-type",
            type=str, default="avg_fft", dest="ensemble_type",
            help=_i18n("arg_ensemble_type_help"),
        )

    def add_extra_params(sub):
        """Register the caller-supplied extra options under ``add_params.*``."""
        for param_name, param_value in extra_params.items():
            param_type = param_value.get("type")
            if param_type == "bool":
                action, converter = NestedStoreTrue, None
            elif param_type == "int":
                action, converter = NestedAction, int
            elif param_type == "float":
                action, converter = NestedAction, float
            else:
                action, converter = NestedAction, str
            sub.add_argument(
                f"--{param_name}",
                action=action,
                type=converter,
                default=param_value.get("default"),
                dest=f"add_params.{param_name}",
                help=_i18n("arg_add_param_help"),
            )

    # separate
    add_input_files(separate_parser)
    add_output_options(separate_parser)
    add_template(separate_parser, "NAME_STEM_MODEL", "template_keys_separate", "NAME_STEM_MODEL")
    add_model_name(separate_parser)
    add_extract_instrumental(separate_parser)
    add_spec_invert(separate_parser)
    add_selected_stems(separate_parser)
    add_extra_params(separate_parser)

    # custom_separate
    add_input_files(custom_separate_parser)
    add_output_options(custom_separate_parser)
    add_template(custom_separate_parser, "NAME_STEM_MODEL", "template_keys_separate", "NAME_STEM_MODEL")
    custom_separate_parser.add_argument(
        "-mt", "-mtype", "--model_type", "--model-type",
        type=str, default="bs_roformer", dest="model_type",
        help=_i18n("arg_model_type_help"),
    )
    custom_separate_parser.add_argument(
        "-ckpt", "--ckpt", "-checkpoint", "--checkpoint", "--checkpoint_path", "--checkpoint-path",
        type=str, required=True, dest="checkpoint_path",
        help=_i18n("arg_checkpoint_path_help"),
    )
    custom_separate_parser.add_argument(
        "-conf", "--conf", "-config", "--config", "--config_path", "--config-path",
        type=str, required=True, dest="config_path",
        help=_i18n("arg_config_path_help"),
    )
    add_extract_instrumental(custom_separate_parser)
    add_spec_invert(custom_separate_parser)
    add_selected_stems(custom_separate_parser)
    add_extra_params(custom_separate_parser)

    # auto_ensemble
    auto_ensemble_parser.add_argument(
        "-i", "--i", "-input", "--input", "--input_file", "--input-file",
        type=str, required=True, dest="input",
        help=_i18n("arg_input_single_help"),
    )
    add_output_options(auto_ensemble_parser)
    add_template(auto_ensemble_parser, "NAME_TYPE_COUNT", "template_keys_auto_ensemble", "NAME_COUNT_TYPE")
    add_ensemble_type(auto_ensemble_parser)
    add_spec_invert(auto_ensemble_parser)
    auto_ensemble_parser.add_argument(
        "-save_stems", "-save-stems", "-save_primary_stems", "--save-primary-stems",
        action="store_true", dest="save_primary_stems",
        help=_i18n("arg_save_primary_stems_help"),
    )
    # Exactly one of an inline flow or a JSON preset must be supplied.
    auto_ensemble_flow_group = auto_ensemble_parser.add_mutually_exclusive_group(required=True)
    auto_ensemble_flow_group.add_argument(
        "-flow", "--flow", nargs="+", metavar="MODEL:PRIMARY_STEM:INVERT:WEIGHTS",
        dest="flow",
        help=_i18n("arg_flow_help"),
    )
    auto_ensemble_flow_group.add_argument(
        "-json", "-preset", "-preset_json", "-preset-json", "--preset_json", "--preset-json",
        type=str, dest="preset",
        help=_i18n("arg_preset_json_help"),
    )

    # manual_ensemble
    add_input_files(manual_ensemble_parser)
    add_output_options(manual_ensemble_parser)
    add_template(manual_ensemble_parser, "NAME_TYPE", "template_keys_manual_ensemble", "NAME_TYPE")
    add_ensemble_type(manual_ensemble_parser)
    manual_ensemble_parser.add_argument(
        "-w", "-weights", "--weights", type=float, nargs="*", dest="weights",
        help=_i18n("arg_weights_help"),
    )

    # subtract
    subtract_parser.add_argument(
        "-i1", "--i1", "-input1", "--input1", "--input_file1", "--input-file1",
        type=str, required=True, dest="input_1",
        help=_i18n("arg_input1_help"),
    )
    subtract_parser.add_argument(
        "-i2", "--i2", "-input2", "--input2", "--input_file2", "--input-file2",
        type=str, required=True, dest="input_2",
        help=_i18n("arg_input2_help"),
    )
    add_output_options(subtract_parser)
    add_template(subtract_parser, "NAME_TYPE", "template_keys_subtract", "NAME_TYPE")
    add_spec_invert(subtract_parser)

    # info
    info_parser.add_argument(
        "-u", "-update", "--update", action="store_true", dest="update",
        help=_i18n("arg_update_help"),
    )
    info_parser.add_argument(
        "-clear", "-clear_cache", "-clear-cache", "--clear_cache", "--clear-cache",
        action="store_true", dest="clear_cache",
        help=_i18n("arg_clear_cache_help"),
    )
    add_model_name(info_parser)
    info_parser.add_argument(
        "-dw", "-download", "--download", action="store_true", dest="download",
        help=_i18n("arg_download_help"),
    )
    info_parser.add_argument(
        "-l", "-limit", "--limit", type=int, default=None, dest="limit",
        help=_i18n("arg_limit_help"),
    )
    info_parser.add_argument(
        "-s", "-stem", "--stem", type=str, default=None, dest="stem",
        help=_i18n("arg_stem_filter_help"),
    )
    info_parser.add_argument(
        "-oi", "-installed", "--only_installed", "--only-installed",
        action="store_true", dest="only_installed",
        help=_i18n("arg_only_installed_help"),
    )

    return parser.parse_args()
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
def parse_vbach_args():
    """Build the Vbach command-line interface and parse ``sys.argv``.

    Sub-commands (stored in ``mode``): ``infer``, ``infer_custom_f0`` and
    ``download_hubert``.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    root_parser = argparse.ArgumentParser(
        description=_i18n("vbach_main_description"),
        epilog=_i18n("vbach_main_epilog"),
    )
    commands = root_parser.add_subparsers(
        title=_i18n("arg_subcommands_title"),
        dest="mode",
        description=_i18n("arg_subcommands_description"),
        help=_i18n("arg_subcommands_help"),
    )

    # Sub-commands, created in the order they appear in the help listing.
    infer_cmd = commands.add_parser(
        "infer",
        help=_i18n("vbach_infer_help"),
        description=_i18n("vbach_infer_description"),
        epilog=_i18n("vbach_infer_epilog"),
    )
    custom_f0_cmd = commands.add_parser(
        "infer_custom_f0",
        help=_i18n("vbach_infer_custom_f0_help"),
        description=_i18n("vbach_infer_custom_f0_description"),
        epilog=_i18n("vbach_infer_custom_f0_epilog"),
    )
    hubert_cmd = commands.add_parser(
        "download_hubert",
        help=_i18n("vbach_download_hubert_help"),
        description=_i18n("vbach_download_hubert_description"),
        epilog=_i18n("vbach_download_hubert_epilog"),
    )

    def add_model_source(sub):
        """Register output directory, model path, index path and pitch."""
        sub.add_argument(
            "-o", "-out", "-output", "--output", "--output_dir", "--output-dir",
            type=str, default=".", dest="output_dir",
            help=_i18n("arg_output_dir_help"),
        )
        sub.add_argument(
            "-m", "-model", "--model_path", "--model-path",
            type=str, required=True, dest="checkpoint_path",
            help=_i18n("vbach_model_path_help"),
        )
        sub.add_argument(
            "-idx", "-index", "--index_path", "--index-path",
            type=str, default="", dest="index_path",
            help=_i18n("vbach_index_path_help"),
        )
        sub.add_argument(
            "-p", "-pitch", "--pitch", type=int, default=0, dest="pitch",
            help=_i18n("vbach_pitch_help"),
        )

    def add_blend_controls(sub):
        """Register index rate, volume envelope and protect."""
        sub.add_argument(
            "-idxr", "-index_rate", "--index_rate", "--index-rate",
            type=float, default=0.75, dest="index_rate",
            help=_i18n("vbach_index_rate_help"),
        )
        sub.add_argument(
            "-ve", "-volume_envelope", "--volume_envelope", "--volume-envelope",
            type=float, default=0.25, dest="volume_envelope",
            help=_i18n("vbach_volume_envelope_help"),
        )
        sub.add_argument(
            "-pr", "-protect", "--protect", type=float, default=0.33, dest="protect",
            help=_i18n("vbach_protect_help"),
        )

    def add_embedder(sub):
        """Register the embedder model option and the transformers flag."""
        sub.add_argument(
            "-emb", "-embedder", "--embedder_model", "--embedder-model",
            type=str, default="hubert_base", dest="embedder",
            help=_i18n("vbach_embedder_help"),
        )
        sub.add_argument(
            "-tf", "-use_transformers", "--use_transformers", "--use-transformers",
            action="store_true", dest="use_transformers",
            help=_i18n("vbach_use_transformers_help"),
        )

    def add_render_options(sub):
        """Register output format, stereo mode, f0 range, chunking and template."""
        sub.add_argument(
            "-of", "-output_fmt", "--output_format", "--output-format",
            type=str, choices=output_formats, default=output_formats[0], dest="output_format",
            help=_i18n("arg_output_format_help", formats=", ".join(output_formats), default=output_formats[0]),
        )
        sub.add_argument(
            "-stm", "-stereo_mode", "--stereo_mode", "--stereo-mode",
            type=str, choices=("mono", "left/right", "sim/dif"), default="mono", dest="stereo_mode",
            help=_i18n("vbach_stereo_mode_help"),
        )
        sub.add_argument(
            "-f0min", "--f0_min", "--f0-min", type=int, default=50, dest="f0_min",
            help=_i18n("vbach_f0_min_help"),
        )
        sub.add_argument(
            "-f0max", "--f0_max", "--f0-max", type=int, default=1100, dest="f0_max",
            help=_i18n("vbach_f0_max_help"),
        )
        sub.add_argument(
            "-chd", "-chunk_duration", "--chunk_duration", "--chunk-duration",
            type=int, default=7, dest="chunk_duration",
            help=_i18n("vbach_chunk_duration_help"),
        )
        sub.add_argument(
            "-tm", "-tmplt", "--template", type=str, default="NAME_F0METHOD_PITCH", dest="template",
            help=_i18n("arg_template_help", keys=_i18n("template_keys_vbach"), example="NAME_F0METHOD_PITCH"),
        )

    # infer: several input files, f0 computed by the chosen method.
    infer_cmd.add_argument(
        "-i", "--i", "-input", "--input", "--input_files", "--input-files",
        nargs="+", dest="input",
        help=_i18n("arg_input_help"),
    )
    add_model_source(infer_cmd)
    infer_cmd.add_argument(
        "-f0m", "-f0_method", "--f0_method", "--f0-method",
        type=str, default="rmvpe+", dest="f0_method",
        help=_i18n("vbach_f0_method_help"),
    )
    add_blend_controls(infer_cmd)
    infer_cmd.add_argument(
        "-hl", "-hop_length", "--hop_length", "--hop-length",
        type=int, default=128, dest="hop_length",
        help=_i18n("vbach_hop_length_help"),
    )
    add_embedder(infer_cmd)
    add_render_options(infer_cmd)

    # infer_custom_f0: a single input file, f0 taken from a user-supplied file.
    custom_f0_cmd.add_argument(
        "-i", "--i", "-input", "--input", type=str, required=True, dest="input",
        help=_i18n("arg_input_single_help"),
    )
    add_model_source(custom_f0_cmd)
    custom_f0_cmd.add_argument(
        "-f0f", "-f0_file", "--f0_file", "--f0-file",
        type=str, dest="f0_file",
        help=_i18n("vbach_f0_file_help"),
    )
    add_blend_controls(custom_f0_cmd)
    add_embedder(custom_f0_cmd)
    add_render_options(custom_f0_cmd)

    # download_hubert: only needs to know which embedder to fetch.
    add_embedder(hubert_cmd)

    return root_parser.parse_args()
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
def parse_f0_extract():
    """Build the f0-extraction command-line interface and parse ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` with ``input``, ``f0_method``, ``f0_min``,
        ``f0_max`` and ``output_path``.
    """
    f0_parser = argparse.ArgumentParser(
        description=_i18n("f0_extract_description"),
        epilog=_i18n("f0_extract_epilog"),
    )
    # (option strings, keyword arguments) in registration order.
    option_table = (
        (
            ("-i", "--i", "-input", "--input"),
            dict(type=str, required=True, dest="input",
                 help=_i18n("arg_input_single_help")),
        ),
        (
            ("-f0m", "-f0_method", "--f0_method", "--f0-method"),
            dict(type=str, default="rmvpe+", dest="f0_method",
                 help=_i18n("vbach_f0_method_help")),
        ),
        (
            ("-f0min", "--f0_min", "--f0-min"),
            dict(type=int, default=50, dest="f0_min",
                 help=_i18n("vbach_f0_min_help")),
        ),
        (
            ("-f0max", "--f0_max", "--f0-max"),
            dict(type=int, default=1100, dest="f0_max",
                 help=_i18n("vbach_f0_max_help")),
        ),
        (
            ("-o", "-out", "-output", "--output", "--output_path", "--output-path"),
            dict(type=str, default=None, dest="output_path",
                 help=_i18n("f0_extract_output_help")),
        ),
    )
    for flags, options in option_table:
        f0_parser.add_argument(*flags, **options)
    return f0_parser.parse_args()
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
def parse_app_args():
    """Build the web-app launcher command-line interface and parse ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` with ``share``, ``port`` and ``full``.
    """
    app_parser = argparse.ArgumentParser(
        description=_i18n("app_description"),
        epilog=_i18n("app_epilog"),
    )
    # (option strings, keyword arguments) in registration order.
    option_table = (
        (
            ("-s", "-share", "--share", "--public", "--gradio_share", "--gradio-share"),
            dict(action="store_true", dest="share", help=_i18n("app_share_help")),
        ),
        (
            ("-p", "-port", "--port", "--server_port", "--server-port"),
            dict(type=int, default=None, dest="port", help=_i18n("app_port_help")),
        ),
        (
            ("-f", "-full", "--full", "--no_hf_mode", "--no-hf-mode"),
            dict(action="store_true", dest="full", help=_i18n("app_full_help")),
        ),
    )
    for flags, options in option_table:
        app_parser.add_argument(*flags, **options)
    return app_parser.parse_args()
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from i18n import _i18n
|
| 4 |
+
BASE_DIR = Path(__file__).resolve().parent
|
| 5 |
+
from audio import output_formats
|
| 6 |
+
|
| 7 |
+
def tobool(val: str | bool | int):
|
| 8 |
+
if isinstance(val, int):
|
| 9 |
+
return True if val >= 1 else False
|
| 10 |
+
elif isinstance(val, str):
|
| 11 |
+
if val in ["y", "yes", "Yes", "true", "True", "1"]:
|
| 12 |
+
return True
|
| 13 |
+
else:
|
| 14 |
+
return False
|
| 15 |
+
elif isinstance(val, bool):
|
| 16 |
+
return val
|
| 17 |
+
|
| 18 |
+
class NestedAction(argparse.Action):
    """Argparse action that stores a value inside a nested namespace.

    ``dest`` is expected to be dotted (``"group.name"``); the parsed value is
    written to ``namespace.group.name``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # Split the dotted dest, e.g. 'database.host' -> ('database', 'host').
        group_name, attr_name = self.dest.split('.', 1)
        # Reuse the nested namespace if present, otherwise start a new one.
        if hasattr(namespace, group_name):
            nested = getattr(namespace, group_name)
        else:
            nested = argparse.Namespace()
        # Record the value on the nested object ...
        setattr(nested, attr_name, values)
        # ... and attach the nested object to the main namespace.
        setattr(namespace, group_name, nested)
|
| 28 |
+
|
| 29 |
+
class NestedStoreTrue(argparse.Action):
    """Flag action (no value) that sets ``True`` inside a nested namespace.

    ``dest`` is expected to be dotted (``"group.name"``); when the flag is
    present on the command line ``namespace.group.name`` becomes ``True``.
    """

    def __init__(self, option_strings, dest, default=False, help=None, **kwargs):
        # nargs=0 makes the option a pure flag that consumes no arguments.
        super().__init__(
            option_strings=option_strings,
            dest=dest,
            nargs=0,
            default=default,
            help=help,
            **kwargs,
        )

    def __call__(self, parser, namespace, values, option_string=None):
        # The flag was supplied: switch the nested attribute to True.
        group_name, attr_name = self.dest.split('.', 1)
        if hasattr(namespace, group_name):
            nested = getattr(namespace, group_name)
        else:
            nested = argparse.Namespace()
        setattr(nested, attr_name, True)
        setattr(namespace, group_name, nested)
|
| 40 |
+
|
| 41 |
+
def _add_mode_parser(subparsers, mode: str):
    """Register sub-command *mode* using the ``arg_<mode>_*`` i18n keys."""
    return subparsers.add_parser(
        mode,
        help=_i18n(f"arg_{mode}_help"),
        description=_i18n(f"arg_{mode}_description"),
        epilog=_i18n(f"arg_{mode}_epilog"),
    )


def _add_input_files_arg(parser):
    """Add the multi-file ``--input`` option (``dest="input"``)."""
    parser.add_argument(
        "-i", "--i", "-input", "--input", "--input_files", "--input-files",
        nargs="+", dest="input",
        help=_i18n("arg_input_help"),
    )


def _add_output_args(parser, *, template: str, template_keys: str, example: str):
    """Add output directory, output format and file-name template options."""
    parser.add_argument(
        "-o", "-out", "-output", "--output", "--output_dir", "--output-dir",
        type=str, default=".", dest="output_dir",
        help=_i18n("arg_output_dir_help"),
    )
    parser.add_argument(
        "-of", "-output_fmt", "--output_format", "--output-format",
        type=str, choices=output_formats, default=output_formats[0], dest="output_format",
        help=_i18n("arg_output_format_help", formats=", ".join(output_formats), default=output_formats[0]),
    )
    parser.add_argument(
        "-tm", "-tmplt", "--template", type=str, default=template, dest="template",
        help=_i18n("arg_template_help", keys=_i18n(template_keys), example=example),
    )


def _add_model_name_arg(parser):
    """Add the ``--model_name`` option (default ``"bs_6stem"``)."""
    parser.add_argument(
        "-mn", "-model", "--model_name", "--model-name",
        type=str, default="bs_6stem", dest="model_name",
        help=_i18n("arg_model_name_help"),
    )


def _add_spec_invert_arg(parser):
    """Add the ``--use_spec_invert`` flag."""
    parser.add_argument(
        "-ispec", "-spec_invert", "-spec-invert", "--use_spec_invert", "--use-spec-invert",
        action="store_true", dest="use_spec_invert",
        help=_i18n("arg_use_spec_invert_help"),
    )


def _add_ensemble_type_arg(parser):
    """Add the ``--ensemble_type`` option (default ``"avg_fft"``)."""
    parser.add_argument(
        "-t", "-type", "-etype", "--ensemble_type", "--ensemble-type",
        type=str, default="avg_fft", dest="ensemble_type",
        help=_i18n("arg_ensemble_type_help"),
    )


def _add_param_cast(param_type):
    """Return the argparse ``type`` callable for an additional parameter."""
    if param_type == "bool":
        return None  # flags take no value, so there is nothing to cast
    if param_type == "int":
        return int
    if param_type == "float":
        return float
    return str


def _add_separation_options(parser, add_params_args: dict):
    """Add the options shared by ``separate`` and ``custom_separate``.

    Adds, in order: instrumental extraction, spectral inversion, stem
    selection, then one ``--<name>`` option per entry of *add_params_args*
    (stored under the nested ``add_params`` namespace).
    """
    parser.add_argument(
        "-inst", "-ext_inst", "-ext-inst", "--extract_instrumental", "--extract-instrumental",
        action="store_true", dest="extract_instrumental",
        help=_i18n("arg_extract_instrumental_help"),
    )
    _add_spec_invert_arg(parser)
    parser.add_argument(
        "-st", "--st", "-stems", "--stems", "--selected_stems", "--selected-stems",
        nargs="*", metavar="STEM", dest="selected_stems",
        help=_i18n("arg_selected_stems_help"),
    )
    for param_name, param_value in add_params_args.items():
        param_type = param_value.get("type")
        parser.add_argument(
            f"--{param_name}",
            action=NestedStoreTrue if param_type == "bool" else NestedAction,
            type=_add_param_cast(param_type),
            default=param_value.get("default"),
            dest=f"add_params.{param_name}",
            help=_i18n("arg_add_param_help"),
        )


def parse_separator_args(add_params_args: dict | None = None):
    """Build the separator CLI and parse the process command line.

    Sub-commands (stored in ``mode``): ``separate``, ``custom_separate``,
    ``info``, ``auto_ensemble``, ``manual_ensemble`` and ``subtract``.

    Args:
        add_params_args: Optional mapping ``name -> spec`` describing extra
            ``--<name>`` options for ``separate`` / ``custom_separate``.
            Each spec is a dict read via its ``"type"`` (``"bool"``,
            ``"int"``, ``"float"``; anything else is parsed as ``str``) and
            ``"default"`` keys. ``None`` means no extra options.

    Returns:
        The ``argparse.Namespace`` produced by ``parser.parse_args()``.
    """
    # A None sentinel replaces the former shared mutable ``{}`` default.
    if add_params_args is None:
        add_params_args = {}

    parser = argparse.ArgumentParser(
        description=_i18n("arg_main_description"),
        epilog=_i18n("arg_main_epilog"),
    )
    subparsers = parser.add_subparsers(
        title=_i18n("arg_subcommands_title"),
        dest="mode",
        description=_i18n("arg_subcommands_description"),
        help=_i18n("arg_subcommands_help"),
    )

    # Registration order determines the order shown in --help; keep it.
    separate_parser = _add_mode_parser(subparsers, "separate")
    custom_separate_parser = _add_mode_parser(subparsers, "custom_separate")
    info_parser = _add_mode_parser(subparsers, "info")
    auto_ensemble_parser = _add_mode_parser(subparsers, "auto_ensemble")
    manual_ensemble_parser = _add_mode_parser(subparsers, "manual_ensemble")
    subtract_parser = _add_mode_parser(subparsers, "subtract")

    # separate
    _add_input_files_arg(separate_parser)
    _add_output_args(
        separate_parser,
        template="NAME_STEM_MODEL", template_keys="template_keys_separate", example="NAME_STEM_MODEL",
    )
    _add_model_name_arg(separate_parser)
    _add_separation_options(separate_parser, add_params_args)

    # custom_separate
    _add_input_files_arg(custom_separate_parser)
    _add_output_args(
        custom_separate_parser,
        template="NAME_STEM_MODEL", template_keys="template_keys_separate", example="NAME_STEM_MODEL",
    )
    custom_separate_parser.add_argument(
        "-mt", "-mtype", "--model_type", "--model-type",
        type=str, default="bs_roformer", dest="model_type",
        help=_i18n("arg_model_type_help"),
    )
    custom_separate_parser.add_argument(
        "-ckpt", "--ckpt", "-checkpoint", "--checkpoint", "--checkpoint_path", "--checkpoint-path",
        type=str, required=True, dest="checkpoint_path",
        help=_i18n("arg_checkpoint_path_help"),
    )
    custom_separate_parser.add_argument(
        "-conf", "--conf", "-config", "--config", "--config_path", "--config-path",
        type=str, required=True, dest="config_path",
        help=_i18n("arg_config_path_help"),
    )
    _add_separation_options(custom_separate_parser, add_params_args)

    # auto_ensemble
    auto_ensemble_parser.add_argument(
        "-i", "--i", "-input", "--input", "--input_file", "--input-file",
        type=str, required=True, dest="input",
        help=_i18n("arg_input_single_help"),
    )
    _add_output_args(
        auto_ensemble_parser,
        template="NAME_TYPE_COUNT", template_keys="template_keys_auto_ensemble", example="NAME_COUNT_TYPE",
    )
    _add_ensemble_type_arg(auto_ensemble_parser)
    _add_spec_invert_arg(auto_ensemble_parser)
    auto_ensemble_parser.add_argument(
        "-save_stems", "-save-stems", "-save_primary_stems", "--save-primary-stems",
        action="store_true", dest="save_primary_stems",
        help=_i18n("arg_save_primary_stems_help"),
    )
    # Exactly one of --flow / --preset_json must be supplied.
    auto_ensemble_flow_group = auto_ensemble_parser.add_mutually_exclusive_group(required=True)
    auto_ensemble_flow_group.add_argument(
        "-flow", "--flow", nargs="+", metavar="MODEL:PRIMARY_STEM:INVERT:WEIGHTS",
        dest="flow",
        help=_i18n("arg_flow_help"),
    )
    auto_ensemble_flow_group.add_argument(
        "-json", "-preset", "-preset_json", "-preset-json", "--preset_json", "--preset-json",
        type=str, dest="preset",
        help=_i18n("arg_preset_json_help"),
    )

    # manual_ensemble
    _add_input_files_arg(manual_ensemble_parser)
    _add_output_args(
        manual_ensemble_parser,
        template="NAME_TYPE", template_keys="template_keys_manual_ensemble", example="NAME_TYPE",
    )
    _add_ensemble_type_arg(manual_ensemble_parser)
    manual_ensemble_parser.add_argument(
        "-w", "-weights", "--weights", type=float, nargs="*", dest="weights",
        help=_i18n("arg_weights_help"),
    )

    # subtract
    subtract_parser.add_argument(
        "-i1", "--i1", "-input1", "--input1", "--input_file1", "--input-file1",
        type=str, required=True, dest="input_1",
        help=_i18n("arg_input1_help"),
    )
    subtract_parser.add_argument(
        "-i2", "--i2", "-input2", "--input2", "--input_file2", "--input-file2",
        type=str, required=True, dest="input_2",
        help=_i18n("arg_input2_help"),
    )
    _add_output_args(
        subtract_parser,
        template="NAME_TYPE", template_keys="template_keys_subtract", example="NAME_TYPE",
    )
    _add_spec_invert_arg(subtract_parser)

    # info
    info_parser.add_argument(
        "-u", "-update", "--update", action="store_true", dest="update",
        help=_i18n("arg_update_help"),
    )
    info_parser.add_argument(
        "-clear", "-clear_cache", "-clear-cache", "--clear_cache", "--clear-cache",
        action="store_true", dest="clear_cache",
        help=_i18n("arg_clear_cache_help"),
    )
    _add_model_name_arg(info_parser)
    info_parser.add_argument(
        "-dw", "-download", "--download", action="store_true", dest="download",
        help=_i18n("arg_download_help"),
    )
    info_parser.add_argument(
        "-l", "-limit", "--limit", type=int, default=None, dest="limit",
        help=_i18n("arg_limit_help"),
    )
    info_parser.add_argument(
        "-s", "-stem", "--stem", type=str, default=None, dest="stem",
        help=_i18n("arg_stem_filter_help"),
    )
    info_parser.add_argument(
        "-oi", "-installed", "--only_installed", "--only-installed",
        action="store_true", dest="only_installed",
        help=_i18n("arg_only_installed_help"),
    )

    return parser.parse_args()
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
def parse_vbach_args():
    """Build the Vbach CLI and parse the process command line.

    Sub-commands (stored in ``mode``): ``infer``, ``infer_custom_f0`` and
    ``download_hubert``.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    root = argparse.ArgumentParser(
        description=_i18n("vbach_main_description"),
        epilog=_i18n("vbach_main_epilog"),
    )
    modes = root.add_subparsers(
        title=_i18n("arg_subcommands_title"),
        dest="mode",
        description=_i18n("arg_subcommands_description"),
        help=_i18n("arg_subcommands_help"),
    )

    def new_mode(name):
        # Help texts follow the "vbach_<mode>_*" key pattern.
        return modes.add_parser(
            name,
            help=_i18n(f"vbach_{name}_help"),
            description=_i18n(f"vbach_{name}_description"),
            epilog=_i18n(f"vbach_{name}_epilog"),
        )

    def add_model_args(sub):
        # Output directory, voice model, index file and pitch shift.
        sub.add_argument(
            "-o", "-out", "-output", "--output", "--output_dir", "--output-dir",
            type=str, default=".", dest="output_dir",
            help=_i18n("arg_output_dir_help"),
        )
        sub.add_argument(
            "-m", "-model", "--model_path", "--model-path",
            type=str, required=True, dest="checkpoint_path",
            help=_i18n("vbach_model_path_help"),
        )
        sub.add_argument(
            "-idx", "-index", "--index_path", "--index-path",
            type=str, default="", dest="index_path",
            help=_i18n("vbach_index_path_help"),
        )
        sub.add_argument(
            "-p", "-pitch", "--pitch", type=int, default=0, dest="pitch",
            help=_i18n("vbach_pitch_help"),
        )

    def add_blend_args(sub):
        # Index rate, volume envelope and protect ratios.
        sub.add_argument(
            "-idxr", "-index_rate", "--index_rate", "--index-rate",
            type=float, default=0.75, dest="index_rate",
            help=_i18n("vbach_index_rate_help"),
        )
        sub.add_argument(
            "-ve", "-volume_envelope", "--volume_envelope", "--volume-envelope",
            type=float, default=0.25, dest="volume_envelope",
            help=_i18n("vbach_volume_envelope_help"),
        )
        sub.add_argument(
            "-pr", "-protect", "--protect", type=float, default=0.33, dest="protect",
            help=_i18n("vbach_protect_help"),
        )

    def add_embedder_args(sub):
        # Embedder model selection; shared by all three sub-commands.
        sub.add_argument(
            "-emb", "-embedder", "--embedder_model", "--embedder-model",
            type=str, default="hubert_base", dest="embedder",
            help=_i18n("vbach_embedder_help"),
        )
        sub.add_argument(
            "-tf", "-use_transformers", "--use_transformers", "--use-transformers",
            action="store_true", dest="use_transformers",
            help=_i18n("vbach_use_transformers_help"),
        )

    def add_render_args(sub):
        # Output format, stereo handling, f0 range, chunking and naming.
        sub.add_argument(
            "-of", "-output_fmt", "--output_format", "--output-format",
            type=str, choices=output_formats, default=output_formats[0], dest="output_format",
            help=_i18n("arg_output_format_help", formats=", ".join(output_formats), default=output_formats[0]),
        )
        sub.add_argument(
            "-stm", "-stereo_mode", "--stereo_mode", "--stereo-mode",
            type=str, choices=("mono", "left/right", "sim/dif"), default="mono", dest="stereo_mode",
            help=_i18n("vbach_stereo_mode_help"),
        )
        sub.add_argument(
            "-f0min", "--f0_min", "--f0-min", type=int, default=50, dest="f0_min",
            help=_i18n("vbach_f0_min_help"),
        )
        sub.add_argument(
            "-f0max", "--f0_max", "--f0-max", type=int, default=1100, dest="f0_max",
            help=_i18n("vbach_f0_max_help"),
        )
        sub.add_argument(
            "-chd", "-chunk_duration", "--chunk_duration", "--chunk-duration",
            type=int, default=7, dest="chunk_duration",
            help=_i18n("vbach_chunk_duration_help"),
        )
        sub.add_argument(
            "-tm", "-tmplt", "--template", type=str, default="NAME_F0METHOD_PITCH", dest="template",
            help=_i18n("arg_template_help", keys=_i18n("template_keys_vbach"), example="NAME_F0METHOD_PITCH"),
        )

    # Registration order determines the order shown in --help.
    infer = new_mode("infer")
    infer_custom_f0 = new_mode("infer_custom_f0")
    download_hubert = new_mode("download_hubert")

    # infer: many input files, f0 estimated with the chosen method
    infer.add_argument(
        "-i", "--i", "-input", "--input", "--input_files", "--input-files",
        nargs="+", dest="input",
        help=_i18n("arg_input_help"),
    )
    add_model_args(infer)
    infer.add_argument(
        "-f0m", "-f0_method", "--f0_method", "--f0-method",
        type=str, default="rmvpe+", dest="f0_method",
        help=_i18n("vbach_f0_method_help"),
    )
    add_blend_args(infer)
    infer.add_argument(
        "-hl", "-hop_length", "--hop_length", "--hop-length",
        type=int, default=128, dest="hop_length",
        help=_i18n("vbach_hop_length_help"),
    )
    add_embedder_args(infer)
    add_render_args(infer)

    # infer_custom_f0: one input file, f0 taken from a file
    infer_custom_f0.add_argument(
        "-i", "--i", "-input", "--input", type=str, required=True, dest="input",
        help=_i18n("arg_input_single_help"),
    )
    add_model_args(infer_custom_f0)
    infer_custom_f0.add_argument(
        "-f0f", "-f0_file", "--f0_file", "--f0-file",
        type=str, dest="f0_file",
        help=_i18n("vbach_f0_file_help"),
    )
    add_blend_args(infer_custom_f0)
    add_embedder_args(infer_custom_f0)
    add_render_args(infer_custom_f0)

    # download_hubert
    add_embedder_args(download_hubert)

    return root.parse_args()
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
def parse_f0_extract():
    """Build the f0-extraction CLI and parse the process command line.

    Options: required ``--input``, ``--f0_method`` (default ``"rmvpe+"``),
    ``--f0_min`` (50), ``--f0_max`` (1100) and ``--output_path`` (``None``).

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    cli = argparse.ArgumentParser(
        description=_i18n("f0_extract_description"),
        epilog=_i18n("f0_extract_epilog"),
    )

    # (option strings, add_argument keyword arguments), in --help order.
    option_table = (
        (
            ("-i", "--i", "-input", "--input"),
            dict(type=str, required=True, dest="input", help=_i18n("arg_input_single_help")),
        ),
        (
            ("-f0m", "-f0_method", "--f0_method", "--f0-method"),
            dict(type=str, default="rmvpe+", dest="f0_method", help=_i18n("vbach_f0_method_help")),
        ),
        (
            ("-f0min", "--f0_min", "--f0-min"),
            dict(type=int, default=50, dest="f0_min", help=_i18n("vbach_f0_min_help")),
        ),
        (
            ("-f0max", "--f0_max", "--f0-max"),
            dict(type=int, default=1100, dest="f0_max", help=_i18n("vbach_f0_max_help")),
        ),
        (
            ("-o", "-out", "-output", "--output", "--output_path", "--output-path"),
            dict(type=str, default=None, dest="output_path", help=_i18n("f0_extract_output_help")),
        ),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
|
| 609 |
+
|
| 610 |
+
|
| 611 |
+
def parse_app_args():
    """Build the web-app launcher CLI and parse the process command line.

    Options: ``--share`` flag, ``--port`` (int, default ``None``) and
    ``--full`` flag.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    app_cli = argparse.ArgumentParser(
        description=_i18n("app_description"),
        epilog=_i18n("app_epilog"),
    )
    add_option = app_cli.add_argument

    add_option(
        "-s", "-share", "--share", "--public", "--gradio_share", "--gradio-share",
        dest="share",
        action="store_true",
        help=_i18n("app_share_help"),
    )
    add_option(
        "-p", "-port", "--port", "--server_port", "--server-port",
        dest="port",
        type=int,
        default=None,
        help=_i18n("app_port_help"),
    )
    add_option(
        "-f", "-full", "--full", "--no_hf_mode", "--no-hf-mode",
        dest="full",
        action="store_true",
        help=_i18n("app_full_help"),
    )

    parsed = app_cli.parse_args()
    return parsed
|
extra_utils.py
CHANGED
|
@@ -20,7 +20,7 @@ import ctypes
|
|
| 20 |
import platform
|
| 21 |
import numpy as np
|
| 22 |
import yt_dlp
|
| 23 |
-
import
|
| 24 |
|
| 25 |
try:
|
| 26 |
import spaces
|
|
@@ -47,7 +47,39 @@ if spaces is not None:
|
|
| 47 |
import torch
|
| 48 |
tz = timezone(timedelta(hours=3))
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
class DownloadError(Exception):
    """Exception type for download failures."""
|
| 53 |
|
|
@@ -80,9 +112,6 @@ base_c_params = {
|
|
| 80 |
}
|
| 81 |
}
|
| 82 |
|
| 83 |
-
def get_info():
    """Placeholder with no implementation; does nothing and returns ``None``."""
    return None
|
| 85 |
-
|
| 86 |
def size_readable(size_bytes: int):
|
| 87 |
if size_bytes == 0:
|
| 88 |
return f"0 {_i18n('bytes')}"
|
|
@@ -95,6 +124,23 @@ def size_readable(size_bytes: int):
|
|
| 95 |
i += 1
|
| 96 |
return f"{size_bytes:.2f} {units[i]}"
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
def define_audio_with_size(basename: bool = False, **kwargs):
|
| 99 |
path = kwargs.get("value", None)
|
| 100 |
if not path:
|
|
@@ -510,87 +556,4 @@ def extra_clear_torch_cache():
|
|
| 510 |
if hasattr(torch._C, "_jit_pass_onnx_clear_scope_records"):
|
| 511 |
try:
|
| 512 |
torch._C._jit_pass_onnx_clear_scope_records()
|
| 513 |
-
except Exception: pass
|
| 514 |
-
|
| 515 |
-
class UserDirectory:
    """Base directory under which timestamped working folders are created."""

    def __init__(self):
        # Starts at the current working directory until change_dir() is called.
        self.user_directory = Path('.')

    def change_dir(self, dir: str):
        """Use *dir* as the new base directory."""
        self.user_directory = Path(dir)

    def _make_timestamped(self, parent):
        """Create ``<parent>/<timestamp>`` (with parents) and return it."""
        stamp = datetime.now(tz).strftime("%Y-%m-%d_%H-%M-%S")
        target = parent / stamp
        target.mkdir(parents=True, exist_ok=True)
        return target

    def generate(self, name: str):
        """Create and return ``<user_directory>/<name>/<timestamp>``."""
        return self._make_timestamped(self.user_directory / name)

    def generate_from_dir(self, dir: str):
        """Create and return ``<dir>/<timestamp>``."""
        return self._make_timestamped(Path(dir))
|
| 533 |
-
|
| 534 |
-
class InputFilesDatabase(UserDirectory):
    """Registry of uploaded input files, persisted to ``input/inputs.json``.

    ``input_base`` is the in-memory list of stored file paths; it is loaded
    from the JSON file on construction and written back after every
    mutating operation (``upload`` and ``clear``).
    """

    def __init__(self):
        super().__init__()
        self.input_dir_base = self.user_directory / "input"
        self.input_dir_base.mkdir(parents=True, exist_ok=True)
        self.input_base_json = self.input_dir_base / "inputs.json"
        self.input_base = []
        self.load()

    def _write_decorator(func):
        """Decorator: run *func*, then persist ``input_base`` to disk."""
        from functools import wraps

        @wraps(func)  # keep the wrapped method's name and docstring
        def wrapper(self, *args, **kwargs):
            results_ = func(self, *args, **kwargs)
            self.write()
            return results_
        return wrapper

    def _load_decorator(func):
        """Decorator: reload ``input_base`` from disk, then run *func*."""
        from functools import wraps

        @wraps(func)
        def wrapper(self, *args, **kwargs):
            self.load()
            results_ = func(self, *args, **kwargs)
            return results_
        return wrapper

    def write(self):
        """Serialise ``input_base`` to the JSON file (UTF-8, indented)."""
        self.input_base_json.write_text(json.dumps(self.input_base, ensure_ascii=False, indent=4), encoding="utf-8")

    def load(self):
        """Replace ``input_base`` with the JSON file's content, if it exists."""
        if self.input_base_json.exists():
            self.input_base = json.loads(self.input_base_json.read_text("utf-8"))
            print(_i18n("input_base_loaded"))

    @_write_decorator
    def upload(self, files, copy=False):
        """Store audio files in a fresh timestamped input directory.

        Args:
            files: Candidate paths; filtered by ``get_audio_files_from_list``.
            copy: Copy the files when true, otherwise move them.

        Returns:
            The list of destination paths, as returned by ``Namer.iter``.
        """
        input_dir = self.generate_from_dir(self.input_dir_base)
        uploaded_input_files = []
        valid_files = get_audio_files_from_list(files, only_files=True)
        for file in valid_files:
            new_file = Namer.iter(input_dir / Path(file).name)
            if copy:
                shutil.copy2(file, new_file)
            else:
                shutil.move(file, new_file)
            uploaded_input_files.append(new_file)
        # Namer.iter is handed a Path and may return one (TODO confirm).
        # json.dumps in write() cannot serialise Path objects, and entries
        # reloaded from JSON are strings anyway, so record plain strings.
        self.input_base.extend(str(stored) for stored in uploaded_input_files)
        return uploaded_input_files

    @_write_decorator
    def clear(self):
        """Delete every registered file and empty the registry."""
        for path in self.input_base:
            Path(path).unlink(missing_ok=True)
        self.input_base.clear()
        print(_i18n("input_base_cleared"))

    def get_input_list(self):
        """Return the registered paths, most recently added first."""
        return list(reversed(self.input_base))
|
| 589 |
-
|
| 590 |
-
class OutputDir(UserDirectory):
|
| 591 |
-
def __init__(self, dir: str = "output_mvsepless"):
|
| 592 |
-
super().__init__()
|
| 593 |
-
self.output_dir_name = dir
|
| 594 |
-
|
| 595 |
-
def gen_output_dir(self):
|
| 596 |
-
return self.generate(self.output_dir_name)
|
|
|
|
| 20 |
import platform
|
| 21 |
import numpy as np
|
| 22 |
import yt_dlp
|
| 23 |
+
import subprocess
|
| 24 |
|
| 25 |
try:
|
| 26 |
import spaces
|
|
|
|
| 47 |
import torch
|
| 48 |
tz = timezone(timedelta(hours=3))
|
| 49 |
|
| 50 |
+
def get_gdrive_dir():
    """Return the mount point of a mounted Google Drive, or ``None``.

    Runs ``/bin/mount`` and scans its output for a line describing a
    ``fuse.drive`` filesystem, expected in the form
    ``<source> on <mount_point> type fuse.drive ...``.

    Returns:
        The mount point as a string, or ``None`` when no such line is listed
        or the ``mount`` command cannot be executed.
    """
    try:
        result = subprocess.run(['/bin/mount'], capture_output=True, text=True)
    except (OSError, subprocess.SubprocessError):
        # Best effort: without a runnable /bin/mount there is nothing to detect.
        return None

    for line in result.stdout.splitlines():
        if 'type fuse.drive' not in line:
            continue
        source_mount, type_sep, _fs_details = line.partition(' type ')
        if not type_sep:
            continue
        # Split on the FIRST " on " so a mount point that itself contains
        # " on " is kept intact instead of breaking a two-name unpack.
        _source, on_sep, mount_point = source_mount.partition(' on ')
        if on_sep and mount_point:
            return mount_point
        # Malformed line: keep scanning instead of giving up on the whole output.
    return None
|
| 63 |
|
| 64 |
+
def easy_check_is_colab() -> bool:
    """
    Check whether the code is running in Google Colab.

    The check requires an x86_64 Linux platform, an importable
    ``google.colab`` module, and that module being located under
    ``/usr/local/lib/python*/dist-packages``.

    Returns:
        True if running in Colab
    """
    on_linux_x86_64 = platform.machine() == "x86_64" and "Linux" in platform.platform()
    if not on_linux_x86_64:
        return False

    try:
        import google.colab
    except ImportError:
        return False

    module_path: str = google.colab.__file__
    expected_prefix = "/usr/local/lib/python"
    expected_suffix = "/dist-packages/google/colab/__init__.py"
    return module_path.startswith(expected_prefix) and module_path.endswith(expected_suffix)
|
| 83 |
|
| 84 |
class DownloadError(Exception):
    """Custom exception type named for download failures."""
|
| 85 |
|
|
|
|
| 112 |
}
|
| 113 |
}
|
| 114 |
|
|
|
|
|
|
|
|
|
|
| 115 |
def size_readable(size_bytes: int):
|
| 116 |
if size_bytes == 0:
|
| 117 |
return f"0 {_i18n('bytes')}"
|
|
|
|
| 124 |
i += 1
|
| 125 |
return f"{size_bytes:.2f} {units[i]}"
|
| 126 |
|
| 127 |
+
def get_size_folder(folder: str | Path):
|
| 128 |
+
folder_path = Path(folder)
|
| 129 |
+
return sum([file.stat().st_size for file in folder_path.rglob('*') if file.is_file()])
|
| 130 |
+
|
| 131 |
+
def get_disk_usage(path="/content/drive/MyDrive", user_dir="", user_gdrive_dir="", list_subdirs=None):
    """Return a localized three-line summary of disk usage for *path*.

    Args:
        path: Any path on the filesystem to inspect.
        user_dir: Unused by this function; kept for signature compatibility.
        user_gdrive_dir: Unused by this function; kept for signature compatibility.
        list_subdirs: Unused by this function. Defaults to ``None`` rather than
            a shared mutable list.

    Returns:
        A string with total, used and free space (one per line), or an empty
        string if the usage cannot be determined or formatted.
    """
    try:
        usage = shutil.disk_usage(path)

        total_gb = size_readable(usage.total)
        used_gb = size_readable(usage.used)
        free_gb = size_readable(usage.free)
        # NOTE(review): the source view lost its indentation; the lines are
        # joined without leading whitespace - confirm against the UI output.
        return "\n".join((
            f"{_i18n('all_space')}: {total_gb}",
            f"{_i18n('used_space')}: {used_gb}",
            f"{_i18n('free_space')}: {free_gb}",
        ))
    except Exception:
        # Deliberate best effort: callers get an empty label instead of a
        # crash when the path is missing or not mounted.
        return ""
|
| 143 |
+
|
| 144 |
def define_audio_with_size(basename: bool = False, **kwargs):
|
| 145 |
path = kwargs.get("value", None)
|
| 146 |
if not path:
|
|
|
|
| 556 |
if hasattr(torch._C, "_jit_pass_onnx_clear_scope_records"):
|
| 557 |
try:
|
| 558 |
torch._C._jit_pass_onnx_clear_scope_records()
|
| 559 |
+
except Exception: pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
i18n.py
CHANGED
|
@@ -551,7 +551,22 @@ TRANSLATIONS: Dict[Language, Dict[str, str]] = {
|
|
| 551 |
"no": "Нет",
|
| 552 |
"zerogpu=true": "Среда выполнения - ZeroGPU",
|
| 553 |
"ensemble_processing": "Создание ансамбля",
|
| 554 |
-
"tracks": "треков"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 555 |
},
|
| 556 |
"en": {
|
| 557 |
"bytes": "B",
|
|
@@ -1087,7 +1102,20 @@ Example: `NAME_F0METHOD_PITCH` → `Song_custom_0`
|
|
| 1087 |
"no": "No",
|
| 1088 |
"zerogpu=true": "Runtime is ZeroGPU",
|
| 1089 |
"ensemble_processing": "Creating ensemble",
|
| 1090 |
-
"tracks": "tracks"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1091 |
}
|
| 1092 |
}
|
| 1093 |
|
|
|
|
| 551 |
"no": "Нет",
|
| 552 |
"zerogpu=true": "Среда выполнения - ZeroGPU",
|
| 553 |
"ensemble_processing": "Создание ансамбля",
|
| 554 |
+
"tracks": "треков",
|
| 555 |
+
"app_user_dir_help": "Путь к директории для хранения пользовательских файлов",
|
| 556 |
+
"gdrive_mount_found": "Обнаружен привязанный Google Диск",
|
| 557 |
+
"copy_to_gdrive": "Копирование данных на Google Диск",
|
| 558 |
+
"dirs": "директорий",
|
| 559 |
+
"copy_to_gdrive_done": "Копирование завершено",
|
| 560 |
+
"copied_dirs": "Скопировано директорий",
|
| 561 |
+
"copy_from_current_user_dir_to_gdrive": "Копировать все пользовательские данные на Google Диск",
|
| 562 |
+
"google_drive": "Google Диск",
|
| 563 |
+
"copy_from_gdrive_to_current_user_dir": "Копировать все пользовательские данные с Google Диска в среду выполнения",
|
| 564 |
+
"copy_to_current_user_dir": "Копирование данных в среду выполнения",
|
| 565 |
+
"free_space": "Свободно",
|
| 566 |
+
"used_space": "Использовано",
|
| 567 |
+
"all_space": "Всего",
|
| 568 |
+
"used_space_data_local": "Обьем пользовательских данных в среде выполнения",
|
| 569 |
+
"used_space_data_gdrive": "Обьем пользовательских данных на Google Диске"
|
| 570 |
},
|
| 571 |
"en": {
|
| 572 |
"bytes": "B",
|
|
|
|
| 1102 |
"no": "No",
|
| 1103 |
"zerogpu=true": "Runtime is ZeroGPU",
|
| 1104 |
"ensemble_processing": "Creating ensemble",
|
| 1105 |
+
"tracks": "tracks",
|
| 1106 |
+
"app_user_dir_help": "Path to directories for storing user files",
|
| 1107 |
+
"gdrive_mount_found": "Detected mounted Google Drive",
|
| 1108 |
+
"copy_to_gdrive": "Copying data to Google Drive",
|
| 1109 |
+
"dirs": "directories",
|
| 1110 |
+
"copy_to_gdrive_done": "Copy complete",
|
| 1111 |
+
"copied_dirs": "Directories copied",
|
| 1112 |
+
"copy_from_current_user_dir_to_gdrive": "Copy all user data to Google Drive",
|
| 1113 |
+
"google_drive": "Google Drive",
|
| 1114 |
+
"free_space": "Free",
|
| 1115 |
+
"used_space": "Used",
|
| 1116 |
+
"all_space": "All",
|
| 1117 |
+
"used_space_data_local": "User data space in runtime",
|
| 1118 |
+
"used_space_data_gdrive": "User data space on Google Drive"
|
| 1119 |
}
|
| 1120 |
}
|
| 1121 |
|
inference.py
CHANGED
|
@@ -273,24 +273,30 @@ class MSSI: # Music Source Separation Inference
|
|
| 273 |
def set_add_params(self, **kwargs):
|
| 274 |
self.add_params = kwargs
|
| 275 |
|
| 276 |
-
def load_config(self, model_type: str, conf: str):
|
| 277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
if model_type not in self.model_types:
|
| 279 |
raise UnknownModelType(_i18n("unknown_model_type", model_type=model_type))
|
| 280 |
self.model_type = model_type
|
| 281 |
try:
|
| 282 |
if self.model_type == "htdemucs":
|
| 283 |
-
self.config = OmegaConf.load(
|
| 284 |
self.sample_rate = self.config.training.samplerate
|
| 285 |
else:
|
| 286 |
-
with
|
| 287 |
self.config = ConfigDict(yaml.load(f, Loader=yaml.FullLoader))
|
| 288 |
self.sample_rate = self.config.audio.sample_rate
|
| 289 |
self.target_instrument = self.config.training.target_instrument
|
| 290 |
self.instruments = self.config.training.instruments
|
| 291 |
-
print(_i18n("config_loaded")+": "+
|
| 292 |
except FileNotFoundError:
|
| 293 |
self.config = None
|
|
|
|
| 294 |
self.model_type = None
|
| 295 |
self.target_instrument = None
|
| 296 |
self.instruments = []
|
|
@@ -298,6 +304,7 @@ class MSSI: # Music Source Separation Inference
|
|
| 298 |
raise FileNotFoundError(_i18n("config_not_found", path=conf)) from e
|
| 299 |
except Exception as e:
|
| 300 |
self.config = None
|
|
|
|
| 301 |
self.model_type = None
|
| 302 |
self.target_instrument = None
|
| 303 |
self.instruments = []
|
|
@@ -508,13 +515,13 @@ class MSSI: # Music Source Separation Inference
|
|
| 508 |
def load_checkpoint(self, ckpt: str | Path):
|
| 509 |
if not ckpt:
|
| 510 |
raise PathNotSpecified(_i18n("path_not_specified"))
|
| 511 |
-
|
| 512 |
-
if not
|
|
|
|
| 513 |
raise PathNotExist(_i18n("path_not_exist"))
|
| 514 |
if not self.model:
|
|
|
|
| 515 |
raise ModelNotLoaded(_i18n("model_not_loaded"))
|
| 516 |
-
|
| 517 |
-
self.ckpt_path = ckpt_
|
| 518 |
|
| 519 |
if self.model_type == "mdxnet":
|
| 520 |
try:
|
|
@@ -1312,7 +1319,7 @@ class MSSI: # Music Source Separation Inference
|
|
| 1312 |
result = self.output_arrays[primary_stem]
|
| 1313 |
return result, self.sample_rate
|
| 1314 |
|
| 1315 |
-
def load_model(self, model_type: str, ckpt: str, conf: str):
|
| 1316 |
self.clear_model()
|
| 1317 |
self.load_config(model_type=model_type, conf=conf)
|
| 1318 |
self.load_model_instance()
|
|
|
|
| 273 |
def set_add_params(self, **kwargs):
|
| 274 |
self.add_params = kwargs
|
| 275 |
|
| 276 |
+
def load_config(self, model_type: str, conf: str | Path):
|
| 277 |
+
if not conf:
|
| 278 |
+
raise PathNotSpecified(_i18n("path_not_specified"))
|
| 279 |
+
self.conf_path = Path(conf)
|
| 280 |
+
if not self.conf_path.exists():
|
| 281 |
+
self.conf_path = None
|
| 282 |
+
raise PathNotExist(_i18n("path_not_exist"))
|
| 283 |
if model_type not in self.model_types:
|
| 284 |
raise UnknownModelType(_i18n("unknown_model_type", model_type=model_type))
|
| 285 |
self.model_type = model_type
|
| 286 |
try:
|
| 287 |
if self.model_type == "htdemucs":
|
| 288 |
+
self.config = OmegaConf.load(self.conf_path)
|
| 289 |
self.sample_rate = self.config.training.samplerate
|
| 290 |
else:
|
| 291 |
+
with self.conf_path.open("r", encoding="utf-8") as f:
|
| 292 |
self.config = ConfigDict(yaml.load(f, Loader=yaml.FullLoader))
|
| 293 |
self.sample_rate = self.config.audio.sample_rate
|
| 294 |
self.target_instrument = self.config.training.target_instrument
|
| 295 |
self.instruments = self.config.training.instruments
|
| 296 |
+
print(_i18n("config_loaded")+": "+self.conf_path.name)
|
| 297 |
except FileNotFoundError:
|
| 298 |
self.config = None
|
| 299 |
+
self.conf_path = None
|
| 300 |
self.model_type = None
|
| 301 |
self.target_instrument = None
|
| 302 |
self.instruments = []
|
|
|
|
| 304 |
raise FileNotFoundError(_i18n("config_not_found", path=conf)) from e
|
| 305 |
except Exception as e:
|
| 306 |
self.config = None
|
| 307 |
+
self.conf_path = None
|
| 308 |
self.model_type = None
|
| 309 |
self.target_instrument = None
|
| 310 |
self.instruments = []
|
|
|
|
| 515 |
def load_checkpoint(self, ckpt: str | Path):
|
| 516 |
if not ckpt:
|
| 517 |
raise PathNotSpecified(_i18n("path_not_specified"))
|
| 518 |
+
self.ckpt_path = Path(ckpt)
|
| 519 |
+
if not self.ckpt_path.exists():
|
| 520 |
+
self.ckpt_path = None
|
| 521 |
raise PathNotExist(_i18n("path_not_exist"))
|
| 522 |
if not self.model:
|
| 523 |
+
self.ckpt_path = None
|
| 524 |
raise ModelNotLoaded(_i18n("model_not_loaded"))
|
|
|
|
|
|
|
| 525 |
|
| 526 |
if self.model_type == "mdxnet":
|
| 527 |
try:
|
|
|
|
| 1319 |
result = self.output_arrays[primary_stem]
|
| 1320 |
return result, self.sample_rate
|
| 1321 |
|
| 1322 |
+
def load_model(self, model_type: str, ckpt: str | Path, conf: str | Path):
|
| 1323 |
self.clear_model()
|
| 1324 |
self.load_config(model_type=model_type, conf=conf)
|
| 1325 |
self.load_model_instance()
|
vbach_lib/infer.py
CHANGED
|
@@ -1,398 +1,399 @@
|
|
| 1 |
-
from pathlib import Path
|
| 2 |
-
import sys
|
| 3 |
-
SCRIPT_DIR = Path(__file__).resolve().parent
|
| 4 |
-
sys.path.append(str(SCRIPT_DIR.parent))
|
| 5 |
-
from extra_utils import hf_spaces_gpu, extra_clear_torch_cache, nuclear_clear_model, emergency_ram_clear
|
| 6 |
-
if __package__:
|
| 7 |
-
from .hubert_manager import get_hubert, download_hubert, huberts_fairseq
|
| 8 |
-
from .pipeline import VC
|
| 9 |
-
from .config import Config
|
| 10 |
-
from .fairseq import load_model
|
| 11 |
-
from .algorithm.synthesizers import Synthesizer
|
| 12 |
-
else:
|
| 13 |
-
from vbach_lib.hubert_manager import get_hubert, download_hubert, huberts_fairseq
|
| 14 |
-
from vbach_lib.pipeline import VC
|
| 15 |
-
from vbach_lib.config import Config
|
| 16 |
-
from vbach_lib.fairseq import load_model
|
| 17 |
-
from vbach_lib.algorithm.synthesizers import Synthesizer
|
| 18 |
-
from transformers import HubertModel
|
| 19 |
-
from pathlib import Path
|
| 20 |
-
import traceback
|
| 21 |
-
from audio import read, write, split_channels, split_mid_side, multi_channel_array_from_arrays, output_formats, stereo_to_mono, reshape, mix_arrays, get_audio_files_from_list, check
|
| 22 |
-
from inference import PathsNotSpecified, PathNotExist, PathNotSpecified, FileIsNotAudio
|
| 23 |
-
from i18n import _i18n
|
| 24 |
-
from namer import Namer
|
| 25 |
-
from args_parser import parse_vbach_args
|
| 26 |
-
import numpy as np
|
| 27 |
-
import torch
|
| 28 |
-
from torch import nn
|
| 29 |
-
import gc
|
| 30 |
-
|
| 31 |
-
class VbachModelNotFound(Exception): pass
|
| 32 |
-
|
| 33 |
-
stereo_modes = ("mono", "left/right", "sim/dif")
|
| 34 |
-
|
| 35 |
-
class HubertModelWithFinalProj(HubertModel):
|
| 36 |
-
"""Hubert модель с финальной проекцией"""
|
| 37 |
-
|
| 38 |
-
def __init__(self, config):
|
| 39 |
-
super().__init__(config)
|
| 40 |
-
self.final_proj = nn.Linear(config.hidden_size, config.classifier_proj_size)
|
| 41 |
-
|
| 42 |
-
def load_audio(path: str | Path, sr: int, stereo_mode: str = stereo_modes[0]):
|
| 43 |
-
mixtures = []
|
| 44 |
-
add_text = []
|
| 45 |
-
if stereo_mode == "mono":
|
| 46 |
-
mix, _sr = read(path, sr, mono=True, flatten=True)
|
| 47 |
-
mixtures.append(mix)
|
| 48 |
-
add_text.append(None)
|
| 49 |
-
elif stereo_mode == "left/right":
|
| 50 |
-
mix, _sr = read(path, sr, mono=False)
|
| 51 |
-
mixtures.extend(split_channels(mix))
|
| 52 |
-
add_text.extend(["[L]", "[R]"])
|
| 53 |
-
elif stereo_mode == "sim/dif":
|
| 54 |
-
mix, _sr = read(path, sr, mono=False)
|
| 55 |
-
center, stereo_base = split_mid_side(mix, var=3, sr=sr)
|
| 56 |
-
phantom_center = stereo_to_mono(center, to_flatten=True)
|
| 57 |
-
stereo_base_left, stereo_base_right = split_channels(stereo_base)
|
| 58 |
-
mixtures.extend([phantom_center, stereo_base_left, stereo_base_right])
|
| 59 |
-
add_text.extend(["[Sim]", "[Dif-L]", "[Dif-R]"])
|
| 60 |
-
return mixtures, add_text
|
| 61 |
-
|
| 62 |
-
def post_process_audio(mixtures: list[np.ndarray], target_sr: int, stereo_mode: str = stereo_modes[0]):
|
| 63 |
-
if stereo_mode == "mono":
|
| 64 |
-
return reshape(mixtures[0], ("channels", "samples"))
|
| 65 |
-
elif stereo_mode == "left/right":
|
| 66 |
-
dtype = mixtures[0].dtype
|
| 67 |
-
return multi_channel_array_from_arrays(*mixtures, index=1, dtype=dtype)
|
| 68 |
-
elif stereo_mode == "sim/dif":
|
| 69 |
-
sim, dif_l, dif_r = mixtures
|
| 70 |
-
dtype = sim.dtype
|
| 71 |
-
sim_channel = multi_channel_array_from_arrays(sim, sim, index=1, dtype=dtype)
|
| 72 |
-
dif_channel = multi_channel_array_from_arrays(dif_l, dif_r, index=1, dtype=dtype)
|
| 73 |
-
return mix_arrays([sim_channel, dif_channel], [target_sr, target_sr], target_sr, index=1, dtype=dtype)[0]
|
| 74 |
-
|
| 75 |
-
class VbachConverter:
|
| 76 |
-
def __init__(self):
|
| 77 |
-
self.config = Config()
|
| 78 |
-
self.hubert_model = None
|
| 79 |
-
self.cpt = self.version = self.net_g = self.tgt_sr = self.vc = self.use_f0 = self.vocoder = self.emb_weight_shape = self.required_keys = self.missing_keys = self.text_enc_hidden_dim = None
|
| 80 |
-
def load_hubert(self, name: str, use_transformers: bool):
|
| 81 |
-
if use_transformers:
|
| 82 |
-
model_path = get_hubert(name, True)
|
| 83 |
-
self.hubert_model = HubertModelWithFinalProj.from_pretrained(model_path)
|
| 84 |
-
self.hubert_model = self.hubert_model.to(self.config.device)
|
| 85 |
-
else:
|
| 86 |
-
model_path = get_hubert(name, False)
|
| 87 |
-
self.hubert_model = load_model(model_path)
|
| 88 |
-
self.hubert_model = self.hubert_model.to(self.config.device)
|
| 89 |
-
self.hubert_model = self.hubert_model.half() if self.config.is_half else self.hubert_model.float()
|
| 90 |
-
self.hubert_model.eval()
|
| 91 |
-
print(_i18n("hubert_checkpoint_loaded")+": "+name)
|
| 92 |
-
|
| 93 |
-
def unload_hubert(self):
|
| 94 |
-
self.hubert_model = self.hubert_model.cpu()
|
| 95 |
-
self.hubert_model = None
|
| 96 |
-
gc.collect()
|
| 97 |
-
extra_clear_torch_cache()
|
| 98 |
-
nuclear_clear_model()
|
| 99 |
-
emergency_ram_clear()
|
| 100 |
-
|
| 101 |
-
def unload_model(self):
|
| 102 |
-
self.net_g = self.net_g.cpu()
|
| 103 |
-
del self.cpt, self.version, self.net_g, self.tgt_sr, self.vc, self.use_f0, self.vocoder, self.emb_weight_shape, self.required_keys, self.missing_keys, self.text_enc_hidden_dim
|
| 104 |
-
self.cpt = self.version = self.net_g = self.tgt_sr = self.vc = self.use_f0 = self.vocoder = self.emb_weight_shape = self.required_keys = self.missing_keys = self.text_enc_hidden_dim = None
|
| 105 |
-
extra_clear_torch_cache()
|
| 106 |
-
nuclear_clear_model()
|
| 107 |
-
emergency_ram_clear()
|
| 108 |
-
|
| 109 |
-
def clear_gpu_cache(self):
|
| 110 |
-
gc.collect()
|
| 111 |
-
torch.clear_autocast_cache()
|
| 112 |
-
if self.config.device.type == "mps":
|
| 113 |
-
torch.mps.empty_cache()
|
| 114 |
-
if self.config.device.type == "cuda":
|
| 115 |
-
torch.cuda.synchronize()
|
| 116 |
-
torch.cuda.ipc_collect()
|
| 117 |
-
torch.cuda.empty_cache()
|
| 118 |
-
|
| 119 |
-
def get_vc(self, model_path: str | Path, use_transformers: bool):
|
| 120 |
-
self.cpt = torch.load(model_path, map_location="cpu", weights_only=True)
|
| 121 |
-
self.required_keys = ["config", "weight"]
|
| 122 |
-
self.missing_keys = [key for key in self.required_keys if key not in self.cpt]
|
| 123 |
-
|
| 124 |
-
self.tgt_sr = self.cpt["config"][-1]
|
| 125 |
-
|
| 126 |
-
self.emb_weight_shape = self.cpt["weight"]["emb_g.weight"].shape
|
| 127 |
-
self.cpt["config"][-3] = self.emb_weight_shape[0]
|
| 128 |
-
|
| 129 |
-
self.use_f0 = self.cpt.get("f0", 1)
|
| 130 |
-
self.version = self.cpt.get("version", "v1")
|
| 131 |
-
self.vocoder = self.cpt.get("vocoder", "HiFi-GAN")
|
| 132 |
-
|
| 133 |
-
self.text_enc_hidden_dim = 768 if self.version == "v2" else 256
|
| 134 |
-
|
| 135 |
-
self.net_g = Synthesizer(
|
| 136 |
-
*self.cpt["config"],
|
| 137 |
-
use_f0=self.use_f0,
|
| 138 |
-
text_enc_hidden_dim=self.text_enc_hidden_dim,
|
| 139 |
-
vocoder=self.vocoder,
|
| 140 |
-
)
|
| 141 |
-
|
| 142 |
-
if hasattr(self.net_g, "enc_q"):
|
| 143 |
-
del self.net_g.enc_q
|
| 144 |
-
else:
|
| 145 |
-
pass
|
| 146 |
-
|
| 147 |
-
self.net_g.load_state_dict(
|
| 148 |
-
self.cpt["weight"], strict=False
|
| 149 |
-
)
|
| 150 |
-
self.net_g.eval()
|
| 151 |
-
|
| 152 |
-
self.net_g = self.net_g.to(self.config.device)
|
| 153 |
-
if self.config.is_half:
|
| 154 |
-
self.net_g = self.net_g.half()
|
| 155 |
-
else:
|
| 156 |
-
self.net_g = self.net_g.float()
|
| 157 |
-
|
| 158 |
-
self.vc = VC(self.tgt_sr, self.config, use_transformers)
|
| 159 |
-
print(_i18n("checkpoint_loaded")+": "+Path(model_path).name)
|
| 160 |
-
|
| 161 |
-
@hf_spaces_gpu
|
| 162 |
-
def convert_audio(
|
| 163 |
-
self,
|
| 164 |
-
audio_input: str | Path | list[str | Path],
|
| 165 |
-
output_dir: str | Path,
|
| 166 |
-
model_path: str,
|
| 167 |
-
index_path: str,
|
| 168 |
-
pitch: int = 0,
|
| 169 |
-
f0_method: str = "rmvpe+",
|
| 170 |
-
index_rate: float = 0.75,
|
| 171 |
-
volume_envelope: float = 0.25,
|
| 172 |
-
protect: float = 0.33,
|
| 173 |
-
hop_length: int = 128,
|
| 174 |
-
embedder_model: str = "hubert_base",
|
| 175 |
-
use_transformers: bool = False,
|
| 176 |
-
output_format: str = output_formats[0],
|
| 177 |
-
stereo_mode: str = stereo_modes[0],
|
| 178 |
-
f0_min: int = 50,
|
| 179 |
-
f0_max: int = 1100,
|
| 180 |
-
chunk_duration: int = 7,
|
| 181 |
-
template: str = "NAME_F0METHOD_PITCH",
|
| 182 |
-
**kwargs,
|
| 183 |
-
):
|
| 184 |
-
template = Namer.sanitize(template)
|
| 185 |
-
template = Namer.dedup_template(template, keys=["NAME", "F0METHOD", "PITCH"])
|
| 186 |
-
template = Namer.short(template, length=40)
|
| 187 |
-
|
| 188 |
-
if not model_path:
|
| 189 |
-
raise VbachModelNotFound()
|
| 190 |
-
|
| 191 |
-
self.get_vc(model_path, use_transformers)
|
| 192 |
-
|
| 193 |
-
if not self.hubert_model:
|
| 194 |
-
self.load_hubert(embedder_model, use_transformers)
|
| 195 |
-
|
| 196 |
-
if not output_dir:
|
| 197 |
-
output_dir = ""
|
| 198 |
-
|
| 199 |
-
output_dir = Path(output_dir)
|
| 200 |
-
|
| 201 |
-
input_valid_files = get_audio_files_from_list(audio_input, only_files=False)
|
| 202 |
-
if not input_valid_files:
|
| 203 |
-
raise PathsNotSpecified(_i18n("paths_not_specified"))
|
| 204 |
-
|
| 205 |
-
total = len(input_valid_files)
|
| 206 |
-
|
| 207 |
-
print(_i18n("f0_method")+": "+f0_method)
|
| 208 |
-
|
| 209 |
-
processed_audios = []
|
| 210 |
-
|
| 211 |
-
for i, audio_input_path in enumerate(input_valid_files, start=1):
|
| 212 |
-
try:
|
| 213 |
-
input_file_name = Path(audio_input_path).stem
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
self.
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
template = Namer.
|
| 282 |
-
template = Namer.
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
self.
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
|
|
|
| 398 |
download_hubert(args.embedder, args.use_transformers)
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
import sys
|
| 3 |
+
SCRIPT_DIR = Path(__file__).resolve().parent
|
| 4 |
+
sys.path.append(str(SCRIPT_DIR.parent))
|
| 5 |
+
from extra_utils import hf_spaces_gpu, extra_clear_torch_cache, nuclear_clear_model, emergency_ram_clear
|
| 6 |
+
if __package__:
|
| 7 |
+
from .hubert_manager import get_hubert, download_hubert, huberts_fairseq
|
| 8 |
+
from .pipeline import VC
|
| 9 |
+
from .config import Config
|
| 10 |
+
from .fairseq import load_model
|
| 11 |
+
from .algorithm.synthesizers import Synthesizer
|
| 12 |
+
else:
|
| 13 |
+
from vbach_lib.hubert_manager import get_hubert, download_hubert, huberts_fairseq
|
| 14 |
+
from vbach_lib.pipeline import VC
|
| 15 |
+
from vbach_lib.config import Config
|
| 16 |
+
from vbach_lib.fairseq import load_model
|
| 17 |
+
from vbach_lib.algorithm.synthesizers import Synthesizer
|
| 18 |
+
from transformers import HubertModel
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
import traceback
|
| 21 |
+
from audio import read, write, split_channels, split_mid_side, multi_channel_array_from_arrays, output_formats, stereo_to_mono, reshape, mix_arrays, get_audio_files_from_list, check
|
| 22 |
+
from inference import PathsNotSpecified, PathNotExist, PathNotSpecified, FileIsNotAudio
|
| 23 |
+
from i18n import _i18n
|
| 24 |
+
from namer import Namer
|
| 25 |
+
from args_parser import parse_vbach_args
|
| 26 |
+
import numpy as np
|
| 27 |
+
import torch
|
| 28 |
+
from torch import nn
|
| 29 |
+
import gc
|
| 30 |
+
|
| 31 |
+
class VbachModelNotFound(Exception):
    """Raised by ``convert_audio`` when no model path is supplied."""
|
| 32 |
+
|
| 33 |
+
stereo_modes = ("mono", "left/right", "sim/dif")
|
| 34 |
+
|
| 35 |
+
class HubertModelWithFinalProj(HubertModel):
    """Hubert model with a final projection layer."""

    def __init__(self, config):
        super().__init__(config)
        # Linear layer mapping hidden_size -> classifier_proj_size, both read
        # from the model config.
        in_features = config.hidden_size
        out_features = config.classifier_proj_size
        self.final_proj = nn.Linear(in_features, out_features)
|
| 41 |
+
|
| 42 |
+
def load_audio(path: str | Path, sr: int, stereo_mode: str = stereo_modes[0]):
    """Read an audio file and split it into the signals to be converted.

    Args:
        path: Audio file to read.
        sr: Sample rate passed to ``read`` (and to ``split_mid_side``).
        stereo_mode: One of ``"mono"``, ``"left/right"`` or ``"sim/dif"``.

    Returns:
        ``(mixtures, add_text)``: the list of signals and a parallel list of
        labels (``None`` for mono). Both lists are empty for any other mode.
    """
    if stereo_mode == "mono":
        mono_mix, _ = read(path, sr, mono=True, flatten=True)
        return [mono_mix], [None]

    if stereo_mode == "left/right":
        stereo_mix, _ = read(path, sr, mono=False)
        return list(split_channels(stereo_mix)), ["[L]", "[R]"]

    if stereo_mode == "sim/dif":
        stereo_mix, _ = read(path, sr, mono=False)
        center, stereo_base = split_mid_side(stereo_mix, var=3, sr=sr)
        phantom_center = stereo_to_mono(center, to_flatten=True)
        side_left, side_right = split_channels(stereo_base)
        return [phantom_center, side_left, side_right], ["[Sim]", "[Dif-L]", "[Dif-R]"]

    # Unrecognized mode: nothing is read.
    return [], []
|
| 61 |
+
|
| 62 |
+
def post_process_audio(mixtures: list[np.ndarray], target_sr: int, stereo_mode: str = stereo_modes[0]):
    """Recombine converted signals into one output array for *stereo_mode*.

    Args:
        mixtures: Converted signals, in the order produced by ``load_audio``.
        target_sr: Sample rate passed to ``mix_arrays`` in ``"sim/dif"`` mode.
        stereo_mode: One of ``"mono"``, ``"left/right"`` or ``"sim/dif"``.

    Returns:
        The combined array, or ``None`` for any other mode.
    """
    if stereo_mode == "mono":
        return reshape(mixtures[0], ("channels", "samples"))

    if stereo_mode == "left/right":
        return multi_channel_array_from_arrays(*mixtures, index=1, dtype=mixtures[0].dtype)

    if stereo_mode == "sim/dif":
        sim, dif_l, dif_r = mixtures
        out_dtype = sim.dtype
        # The shared component is duplicated onto both channels, then mixed
        # with the left/right difference pair.
        sim_pair = multi_channel_array_from_arrays(sim, sim, index=1, dtype=out_dtype)
        dif_pair = multi_channel_array_from_arrays(dif_l, dif_r, index=1, dtype=out_dtype)
        mixed = mix_arrays([sim_pair, dif_pair], [target_sr, target_sr], target_sr, index=1, dtype=out_dtype)
        return mixed[0]

    return None
|
| 74 |
+
|
| 75 |
+
class VbachConverter:
|
| 76 |
+
def __init__(self):
    """Create a converter with a fresh ``Config`` and no models loaded."""
    self.config = Config()
    self.hubert_model = None
    # Everything populated later by get_vc() starts out as None.
    for attr_name in (
        "text_enc_hidden_dim",
        "missing_keys",
        "required_keys",
        "emb_weight_shape",
        "vocoder",
        "use_f0",
        "vc",
        "tgt_sr",
        "net_g",
        "version",
        "cpt",
    ):
        setattr(self, attr_name, None)

|
| 80 |
+
def load_hubert(self, name: str, use_transformers: bool):
    """Load the embedder model *name* into ``self.hubert_model``.

    Args:
        name: Embedder name, resolved to a path by ``get_hubert``.
        use_transformers: When true, load with
            ``HubertModelWithFinalProj.from_pretrained``; otherwise with
            ``load_model``.
    """
    model_path = get_hubert(name, True if use_transformers else False)
    loader = HubertModelWithFinalProj.from_pretrained if use_transformers else load_model
    self.hubert_model = loader(model_path)
    self.hubert_model = self.hubert_model.to(self.config.device)
    # NOTE(review): the source view lost its indentation; the precision cast,
    # eval() and log line are assumed to apply to both branches - confirm.
    if self.config.is_half:
        self.hubert_model = self.hubert_model.half()
    else:
        self.hubert_model = self.hubert_model.float()
    self.hubert_model.eval()
    print(_i18n("hubert_checkpoint_loaded")+": "+name)
|
| 92 |
+
|
| 93 |
+
def unload_hubert(self):
    """Release the embedder model and run the memory clean-up helpers.

    Safe to call when no embedder is loaded: the move to CPU is skipped and
    only the clean-up helpers run.
    """
    if self.hubert_model is not None:
        # Move the model to CPU before dropping the reference.
        self.hubert_model = self.hubert_model.cpu()
        self.hubert_model = None
    gc.collect()
    extra_clear_torch_cache()
    nuclear_clear_model()
    emergency_ram_clear()
|
| 100 |
+
|
| 101 |
+
def unload_model(self):
    """Release the voice model state and run the memory clean-up helpers.

    Resets every attribute populated by ``get_vc`` to ``None``. Safe to call
    when no model is loaded: the move to CPU is skipped.
    """
    if self.net_g is not None:
        # Move the model to CPU before dropping the reference.
        self.net_g = self.net_g.cpu()
    # Rebinding to None drops the references; a separate `del` is not needed.
    self.cpt = self.version = self.net_g = self.tgt_sr = self.vc = self.use_f0 = self.vocoder = self.emb_weight_shape = self.required_keys = self.missing_keys = self.text_enc_hidden_dim = None
    extra_clear_torch_cache()
    nuclear_clear_model()
    emergency_ram_clear()
|
| 108 |
+
|
| 109 |
+
def clear_gpu_cache(self):
    """Run garbage collection and empty the cache of the configured device.

    Always clears the autocast cache; additionally empties the MPS or CUDA
    cache depending on ``self.config.device.type``.
    """
    gc.collect()
    torch.clear_autocast_cache()
    device_type = self.config.device.type
    if device_type == "mps":
        torch.mps.empty_cache()
    elif device_type == "cuda":
        # NOTE(review): the source view lost its indentation; all three CUDA
        # calls are assumed to sit under the "cuda" check - confirm.
        for release in (torch.cuda.synchronize, torch.cuda.ipc_collect, torch.cuda.empty_cache):
            release()
|
| 118 |
+
|
| 119 |
+
def get_vc(self, model_path: str | Path, use_transformers: bool):
    """Load a voice model checkpoint and build the conversion pipeline.

    Populates ``self.cpt``, ``self.net_g``, ``self.vc`` and the related
    metadata attributes from the checkpoint at *model_path*.

    Args:
        model_path: Path to the checkpoint file read with ``torch.load``.
        use_transformers: Passed through to ``VC``.

    Raises:
        KeyError: If the checkpoint lacks the ``"config"`` or ``"weight"`` entry.
    """
    self.cpt = torch.load(model_path, map_location="cpu", weights_only=True)
    self.required_keys = ["config", "weight"]
    self.missing_keys = [key for key in self.required_keys if key not in self.cpt]
    if self.missing_keys:
        # Fail with a clear message instead of a bare KeyError further down.
        raise KeyError(
            f"Checkpoint {Path(model_path).name} is missing required keys: {', '.join(self.missing_keys)}"
        )

    # The last config entry is used as the target sample rate.
    self.tgt_sr = self.cpt["config"][-1]

    # config[-3] is overwritten with the first dimension of the emb_g weight
    # (presumably the speaker-embedding count - verify against Synthesizer).
    self.emb_weight_shape = self.cpt["weight"]["emb_g.weight"].shape
    self.cpt["config"][-3] = self.emb_weight_shape[0]

    self.use_f0 = self.cpt.get("f0", 1)
    self.version = self.cpt.get("version", "v1")
    self.vocoder = self.cpt.get("vocoder", "HiFi-GAN")

    self.text_enc_hidden_dim = 768 if self.version == "v2" else 256

    self.net_g = Synthesizer(
        *self.cpt["config"],
        use_f0=self.use_f0,
        text_enc_hidden_dim=self.text_enc_hidden_dim,
        vocoder=self.vocoder,
    )

    # Drop enc_q, if present, before loading the weights non-strictly.
    if hasattr(self.net_g, "enc_q"):
        del self.net_g.enc_q

    self.net_g.load_state_dict(
        self.cpt["weight"], strict=False
    )
    self.net_g.eval()

    self.net_g = self.net_g.to(self.config.device)
    if self.config.is_half:
        self.net_g = self.net_g.half()
    else:
        self.net_g = self.net_g.float()

    # NOTE(review): the source view lost its indentation; building VC and the
    # log line are assumed to run for both precisions - confirm.
    self.vc = VC(self.tgt_sr, self.config, use_transformers)
    print(_i18n("checkpoint_loaded")+": "+Path(model_path).name)
|
| 160 |
+
|
| 161 |
+
@hf_spaces_gpu  # (duration=120) for a LongQuota Space / extended quota on HuggingFace ZeroGPU (the default is 60 seconds)
def convert_audio(
    self,
    audio_input: str | Path | list[str | Path],
    output_dir: str | Path,
    model_path: str,
    index_path: str,
    pitch: int = 0,
    f0_method: str = "rmvpe+",
    index_rate: float = 0.75,
    volume_envelope: float = 0.25,
    protect: float = 0.33,
    hop_length: int = 128,
    embedder_model: str = "hubert_base",
    use_transformers: bool = False,
    output_format: str = output_formats[0],
    stereo_mode: str = stereo_modes[0],
    f0_min: int = 50,
    f0_max: int = 1100,
    chunk_duration: int = 7,
    template: str = "NAME_F0METHOD_PITCH",
    **kwargs,
):
    """Convert one or more audio inputs with the voice model at *model_path*.

    The voice model is loaded through ``self.get_vc`` and the embedder
    through ``self.load_hubert`` (the latter only when ``self.hubert_model``
    is falsy).  Every input resolved by ``get_audio_files_from_list`` is
    then loaded, run through ``self.vc.pipeline`` once per item returned by
    ``load_audio``, post-processed and written to *output_dir*.

    A failure while processing a single input is printed with its traceback
    and that input is skipped; the remaining inputs are still processed.
    Both models are unloaded before this method returns or propagates an
    error raised after loading.

    Args:
        audio_input: A path or a list of paths handed to
            ``get_audio_files_from_list``.
        output_dir: Directory for the results; a falsy value is replaced by
            an empty relative path.
        model_path: Voice model checkpoint passed to ``self.get_vc``.
        index_path: Forwarded to the pipeline as ``file_index``.
        pitch: Forwarded to the pipeline and used in the output name.
        f0_method: Forwarded to the pipeline and used in the output name.
        index_rate, volume_envelope, protect, hop_length, f0_min, f0_max,
            chunk_duration: Forwarded unchanged to ``self.vc.pipeline``.
        embedder_model: Embedder passed to ``self.load_hubert``.
        use_transformers: Passed to ``self.get_vc`` and ``self.load_hubert``.
        output_format: Extension appended to the generated file name.
        stereo_mode: Passed to ``load_audio`` and ``post_process_audio``.
        template: Output name template built from the ``NAME``,
            ``F0METHOD`` and ``PITCH`` keys.
        **kwargs: Accepted for call compatibility; not used here.

    Returns:
        A list with the value returned by ``write`` for every input that
        was converted successfully (presumably the written paths — confirm
        against ``write``).

    Raises:
        VbachModelNotFound: If *model_path* is falsy.
        PathsNotSpecified: If no usable input was resolved.
    """
    template = Namer.sanitize(template)
    template = Namer.dedup_template(template, keys=["NAME", "F0METHOD", "PITCH"])
    template = Namer.short(template, length=40)

    if not model_path:
        raise VbachModelNotFound()

    self.get_vc(model_path, use_transformers)

    if not self.hubert_model:
        self.load_hubert(embedder_model, use_transformers)

    processed_audios = []

    # try/finally guarantees the models are released even when an error is
    # raised after loading (e.g. PathsNotSpecified).  Without it the embedder
    # would stay loaded and, because of the `if not self.hubert_model` guard
    # above, could be silently reused by a later call that asks for a
    # different `embedder_model`.
    try:
        output_dir = Path(output_dir or "")

        input_valid_files = get_audio_files_from_list(audio_input, only_files=False)
        if not input_valid_files:
            raise PathsNotSpecified(_i18n("paths_not_specified"))

        total = len(input_valid_files)

        print(_i18n("f0_method")+": "+f0_method)

        for i, audio_input_path in enumerate(input_valid_files, start=1):
            try:
                input_file_name = Path(audio_input_path).stem

                mixtures, add_text = load_audio(audio_input_path, 16000, stereo_mode)
                print(_i18n("loaded_mix")+": "+Path(audio_input_path).name)
                converted_mixtures = []

                for mix, add_text_progress in zip(mixtures, add_text):
                    # Scale down in place so the peak becomes 0.95 whenever
                    # it exceeds that level.
                    audio_max = np.abs(mix).max() / 0.95
                    if audio_max > 1:
                        mix /= audio_max
                    audio_opt = self.vc.pipeline(
                        model=self.hubert_model,
                        net_g=self.net_g,
                        sid=0,
                        audio=mix,
                        pitch=pitch,
                        f0_method=f0_method,
                        hop_length=hop_length,
                        file_index=index_path,
                        index_rate=index_rate,
                        pitch_guidance=self.use_f0,
                        volume_envelope=volume_envelope,
                        version=self.version,
                        protect=protect,
                        tgt_sr=self.tgt_sr,
                        f0_min=f0_min,
                        f0_max=f0_max,
                        chunk_duration=chunk_duration,
                        add_text_channel=add_text_progress,
                        add_text_custom=f"{i}/{total} {_i18n('files')}",
                    )
                    converted_mixtures.append(audio_opt)

                custom_name = Namer.template(
                    template,
                    PITCH=pitch,
                    F0METHOD=f0_method,
                    NAME=Namer.short_input_name_template(template, PITCH=pitch, F0METHOD=f0_method, NAME=input_file_name)
                )
                processed_audios.append(write(Namer.iter(output_dir / f"{custom_name}.{output_format}"), post_process_audio(converted_mixtures, self.tgt_sr, stereo_mode), self.tgt_sr))
            except Exception:
                # Deliberate best effort: report the failure and carry on
                # with the remaining inputs.
                traceback.print_exc()
    finally:
        self.unload_model()
        self.unload_hubert()

    return processed_audios
|
| 259 |
+
|
| 260 |
+
@hf_spaces_gpu  # (duration=120) for a LongQuota Space / extended quota on HuggingFace ZeroGPU (the default is 60 seconds)
def convert_audio_custom_f0(
    self,
    audio_input: str | Path,
    output_dir: str | Path,
    model_path: str,
    index_path: str,
    pitch: int = 0,
    f0_file: str | Path = None,
    index_rate: float = 0.75,
    volume_envelope: float = 0.25,
    protect: float = 0.33,
    embedder_model: str = "hubert_base",
    use_transformers: bool = False,
    output_format: str = output_formats[0],
    f0_min: int = 50,
    f0_max: int = 1100,
    chunk_duration: int = 7,
    template: str = "NAME_F0METHOD_PITCH",
    **kwargs,
):
    """Convert a single audio file using a user-supplied F0 file.

    Loads the voice model (``self.get_vc``) and, when ``self.hubert_model``
    is falsy, the embedder (``self.load_hubert``).  The input is read as
    mono at 16000 via ``read``, passed through
    ``self.vc.pipeline_custom_f0`` and written into *output_dir* under a
    name built from *template* with ``F0METHOD`` fixed to ``"custom"``.

    Any exception raised while validating, converting or writing the file
    (including the path checks below) is printed with its traceback and
    swallowed; the method then returns ``None``.  Both models are unloaded
    afterwards in either case.

    Args:
        audio_input: Path of the file to convert.
        output_dir: Directory for the result; a falsy value is replaced by
            an empty relative path.
        model_path: Voice model checkpoint passed to ``self.get_vc``.
        index_path: Forwarded to the pipeline as ``file_index``.
        pitch: Forwarded to the pipeline and used in the output name.
        f0_file: Forwarded to the pipeline as ``f0_file``.
        index_rate, volume_envelope, protect, f0_min, f0_max,
            chunk_duration: Forwarded unchanged to the pipeline.
        embedder_model: Embedder passed to ``self.load_hubert``.
        use_transformers: Passed to ``self.get_vc`` and ``self.load_hubert``.
        output_format: Extension appended to the generated file name.
        template: Output name template built from the ``NAME``,
            ``F0METHOD`` and ``PITCH`` keys.
        **kwargs: Accepted for call compatibility; not used here.

    Returns:
        The value returned by ``write`` on success, otherwise ``None``.

    Raises:
        VbachModelNotFound: If *model_path* is falsy.
    """
    name_keys = ["NAME", "F0METHOD", "PITCH"]
    template = Namer.short(
        Namer.dedup_template(Namer.sanitize(template), keys=name_keys),
        length=40,
    )

    if not model_path:
        raise VbachModelNotFound()

    self.get_vc(model_path, use_transformers)

    if not self.hubert_model:
        self.load_hubert(embedder_model, use_transformers)

    output_dir = Path(output_dir if output_dir else "")
    output_path = None

    print(_i18n("f0_method") + ": " + "custom")

    try:
        # Validate the input up front; these errors are reported by the
        # handler below rather than propagated to the caller.
        if not audio_input:
            raise PathNotSpecified(_i18n("path_not_specified"))
        source_path = Path(audio_input)
        if not source_path.exists():
            raise PathNotExist(_i18n("path_not_exist"))
        if not check(source_path):
            raise FileIsNotAudio(_i18n("file_is_not_audio", path=source_path))

        input_file_name = source_path.stem
        mix, _ = read(source_path, sr=16000, mono=True, flatten=True)
        print(_i18n("loaded_mix") + ": " + source_path.name)

        # Scale down in place so the peak becomes 0.95 whenever it exceeds
        # that level.
        peak = np.abs(mix).max() / 0.95
        if peak > 1:
            mix /= peak

        audio_opt = self.vc.pipeline_custom_f0(
            model=self.hubert_model,
            net_g=self.net_g,
            sid=0,
            audio=mix,
            pitch=pitch,
            f0_file=f0_file,
            file_index=index_path,
            index_rate=index_rate,
            pitch_guidance=self.use_f0,
            volume_envelope=volume_envelope,
            version=self.version,
            protect=protect,
            tgt_sr=self.tgt_sr,
            f0_min=f0_min,
            f0_max=f0_max,
            chunk_duration=chunk_duration,
            add_text_channel="",
            add_text_custom=f"{_i18n('custom_f0')}",
        )

        short_name = Namer.short_input_name_template(
            template, PITCH=pitch, F0METHOD="custom", NAME=input_file_name
        )
        custom_name = Namer.template(
            template, PITCH=pitch, F0METHOD="custom", NAME=short_name
        )
        target = Namer.iter(output_dir / f"{custom_name}.{output_format}")
        output_path = write(target, audio_opt, self.tgt_sr)
    except Exception:
        traceback.print_exc()

    self.unload_model()
    self.unload_hubert()

    return output_path
|
| 351 |
+
|
| 352 |
+
if __name__ == "__main__":
    # Command-line entry point: dispatch on the parsed `mode`.
    vbach = VbachConverter()
    args = parse_vbach_args()
    mode = args.mode

    if mode in ("infer", "infer_custom_f0"):
        # Both conversion modes fetch the embedder first.
        download_hubert(args.embedder, args.use_transformers)

        # Keyword arguments shared by both conversion entry points.
        shared_options = dict(
            audio_input=args.input,
            output_dir=args.output_dir,
            model_path=args.checkpoint_path,
            index_path=args.index_path,
            pitch=args.pitch,
            index_rate=args.index_rate,
            volume_envelope=args.volume_envelope,
            protect=args.protect,
            embedder_model=args.embedder,
            use_transformers=args.use_transformers,
            output_format=args.output_format,
            stereo_mode=args.stereo_mode,
            f0_min=args.f0_min,
            f0_max=args.f0_max,
            chunk_duration=args.chunk_duration,
            template=args.template,
        )

        if mode == "infer":
            vbach.convert_audio(
                f0_method=args.f0_method,
                hop_length=args.hop_length,
                **shared_options,
            )
        else:
            vbach.convert_audio_custom_f0(
                f0_file=args.f0_file,
                **shared_options,
            )
    elif mode == "download_hubert":
        download_hubert(args.embedder, args.use_transformers)
|