Add OmniVoice TTS module with config, API, profiles and CLI

- Create modules/omnivoice/ with VoiceAPI, VoiceProfiles, CLI
- Add config manager integration with local model support
- Add app/komAI.py entry point
- Add tests/test_omnivoice.py
- Clone OmniVoice to external/ for development
- Add omnivoice config to global.yaml
This commit is contained in:
2026-04-16 17:51:15 +03:00
parent 22b85455e1
commit 55353654b7
11 changed files with 1064 additions and 3 deletions

245
modules/omnivoice/cli.py Normal file
View File

@@ -0,0 +1,245 @@
import argparse
import sys
from pathlib import Path
def cmd_clone(args):
    """Handle the ``clone`` subcommand: synthesize text in a cloned voice.

    Forwards the text, reference audio/text, step count and speed from
    ``args`` to ``api.clone`` and passes the first element of the result,
    together with ``args.output``, to ``api.save_audio``. When
    ``args.profile`` is set, the same reference inputs are stored under
    that name via ``profiles.save_from_generated``.

    Args:
        args: Parsed CLI namespace providing ``text``, ``ref_audio``,
            ``ref_text``, ``steps``, ``speed``, ``output`` and ``profile``.

    Returns:
        ``0``, used by the caller as the process exit status.
    """
    from .api import api
    from .profiles import profiles

    generated = api.clone(
        text=args.text,
        ref_audio=args.ref_audio,
        ref_text=args.ref_text,
        num_steps=args.steps,
        speed=args.speed,
    )

    # A falsy result from save_audio is reported as "Playing..." --
    # presumably the audio was played rather than written; confirm
    # against api.save_audio.
    saved_to = api.save_audio(generated[0], args.output)
    print(f"Saved: {saved_to}" if saved_to else "Playing...")

    if not args.profile:
        return 0

    profiles.save_from_generated(
        name=args.profile,
        text=args.text,
        ref_audio=args.ref_audio,
        ref_text=args.ref_text,
    )
    print(f"Profile saved: {args.profile}")
    return 0
def cmd_design(args):
    """Handle the ``design`` subcommand: synthesize text from an instruction.

    Calls ``api.design`` with the text, the voice instruction, the step
    count and the speed taken from ``args``, then passes the first element
    of the result and ``args.output`` to ``api.save_audio``. When
    ``args.profile`` is set, the text and instruction are stored under that
    name via ``profiles.save_from_generated``.

    Args:
        args: Parsed CLI namespace providing ``text``, ``instruct``,
            ``steps``, ``speed``, ``output`` and ``profile``.

    Returns:
        ``0``, used by the caller as the process exit status.
    """
    from .api import api
    from .profiles import profiles

    generated = api.design(
        text=args.text,
        instruct=args.instruct,
        num_steps=args.steps,
        speed=args.speed,
    )

    # save_audio's result decides which status line is shown; a falsy
    # value is reported as "Playing...".
    destination = api.save_audio(generated[0], args.output)
    status = f"Saved: {destination}" if destination else "Playing..."
    print(status)

    if not args.profile:
        return 0

    profiles.save_from_generated(
        name=args.profile,
        text=args.text,
        instruct=args.instruct,
    )
    print(f"Profile saved: {args.profile}")
    return 0
def cmd_auto(args):
    """Handle the ``auto`` subcommand: synthesize text without a voice spec.

    Calls ``api.auto`` with the text, step count and speed from ``args``
    and passes the first element of the result, together with
    ``args.output``, to ``api.save_audio``.

    Args:
        args: Parsed CLI namespace providing ``text``, ``steps``, ``speed``
            and ``output``.

    Returns:
        ``0``, used by the caller as the process exit status.
    """
    from .api import api

    generated = api.auto(
        text=args.text,
        num_steps=args.steps,
        speed=args.speed,
    )

    # A falsy result from save_audio is reported as "Playing...".
    written = api.save_audio(generated[0], args.output)
    print(f"Saved: {written}" if written else "Playing...")
    return 0
def cmd_profile_list(args):
    """Handle the ``profiles`` subcommand: print one line per stored profile.

    Each line has the form ``<name> [<mode>]: <description>``, where mode
    and description are read from the object returned by ``profiles.get``.

    Args:
        args: Parsed CLI namespace (unused by this command).

    Returns:
        ``0``, used by the caller as the process exit status.
    """
    from .profiles import profiles

    for profile_name in profiles.list():
        entry = profiles.get(profile_name)
        line = f"{profile_name} [{entry.mode}]: {entry.description}"
        print(line)
    return 0
def cmd_profile_add(args):
    """Handle the ``profile-add`` subcommand: register a new voice profile.

    Builds a ``VoiceProfile`` from the CLI arguments and hands it to
    ``profiles.add``. A missing or empty description is stored as ``""``.

    Args:
        args: Parsed CLI namespace providing ``name``, ``ref_audio``,
            ``ref_text``, ``instruct`` and ``description``.

    Returns:
        ``0``, used by the caller as the process exit status.
    """
    from .profiles import profiles, VoiceProfile

    # Normalise an omitted description to an empty string.
    description = args.description or ""
    profiles.add(
        VoiceProfile(
            name=args.name,
            ref_audio=args.ref_audio,
            ref_text=args.ref_text,
            instruct=args.instruct,
            description=description,
        )
    )
    print(f"Profile added: {args.name}")
    return 0
def cmd_profile_remove(args):
    """Handle the ``profile-remove`` subcommand: delete a profile by name.

    Args:
        args: Parsed CLI namespace providing ``name``.

    Returns:
        ``0`` when ``profiles.remove`` returns a truthy value, otherwise
        ``1`` after printing a "not found" message to stderr.
    """
    from .profiles import profiles

    was_removed = profiles.remove(args.name)
    if not was_removed:
        print(f"Profile not found: {args.name}", file=sys.stderr)
        return 1

    print(f"Profile removed: {args.name}")
    return 0
def cmd_profile_use(args):
    """Handle the ``profile-use`` subcommand: synthesize text with a profile.

    Calls ``profiles.generate`` with the profile name, text, step count
    and speed (positionally, in that order) and passes the first element
    of the result, together with ``args.output``, to ``api.save_audio``.

    Args:
        args: Parsed CLI namespace providing ``profile``, ``text``,
            ``steps``, ``speed`` and ``output``.

    Returns:
        ``0``, used by the caller as the process exit status.
    """
    from .api import api
    from .profiles import profiles

    generated = profiles.generate(args.profile, args.text, args.steps, args.speed)

    # A falsy result from save_audio is reported as "Playing...".
    out_path = api.save_audio(generated[0], args.output)
    message = f"Saved: {out_path}" if out_path else "Playing..."
    print(message)
    return 0
def cmd_download(args):
    """Handle the ``download`` subcommand: fetch the model into a local folder.

    The repository id is the first truthy value among ``args.model``, the
    ``MODEL_NAME`` entry of the ``omnivoice`` config category, and
    ``DEFAULT_MODEL_NAME``. The target directory is resolved the same way
    from ``args.path``, the ``MODEL_PATH`` config entry, and
    ``DEFAULT_MODEL_PATH``. If the target already exists and contains at
    least one entry, nothing is downloaded.

    Args:
        args: Parsed CLI namespace providing ``model`` and ``path``.

    Returns:
        ``0``, used by the caller as the process exit status.
    """
    from huggingface_hub import snapshot_download
    import src.utils.config_manager as config
    from modules.omnivoice.config import (
        MODEL_PATH,
        DEFAULT_MODEL_PATH,
        MODEL_NAME,
        DEFAULT_MODEL_NAME,
    )

    repo_id = args.model
    if not repo_id:
        repo_id = config.config.get(MODEL_NAME, cat="omnivoice") or DEFAULT_MODEL_NAME

    # ``or`` short-circuits, so the config is only consulted when no
    # explicit --path was given.
    target_dir = Path(
        args.path
        or config.config.get(MODEL_PATH, cat="omnivoice")
        or DEFAULT_MODEL_PATH
    )

    already_populated = target_dir.exists() and any(target_dir.iterdir())
    if already_populated:
        print(f"Model already exists: {target_dir}")
        print("Delete folder to re-download")
        return 0

    print(f"Downloading model {repo_id} to {target_dir}...")
    target_dir.mkdir(parents=True, exist_ok=True)
    # Files matching these weight-file patterns are excluded from the
    # snapshot. NOTE(review): presumably the model ships its weights in
    # another format -- confirm.
    skipped_patterns = ["*.pt", "*.bin", "*.pth"]
    snapshot_download(
        repo_id=repo_id,
        local_dir=str(target_dir),
        ignore_patterns=skipped_patterns,
    )
    print(f"Model saved to: {target_dir}")
    return 0
def _build_parser():
    """Build the ``komai-voice`` parser with one subparser per command.

    Every subparser stores its handler in ``func`` via ``set_defaults`` so
    the caller can dispatch with ``args.func(args)``.
    """
    parser = argparse.ArgumentParser(prog="komai-voice")
    subparsers = parser.add_subparsers()

    def add_synthesis_options(sub):
        # --output/--steps/--speed are shared by every generating command
        # and always appear consecutively in this order.
        sub.add_argument("--output")
        sub.add_argument("--steps", type=int)
        sub.add_argument("--speed", type=float)

    p_download = subparsers.add_parser("download", help="Download model locally")
    # Deliberately no argparse default: cmd_download falls back to the
    # configured model name and then to DEFAULT_MODEL_NAME, and a
    # hard-coded default here would make that fallback unreachable.
    p_download.add_argument("--model", help="Model name on HuggingFace")
    p_download.add_argument("--path", help="Local path to save model")
    p_download.set_defaults(func=cmd_download)

    p_clone = subparsers.add_parser("clone", help="Voice cloning")
    p_clone.add_argument("--text", required=True)
    p_clone.add_argument("--ref-audio", required=True)
    p_clone.add_argument("--ref-text")
    add_synthesis_options(p_clone)
    p_clone.add_argument("--profile", help="Save as profile")
    p_clone.set_defaults(func=cmd_clone)

    p_design = subparsers.add_parser("design", help="Voice design")
    p_design.add_argument("--text", required=True)
    p_design.add_argument("--instruct", required=True)
    add_synthesis_options(p_design)
    p_design.add_argument("--profile", help="Save as profile")
    p_design.set_defaults(func=cmd_design)

    p_auto = subparsers.add_parser("auto", help="Auto voice")
    p_auto.add_argument("--text", required=True)
    add_synthesis_options(p_auto)
    p_auto.set_defaults(func=cmd_auto)

    p_list = subparsers.add_parser("profiles", help="List profiles")
    p_list.set_defaults(func=cmd_profile_list)

    p_add = subparsers.add_parser("profile-add", help="Add profile")
    p_add.add_argument("--name", required=True)
    p_add.add_argument("--ref-audio")
    p_add.add_argument("--ref-text")
    p_add.add_argument("--instruct")
    p_add.add_argument("--description")
    p_add.set_defaults(func=cmd_profile_add)

    p_rm = subparsers.add_parser("profile-remove", help="Remove profile")
    p_rm.add_argument("--name", required=True)
    p_rm.set_defaults(func=cmd_profile_remove)

    p_use = subparsers.add_parser("profile-use", help="Generate using profile")
    p_use.add_argument("--profile", required=True)
    p_use.add_argument("--text", required=True)
    add_synthesis_options(p_use)
    p_use.set_defaults(func=cmd_profile_use)

    return parser


def main(argv=None):
    """Run the ``komai-voice`` command-line interface.

    Registers the omnivoice config section (best effort), loads the
    configuration, parses ``argv`` and dispatches to the handler of the
    selected subcommand.

    Args:
        argv: Argument list without the program name. ``None`` means
            "use ``sys.argv[1:]``"; an explicit empty list is honoured
            as "no arguments".

    Returns:
        The selected handler's return value, or ``1`` after printing the
        help text when no subcommand was given.
    """
    # Test for None explicitly: ``argv or sys.argv[1:]`` would replace an
    # intentionally empty list (e.g. ``main([])``) with the process
    # arguments.
    if argv is None:
        argv = sys.argv[1:]

    import src.utils.config_manager as config
    from modules.omnivoice import register_config

    try:
        register_config()
    except Exception as e:
        # Registration is best effort; report the failure on stderr so it
        # does not mix with regular command output, then carry on.
        print(f"Failed to register omnivoice config: {e}", file=sys.stderr)
    config.config.load()

    parser = _build_parser()
    args = parser.parse_args(argv)

    # No subcommand selected means no handler was attached to the namespace.
    if not hasattr(args, "func"):
        parser.print_help()
        return 1
    return args.func(args)
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status.
    exit_status = main()
    sys.exit(exit_status)