"""Command-line entry point for the ``komai-voice`` tool.

Each ``cmd_*`` function implements one subcommand: it receives the parsed
``argparse.Namespace`` and returns the process exit code.  Project modules
are imported inside the functions that use them, exactly as before, so
importing this module only pulls in the standard library.
"""

import argparse
import sys
from pathlib import Path


def _report_output(path):
    """Print ``Saved: <path>`` for a truthy *path*, otherwise ``Playing...``.

    *path* is the value returned by ``api.save_audio``.
    """
    if path:
        print(f"Saved: {path}")
    else:
        print("Playing...")


def cmd_clone(args):
    """Run the ``clone`` subcommand.

    Calls ``api.clone`` with the text, reference audio/text, step count and
    speed from *args*, hands the first returned item to ``api.save_audio``
    and, when ``--profile`` was given, stores the inputs as a named profile.

    Returns:
        0 on completion.
    """
    from .api import api
    from .profiles import profiles

    audio = api.clone(
        text=args.text,
        ref_audio=args.ref_audio,
        ref_text=args.ref_text,
        num_steps=args.steps,
        speed=args.speed,
    )
    _report_output(api.save_audio(audio[0], args.output))

    if args.profile:
        profiles.save_from_generated(
            name=args.profile,
            text=args.text,
            ref_audio=args.ref_audio,
            ref_text=args.ref_text,
        )
        print(f"Profile saved: {args.profile}")
    return 0


def cmd_design(args):
    """Run the ``design`` subcommand.

    Calls ``api.design`` with the text, instruction, step count and speed
    from *args*, hands the first returned item to ``api.save_audio`` and,
    when ``--profile`` was given, stores the inputs as a named profile.

    Returns:
        0 on completion.
    """
    from .api import api
    from .profiles import profiles

    audio = api.design(
        text=args.text,
        instruct=args.instruct,
        num_steps=args.steps,
        speed=args.speed,
    )
    _report_output(api.save_audio(audio[0], args.output))

    if args.profile:
        profiles.save_from_generated(
            name=args.profile,
            text=args.text,
            instruct=args.instruct,
        )
        print(f"Profile saved: {args.profile}")
    return 0


def cmd_auto(args):
    """Run the ``auto`` subcommand.

    Calls ``api.auto`` with the text, step count and speed from *args* and
    hands the first returned item to ``api.save_audio``.

    Returns:
        0 on completion.
    """
    from .api import api

    audio = api.auto(
        text=args.text,
        num_steps=args.steps,
        speed=args.speed,
    )
    _report_output(api.save_audio(audio[0], args.output))
    return 0


def cmd_profile_list(args):
    """Run the ``profiles`` subcommand: print one line per stored profile.

    Each line has the form ``<name> [<mode>]: <description>``.

    Returns:
        0 on completion.
    """
    from .profiles import profiles

    for name in profiles.list():
        p = profiles.get(name)
        print(f"{name} [{p.mode}]: {p.description}")
    return 0


def cmd_profile_add(args):
    """Run the ``profile-add`` subcommand.

    Builds a ``VoiceProfile`` from *args* (a missing description becomes an
    empty string) and registers it with ``profiles.add``.

    Returns:
        0 on completion.
    """
    from .profiles import profiles, VoiceProfile

    profile = VoiceProfile(
        name=args.name,
        ref_audio=args.ref_audio,
        ref_text=args.ref_text,
        instruct=args.instruct,
        description=args.description or "",
    )
    profiles.add(profile)
    print(f"Profile added: {args.name}")
    return 0


def cmd_profile_remove(args):
    """Run the ``profile-remove`` subcommand.

    Returns:
        0 when ``profiles.remove`` reports success, otherwise 1 after
        printing a "not found" message to stderr.
    """
    from .profiles import profiles

    if profiles.remove(args.name):
        print(f"Profile removed: {args.name}")
        return 0
    print(f"Profile not found: {args.name}", file=sys.stderr)
    return 1


def cmd_profile_use(args):
    """Run the ``profile-use`` subcommand.

    Calls ``profiles.generate`` with the profile name, text, step count and
    speed from *args* and hands the first returned item to
    ``api.save_audio``.

    Returns:
        0 on completion.
    """
    from .api import api
    from .profiles import profiles

    audio = profiles.generate(args.profile, args.text, args.steps, args.speed)
    _report_output(api.save_audio(audio[0], args.output))
    return 0


def cmd_download(args):
    """Run the ``download`` subcommand: fetch a model snapshot locally.

    The repository id is taken from ``--model``, then the ``omnivoice``
    config entry, then ``DEFAULT_MODEL_NAME``.  The target directory is
    taken from ``--path``, then the config entry, then
    ``DEFAULT_MODEL_PATH``.  If the target is already a non-empty directory
    nothing is downloaded.

    Returns:
        0 both when the download ran and when it was skipped.
    """
    from huggingface_hub import snapshot_download
    import src.utils.config_manager as config
    from modules.omnivoice.config import (
        MODEL_PATH,
        DEFAULT_MODEL_PATH,
        MODEL_NAME,
        DEFAULT_MODEL_NAME,
    )

    model_name = (
        args.model
        or config.config.get(MODEL_NAME, cat="omnivoice")
        or DEFAULT_MODEL_NAME
    )

    if args.path:
        model_path = Path(args.path)
    else:
        model_path_str = (
            config.config.get(MODEL_PATH, cat="omnivoice") or DEFAULT_MODEL_PATH
        )
        model_path = Path(model_path_str)

    # is_dir() rather than exists(): iterdir() raises NotADirectoryError when
    # the path points at a regular file.
    if model_path.is_dir() and any(model_path.iterdir()):
        print(f"Model already exists: {model_path}")
        print("Delete folder to re-download")
        return 0

    print(f"Downloading model {model_name} to {model_path}...")
    model_path.mkdir(parents=True, exist_ok=True)

    snapshot_download(
        repo_id=model_name,
        local_dir=str(model_path),
        ignore_patterns=["*.pt", "*.bin", "*.pth"],
    )

    print(f"Model saved to: {model_path}")
    return 0


def _add_generation_options(parser):
    """Add the ``--output``, ``--steps`` and ``--speed`` options to *parser*."""
    parser.add_argument("--output")
    parser.add_argument("--steps", type=int)
    parser.add_argument("--speed", type=float)


def build_parser():
    """Build the ``komai-voice`` argument parser with all subcommands.

    Every subparser stores its handler in the ``func`` default, so the
    parsed namespace has no ``func`` attribute when no subcommand is given.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(prog="komai-voice")
    subparsers = parser.add_subparsers()

    p_download = subparsers.add_parser("download", help="Download model locally")
    # No literal default: a truthy default here would always win in
    # cmd_download and make its config / DEFAULT_MODEL_NAME fallback dead.
    p_download.add_argument(
        "--model",
        default=None,
        help="Model name on HuggingFace (default: configured model)",
    )
    p_download.add_argument("--path", help="Local path to save model")
    p_download.set_defaults(func=cmd_download)

    p_clone = subparsers.add_parser("clone", help="Voice cloning")
    p_clone.add_argument("--text", required=True)
    p_clone.add_argument("--ref-audio", required=True)
    p_clone.add_argument("--ref-text")
    _add_generation_options(p_clone)
    p_clone.add_argument("--profile", help="Save as profile")
    p_clone.set_defaults(func=cmd_clone)

    p_design = subparsers.add_parser("design", help="Voice design")
    p_design.add_argument("--text", required=True)
    p_design.add_argument("--instruct", required=True)
    _add_generation_options(p_design)
    p_design.add_argument("--profile", help="Save as profile")
    p_design.set_defaults(func=cmd_design)

    p_auto = subparsers.add_parser("auto", help="Auto voice")
    p_auto.add_argument("--text", required=True)
    _add_generation_options(p_auto)
    p_auto.set_defaults(func=cmd_auto)

    p_list = subparsers.add_parser("profiles", help="List profiles")
    p_list.set_defaults(func=cmd_profile_list)

    p_add = subparsers.add_parser("profile-add", help="Add profile")
    p_add.add_argument("--name", required=True)
    p_add.add_argument("--ref-audio")
    p_add.add_argument("--ref-text")
    p_add.add_argument("--instruct")
    p_add.add_argument("--description")
    p_add.set_defaults(func=cmd_profile_add)

    p_rm = subparsers.add_parser("profile-remove", help="Remove profile")
    p_rm.add_argument("--name", required=True)
    p_rm.set_defaults(func=cmd_profile_remove)

    p_use = subparsers.add_parser("profile-use", help="Generate using profile")
    p_use.add_argument("--profile", required=True)
    p_use.add_argument("--text", required=True)
    _add_generation_options(p_use)
    p_use.set_defaults(func=cmd_profile_use)

    return parser


def main(argv=None):
    """Parse *argv* and dispatch to the selected subcommand.

    Args:
        argv: Argument list without the program name.  ``None`` means
            ``sys.argv[1:]``; an explicit empty list is honoured as "no
            arguments".

    Returns:
        The exit code of the subcommand handler, or 1 (after printing the
        help text) when no subcommand was given.
    """
    # "argv or sys.argv[1:]" would silently replace an explicit [] with the
    # real process arguments.
    if argv is None:
        argv = sys.argv[1:]

    import src.utils.config_manager as config
    from modules.omnivoice import register_config

    try:
        register_config()
    except Exception as e:
        # Best effort: a registration failure is reported but does not stop
        # the CLI; the config is still loaded below.
        print(f"Failed to register omnivoice config: {e}", file=sys.stderr)
    config.config.load()

    parser = build_parser()
    args = parser.parse_args(argv)
    if not hasattr(args, "func"):
        parser.print_help()
        return 1

    return args.func(args)


if __name__ == "__main__":
    sys.exit(main())