Add OmniVoice TTS module with config, API, profiles and CLI

- Create modules/omnivoice/ with VoiceAPI, VoiceProfiles, CLI
- Add config manager integration with local model support
- Add app/komAI.py entry point
- Add tests/test_omnivoice.py
- Clone OmniVoice to external/ for development
- Add omnivoice config to global.yaml
2026-04-16 17:51:15 +03:00
parent 22b85455e1
commit 55353654b7
11 changed files with 1064 additions and 3 deletions
--- a/modules/omnivoice/profiles.py
+++ b/modules/omnivoice/profiles.py
@@ -0,0 +1,171 @@
+import json
+from pathlib import Path
+from typing import Optional, Dict, List
+import shutil
+
+import src.utils.config_manager as config_mgr
+from .config import PROFILES_DIR, DEFAULT_PROFILES_DIR
+from .api import api
+
+# Alias for the ``config`` object exported by src.utils.config_manager;
+# VoiceProfiles reads the profiles directory setting from it.
+config = config_mgr.config
+
+
+class VoiceProfile:
+    """A named voice preset and the inputs that select its synthesis mode.
+
+    Attributes:
+        name: Identifier of the profile.
+        ref_audio: Optional reference audio (presumably a file path; confirm
+            against the ``api.clone`` contract).
+        ref_text: Optional text stored alongside ``ref_audio``.
+        instruct: Optional instruction string.
+        description: Free-form description; defaults to an empty string.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        ref_audio: Optional[str] = None,
+        ref_text: Optional[str] = None,
+        instruct: Optional[str] = None,
+        description: str = "",
+    ):
+        """Store the given values on the instance unchanged."""
+        self.name, self.ref_audio, self.ref_text = name, ref_audio, ref_text
+        self.instruct, self.description = instruct, description
+
+    @property
+    def mode(self) -> str:
+        """Return ``"clone"``, ``"design"`` or ``"auto"``.
+
+        A truthy ``ref_audio`` wins over ``instruct``; with neither set the
+        mode is ``"auto"``.
+        """
+        if self.ref_audio:
+            return "clone"
+        return "design" if self.instruct else "auto"
+
+    def to_dict(self) -> Dict:
+        """Return the profile as a plain dict, including the derived ``mode``."""
+        keys = ("name", "ref_audio", "ref_text", "instruct", "description", "mode")
+        return {key: getattr(self, key) for key in keys}
+
+    @classmethod
+    def from_dict(cls, data: Dict) -> "VoiceProfile":
+        """Build a profile from a mapping such as the one ``to_dict`` returns.
+
+        ``"name"`` is required (a missing key raises ``KeyError``); the other
+        fields are optional, and any ``"mode"`` entry is ignored because the
+        mode is derived from the other fields.
+        """
+        name = data["name"]
+        optional = {key: data.get(key) for key in ("ref_audio", "ref_text", "instruct")}
+        return cls(name=name, description=data.get("description", ""), **optional)
+
+
+class VoiceProfiles:
+    """Registry of ``VoiceProfile`` objects persisted to ``profiles.json``.
+
+    Profiles are held in memory as a ``name -> VoiceProfile`` mapping and the
+    whole mapping is written back to disk after every ``add`` / ``remove``.
+    """
+
+    def __init__(self, profiles_dir: Optional[str] = None):
+        """Resolve the storage directory and load any existing profiles.
+
+        Args:
+            profiles_dir: Directory holding ``profiles.json``. When falsy, the
+                value of ``config.get(PROFILES_DIR, cat="omnivoice")`` is used,
+                and ``DEFAULT_PROFILES_DIR`` if that is falsy as well.
+        """
+        self._profiles_dir = (
+            profiles_dir
+            or config.get(PROFILES_DIR, cat="omnivoice")
+            or DEFAULT_PROFILES_DIR
+        )
+        self._profiles_path = Path(self._profiles_dir) / "profiles.json"
+        self._profiles: Dict[str, VoiceProfile] = {}
+        self._load()
+
+    def _load(self):
+        """Populate the in-memory mapping from ``profiles.json`` if it exists.
+
+        A missing file leaves the registry empty. Invalid JSON still raises
+        (it is not swallowed, so a later save cannot silently overwrite a file
+        that failed to parse).
+        """
+        if not self._profiles_path.exists():
+            return
+        with open(self._profiles_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        for name, profile_data in data.items():
+            # Fall back to the mapping key when an entry lacks its own "name",
+            # so a hand-edited file does not abort loading with KeyError.
+            # An explicit "name" inside the entry still takes precedence.
+            self._profiles[name] = VoiceProfile.from_dict(
+                {"name": name, **profile_data}
+            )
+
+    def _save(self):
+        """Write every profile to ``profiles.json``, creating the directory if needed.
+
+        The data goes to a sibling ``profiles.json.tmp`` first and is then
+        moved over the target, so an interrupted or failed dump does not
+        truncate the existing file.
+        """
+        self._profiles_path.parent.mkdir(parents=True, exist_ok=True)
+        data = {name: profile.to_dict() for name, profile in self._profiles.items()}
+        tmp_path = self._profiles_path.with_name(self._profiles_path.name + ".tmp")
+        with open(tmp_path, "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=2, ensure_ascii=False)
+        tmp_path.replace(self._profiles_path)
+
+    def list(self) -> List[str]:
+        """Return all profile names in sorted order."""
+        return sorted(self._profiles.keys())
+
+    def get(self, name: str) -> Optional[VoiceProfile]:
+        """Return the profile stored under ``name``, or ``None`` if absent."""
+        return self._profiles.get(name)
+
+    def add(self, profile: VoiceProfile) -> None:
+        """Store ``profile`` under ``profile.name`` (replacing any existing one) and save."""
+        self._profiles[profile.name] = profile
+        self._save()
+
+    def remove(self, name: str) -> bool:
+        """Delete the profile named ``name`` and save.
+
+        Returns:
+            ``True`` if a profile was removed, ``False`` if ``name`` was unknown
+            (in which case nothing is written).
+        """
+        if name in self._profiles:
+            del self._profiles[name]
+            self._save()
+            return True
+        return False
+
+    def save_from_generated(
+        self,
+        name: str,
+        text: str,
+        ref_audio: Optional[str] = None,
+        ref_text: Optional[str] = None,
+        instruct: Optional[str] = None,
+        description: str = "",
+    ) -> VoiceProfile:
+        """Create a profile from generation inputs, store it and return it.
+
+        Args:
+            name: Name under which the profile is stored.
+            text: Text that was generated; only used to build a default
+                description.
+            ref_audio: Passed through to the new ``VoiceProfile``.
+            ref_text: Passed through to the new ``VoiceProfile``.
+            instruct: Passed through to the new ``VoiceProfile``.
+            description: Description to store. When empty, one is derived from
+                the first 50 characters of ``text``.
+
+        Returns:
+            The newly stored ``VoiceProfile``.
+        """
+        if not description:
+            preview_len = 50
+            # Only mark the preview as truncated when text was actually cut.
+            ellipsis = "..." if len(text) > preview_len else ""
+            description = f"Generated from: {text[:preview_len]}{ellipsis}"
+        profile = VoiceProfile(
+            name=name,
+            ref_audio=ref_audio,
+            ref_text=ref_text,
+            instruct=instruct,
+            description=description,
+        )
+        self.add(profile)
+        return profile
+
+    def generate(
+        self,
+        profile_name: str,
+        text: str,
+        num_steps: Optional[int] = None,
+        speed: Optional[float] = None,
+    ) -> List:
+        """Generate ``text`` with the stored profile, dispatching on its mode.
+
+        Args:
+            profile_name: Name of a stored profile.
+            text: Text forwarded to the API call.
+            num_steps: Forwarded unchanged to the API call.
+            speed: Forwarded unchanged to the API call.
+
+        Returns:
+            Whatever ``api.clone`` / ``api.design`` / ``api.auto`` returns
+            (presumably the generated audio; confirm against the api module).
+
+        Raises:
+            ValueError: If no profile named ``profile_name`` exists.
+        """
+        profile = self._profiles.get(profile_name)
+        if profile is None:
+            raise ValueError(f"Profile '{profile_name}' not found")
+
+        mode = profile.mode
+        if mode == "clone":
+            return api.clone(
+                text=text,
+                ref_audio=profile.ref_audio,
+                ref_text=profile.ref_text,
+                num_steps=num_steps,
+                speed=speed,
+            )
+        if mode == "design":
+            return api.design(
+                text=text,
+                instruct=profile.instruct,
+                num_steps=num_steps,
+                speed=speed,
+            )
+        return api.auto(
+            text=text,
+            num_steps=num_steps,
+            speed=speed,
+        )
+
+
+# Module-level registry used by the helper functions below. It is created at
+# import time, so importing this module reads profiles.json when it exists.
+profiles = VoiceProfiles()
+
+
+def get_profiles() -> VoiceProfiles:
+    """Return the module-level ``profiles`` registry instance."""
+    registry = profiles
+    return registry
+
+
+def list_profiles() -> List[str]:
+    """Return the result of ``list()`` on the module-level ``profiles`` registry."""
+    names = profiles.list()
+    return names
+
+
+def get_profile(name: str) -> Optional[VoiceProfile]:
+    """Return the result of ``get(name)`` on the module-level ``profiles`` registry."""
+    found = profiles.get(name)
+    return found
+
+
+def add_profile(profile: VoiceProfile) -> None:
+    """Pass ``profile`` to ``add()`` on the module-level ``profiles`` registry."""
+    registry = profiles
+    registry.add(profile)
+
+
+def remove_profile(name: str) -> bool:
+    """Return the result of ``remove(name)`` on the module-level ``profiles`` registry."""
+    was_removed = profiles.remove(name)
+    return was_removed