# Python package dependencies in pip requirements-file format.
# Each requirement specifier must be on its own line; pip cannot parse
# several specifiers separated by spaces on a single line.
gradio>=4.0.0
torch>=2.0.0
torchaudio>=2.0.0
# NOTE(review): upper bound on transformers; reason is not recorded here -- confirm before relaxing.
transformers<4.50
transformers_stream_generator
soundfile
# NOTE(review): numpy is capped at 1.26.4 (excludes 2.x); reason is not recorded here -- confirm before relaxing.
numpy<=1.26.4
vocos
torchcodec
cached_path
accelerate>=0.33.0
safetensors
tqdm>=4.65.0
matplotlib
pydub
huggingface_hub
x_transformers>=1.31.14
ema_pytorch>=0.5.2
torchdiffeq
librosa
jieba
pypinyin
datasets
click
tomli
hydra-core>=1.3.0
wandb