Pendrokar committed on
Commit
dee6d0d
·
1 Parent(s): 7eb29b6

the test TTS python scripts

Browse files
test_tts_e2_f5_e2.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the `/infer` endpoint of the mrfakename/E2-F5-TTS Space.

Requests synthesis with ``exp_name="E2-TTS"`` using a hosted reference clip.
Running this script contacts the remote Space.
"""
import os
from gradio_client import Client, handle_file

SPACE_ID = "mrfakename/E2-F5-TTS"
REFERENCE_AUDIO_URL = 'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav'
REFERENCE_TEXT = "The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory."

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(SPACE_ID, hf_token=os.getenv('HF_TOKEN'))

# Fetch the endpoint description as a dict and show it.
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
print(endpoints)

# Keyword inputs forwarded to the endpoint.
infer_inputs = {
    "ref_audio_orig": handle_file(REFERENCE_AUDIO_URL),
    "ref_text": REFERENCE_TEXT,
    "gen_text": "Please surprise me and speak in whatever voice you enjoy.",
    "exp_name": "E2-TTS",
    "remove_silence": False,
}
result = client.predict(api_name="/infer", **infer_inputs)
test_tts_e2_f5_f5.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the `/infer` endpoint of the mrfakename/E2-F5-TTS Space.

Requests synthesis with ``exp_name="F5-TTS"`` using a hosted reference clip.
Running this script contacts the remote Space.
"""
import os
from gradio_client import Client, handle_file

SPACE_ID = "mrfakename/E2-F5-TTS"
REFERENCE_AUDIO_URL = 'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav'
REFERENCE_TEXT = "The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory."

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(SPACE_ID, hf_token=os.getenv('HF_TOKEN'))

# Fetch the endpoint description as a dict and show it.
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
print(endpoints)

# Keyword inputs forwarded to the endpoint.
infer_inputs = {
    "ref_audio_orig": handle_file(REFERENCE_AUDIO_URL),
    "ref_text": REFERENCE_TEXT,
    "gen_text": "Please surprise me and speak in whatever voice you enjoy.",
    "exp_name": "F5-TTS",
    "remove_silence": False,
}
result = client.predict(api_name="/infer", **infer_inputs)
test_tts_edge.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the `/predict` endpoint of the innoai/Edge-TTS-Text-to-Speech Space.

Running this script contacts the remote Space.
"""
import os
from gradio_client import Client, file

SPACE_ID = "innoai/Edge-TTS-Text-to-Speech"

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(SPACE_ID, hf_token=os.getenv('HF_TOKEN'))

# The endpoint description is fetched but not printed; uncomment to inspect it.
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
# print(endpoints)

# Positional inputs, in the order the endpoint expects them.
predict_inputs = (
    "Please surprise me and speak in whatever voice you enjoy.",
    "en-US-EmmaMultilingualNeural - en-US (Female)",
    0,  # NOTE(review): meaning not shown here (presumably rate) - confirm via view_api
    0,  # NOTE(review): meaning not shown here (presumably pitch) - confirm via view_api
)
result = client.predict(*predict_inputs, api_name="/predict")
test_tts_fish.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the `/inference_wrapper` endpoint of the fishaudio/fish-speech-1 Space.

Sends a generation request with reference audio enabled and prints the
second element of the returned result. Running this script contacts the
remote Space.
"""
import os
from gradio_client import Client, handle_file

SPACE_ID = "fishaudio/fish-speech-1"
REFERENCE_AUDIO_URL = 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav'
# The curly quotes around "heartless" were previously mis-decoded into
# mojibake; they are restored here so the text sent as reference_text is
# clean UTF-8.
REFERENCE_TEXT = "In the first half of the 20th century, science fiction familiarized the world with the concept of artificially intelligent robots. It began with the “heartless” Tin man from the Wizard of Oz and continued with the humanoid robot that impersonated Maria in Metropolis. By the 1950s, we had a generation of scientists, mathematicians, and philosophers with the concept of artificial intelligence (or AI) culturally assimilated in their minds."

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(SPACE_ID, hf_token=os.getenv('HF_TOKEN'))

# Uncomment to inspect the endpoint description:
# printz = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
# print(printz)

result = client.predict(
    text="Please surprise me and speak in whatever voice you enjoy.",
    enable_reference_audio=True,
    reference_audio=handle_file(REFERENCE_AUDIO_URL),
    reference_text=REFERENCE_TEXT,
    max_new_tokens=1024,
    chunk_length=200,
    top_p=0.7,
    repetition_penalty=1.2,
    temperature=0.7,
    batch_infer_num=1,
    if_load_asr_model=False,
    api_name="/inference_wrapper",
)
# Only the second element of the result is shown.
print(result[1])
test_tts_melo.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the `/synthesize` endpoint of the mrfakename/MeloTTS Space.

Running this script contacts the remote Space.
"""
import os
from gradio_client import Client

SPACE_ID = "mrfakename/MeloTTS"

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(SPACE_ID, hf_token=os.getenv('HF_TOKEN'))

# The endpoint description is fetched but not printed; uncomment to inspect it.
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
# print(endpoints)

# Positional inputs, in the order the endpoint expects them.
synthesize_inputs = (
    # 'Text to speak' Textbox (str)
    "Please surprise me and speak in whatever voice you enjoy.",
    # 'Speaker' Dropdown: Literal['EN-US', 'EN-BR', 'EN_INDIA', 'EN-AU', 'EN-Default']
    "EN-US",
    # float (numeric value between 0.1 and 10.0)
    1.0,
    # 'Language' Radio: Literal['EN', 'ES', 'FR', 'ZH', 'JP', 'KR']
    "EN",
)
result = client.predict(*synthesize_inputs, api_name="/synthesize")
test_tts_metavoice.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the `/tts` endpoint of the mrfakename/MetaVoice-1B-v0.1 Space.

Uses a preset voice rather than an uploaded sample. Running this script
contacts the remote Space.
"""
import os
from gradio_client import Client, file

SPACE_ID = "mrfakename/MetaVoice-1B-v0.1"

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(SPACE_ID, hf_token=os.getenv('HF_TOKEN'))

# Fetch the endpoint description as a dict and show it.
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
print(endpoints)

# Positional inputs, in the order the endpoint expects them.
tts_inputs = (
    # 'What should I say!? (max 512 characters).' Textbox (str)
    "Please surprise me and speak in whatever voice you enjoy.",
    # 'Speech Stability - improves text following for a challenging speaker'
    # Slider: float between 0.0 and 10.0
    5,
    # 'Speaker similarity - How closely to match speaker identity and speech
    # style.' Slider: float between 1.0 and 5.0
    5,
    # 'Choose voice' Radio: Literal['Preset voices', 'Upload target voice']
    "Preset voices",
    # 'Preset voices' Dropdown: Literal['Bria', 'Alex', 'Jacob']
    "Bria",
    # 'Upload a clean sample to clone. Sample should contain 1 speaker, be
    # between 30-90 seconds and not contain background noise.' Audio (filepath)
    None,
)
result = client.predict(*tts_inputs, api_name="/tts")
test_tts_parler.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the `/gen_tts` endpoint of the parler-tts/parler_tts Space.

Running this script contacts the remote Space.
"""
import os
from gradio_client import Client

SPACE_ID = "parler-tts/parler_tts"

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(SPACE_ID, hf_token=os.getenv('HF_TOKEN'))

# Fetch the endpoint description as a dict and show it.
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
print(endpoints)

# Keyword inputs forwarded to the endpoint. A further positional value (3)
# was left commented out in the original call and is still not sent.
gen_tts_inputs = {
    "text": "Please surprise me and speak in whatever voice you enjoy.",
    "description": "Elisabeth; Elisabeth's female voice; very clear audio",
}
result = client.predict(api_name="/gen_tts", **gen_tts_inputs)
test_tts_parler_ex.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the `/gen_tts` endpoint of the parler-tts/parler-tts-expresso Space.

Running this script contacts the remote Space.
"""
import os
from gradio_client import Client

SPACE_ID = "parler-tts/parler-tts-expresso"

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(SPACE_ID, hf_token=os.getenv('HF_TOKEN'))

# Fetch the endpoint description as a dict and show it.
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
print(endpoints)

# Keyword inputs forwarded to the endpoint. A further positional value (3)
# was left commented out in the original call and is still not sent.
gen_tts_inputs = {
    "text": "Please surprise me and speak in whatever voice you enjoy.",
    "description": "Elisabeth; Elisabeth's female voice; very clear audio",
}
result = client.predict(api_name="/gen_tts", **gen_tts_inputs)
test_tts_tortoise.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the `/predict` endpoint of the Manmay/tortoise-tts Space.

Prints whatever the endpoint returns. Running this script contacts the
remote Space.
"""
import os
from gradio_client import Client

SPACE_ID = "Manmay/tortoise-tts"

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(SPACE_ID, hf_token=os.getenv('HF_TOKEN'))

# Keyword inputs forwarded to the endpoint.
predict_inputs = {
    "text": "Please surprise me and speak in whatever voice you enjoy.",
    "script": None,
    "voice": "angie",
    "voice_b": "disabled",
    "seed": "No",
}
result = client.predict(api_name="/predict", **predict_inputs)
print(result)
test_tts_voicecraft.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the `/run` endpoint of the pyp1/VoiceCraft_gradio Space.

Running this script contacts the remote Space.
"""
import os
from gradio_client import Client

SPACE_ID = "pyp1/VoiceCraft_gradio"

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(SPACE_ID, hf_token=os.getenv('HF_TOKEN'))

# Fetch the endpoint description as a dict and show it.
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
print(endpoints)

# The endpoint is called positionally. The keys only label each value and
# are never sent; insertion order is the argument order.
run_inputs = {
    "seed": -1,
    "left_margin": 0.08,
    "right_margin": 0.08,
    "codec_audio_sr": 16000,
    "codec_sr": 50,
    "top_k": 0,
    "top_p": 0.9,
    "temperature": 1,
    "stop_repetition": "3",
    "sample_batch_size": 4,
    "kvcache": "1",
    "silence_tokens": "[1388,1898,131]",
    "audio_path": 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav',
    "transcript": "I cannot believe that the same model can also do text to speech synthesis too!",
    "smart_transcript": True,
    "prompt_end_time": 3.016,
    "edit_start_time": 0.46,
    "edit_end_time": 3.808,
    "split_text": "Newline",
    "selected_sentence": None,
}
result = client.predict(*run_inputs.values(), api_name="/run")
test_tts_whisper.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the `/whisper_speech_demo` endpoint of the collabora/WhisperSpeech Space.

Sends a short text with no speaker audio and no speaker URL. Running this
script contacts the remote Space.
"""
import os
from gradio_client import Client, file

# Alternative sources that were tried and left disabled:
#   Client("Pendrokar/WhisperSpeech", hf_token=os.getenv('HF_TOKEN'))
#   Client("collabora/WhisperSpeech")
#   Client(src="https://collabora-whisperspeech.hf.space", max_workers=1, hf_token=os.getenv('HF_TOKEN'))
SPACE_ID = "collabora/WhisperSpeech"

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(src=SPACE_ID, max_workers=1, hf_token=os.getenv('HF_TOKEN'))

# Uncomment to inspect the endpoint description:
# endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
# print(endpoints)


def somefunc():
    """Do nothing; placeholder that the active call below does not use."""
    pass


# Keyword inputs forwarded to the endpoint. Earlier experiments (kept
# disabled) passed a speaker sample instead of None, e.g.
#   file('https://upload.wikimedia.org/wikipedia/commons/7/75/Winston_Churchill_-_Be_Ye_Men_of_Valour.ogg')
# for speaker_audio or speaker_url, an empty string for speaker_url, a single
# list of positional values, or fn_index=0 alongside api_name.
demo_inputs = {
    "multilingual_text": "Test.",
    "speaker_audio": None,
    "speaker_url": None,
    "cps": 14,
}
result = client.predict(api_name="/whisper_speech_demo", **demo_inputs)
test_tts_xtts.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the coqui/xtts Space, addressed by ``fn_index=1``.

Running this script contacts the remote Space.
"""
import os
from gradio_client import Client, file

SPACE_ID = "coqui/xtts"
VOICE_SAMPLE_URL = 'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav'

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(SPACE_ID, hf_token=os.getenv('HF_TOKEN'))

# The endpoint description is fetched but not printed; uncomment to inspect it.
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
# print(endpoints)

# Positional inputs, in the order the endpoint expects them.
xtts_inputs = (
    "Quick test.",     # 'What should I say!? (max 512 characters).' Textbox (str)
    'en',              # lang
    VOICE_SAMPLE_URL,  # voice sample
    None,              # mic voice sample
    False,             # use_mic
    False,             # cleanup_reference
    False,             # auto_detect
    True,              # ToS
)
# This call selects the function by index instead of by api_name.
result = client.predict(*xtts_inputs, fn_index=1)
test_tts_xva.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Manual check of the `/predict` endpoint of the Pendrokar/xVASynth-TTS Space.

Running this script contacts the remote Space.
"""
import os
from gradio_client import Client, file

SPACE_ID = "Pendrokar/xVASynth-TTS"

# os.getenv returns None when HF_TOKEN is not set in the environment.
client = Client(SPACE_ID, hf_token=os.getenv('HF_TOKEN'))

# The endpoint description is fetched but not printed; uncomment to inspect it.
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
# print(endpoints)

# Positional inputs, in the order the endpoint expects them.
predict_inputs = (
    # 'Input Text' Textbox (str)
    "Well, hello there!!",
    # 'Voice' Radio: Literal['x_ex04', 'x_ex01', 'cnc_cabal',
    # 'ccby_nvidia_hifi_92_F', 'ccby_nvidia_hifi_6671_M', 'more']
    "x_ex04",
    # 'Language' Radio: Literal['en', 'de', 'es', 'hi', 'zh', 'more']
    "en",
    # 'Duration' Slider: float between 0.5 and 2.0
    1.0,

    # UNUSED; 'Pitch' Slider: float between 0 and 1.0
    0,
    # UNUSED; 'Energy' Slider: float between 0.1 and 1.0
    0.1,

    # The four emotion sliders below (each a float between 0 and 1.0) are
    # overridden by DeepMoji.
    0,  # '😠 Anger' Slider
    0,  # '😃 Happiness' Slider
    0,  # '😭 Sadness' Slider
    0,  # '😮 Surprise' Slider
    # 'Use DeepMoji' Checkbox (bool)
    True,
)
result = client.predict(*predict_inputs, api_name="/predict")