Spaces:

aspmirlab
/

ASPMIR-MACHINE-TRANSLATION-TESTBED

Sleeping

App Files Files Community

ASPMIR-MACHINE-TRANSLATION-TESTBED / ASPMIR-YorTTS.py

aspmirlab

Upload folder using huggingface_hub

95b880a verified about 1 year ago

raw

history blame contribute delete

11.4 kB

	#!/usr/bin/env python
	# coding: utf-8

	# In[1]:


	#Install All the Required Dependencies
	#!pip3 install torch torchvision torchaudio
	#!pip install transformers ipywidgets gradio --upgrade
	#!pip install --upgrade transformers accelerate
	#!pip install --upgrade gradio
	#!pip install nltk
	#!pip install jiwer
	#!pip install sentencepiece
	#!pip install sacremoses
	#!pip install soundfile
	#!pip install librosa numpy jiwer nltk
	#!pip install --upgrade pip
	#!pip install huggingface_hub


	# In[2]:


	#Import Required Libraries
	from transformers import pipeline
	from jiwer import wer
	from transformers import VitsModel, AutoTokenizer, set_seed
	import torch
	import soundfile as sf
	import librosa
	from scipy.spatial.distance import euclidean
	import numpy as np
	import string
	import os
	from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
	from nltk.translate.meteor_score import meteor_score
	import string
	import numpy as np
	import librosa
	from scipy.spatial.distance import euclidean
	import string


	# In[3]:


	import nltk

	# Fetch the WordNet corpora at start-up (presumably for the METEOR scorer
	# imported above -- confirm). 'omw-1.4' is optional unless WordNet's
	# multilingual features are used.
	for nltk_resource in ('wordnet', 'omw-1.4'):
	    nltk.download(nltk_resource)

	# Show the search path as it stands before the local directory is registered.
	print(nltk.data.path)

	# Also look for NLTK data in a folder next to the working directory.
	nltk.data.path.append('./nltk_data')


	# In[4]:


	#Define all Utility Functions
	# Function to compute BLEU score
	def compute_bleu(reference_text, predicted_text):
	    """
	    Score one candidate translation against one reference with sentence-level BLEU.

	    Both strings are echoed to stdout, split on whitespace (no lowercasing or
	    punctuation stripping), and scored with ``sentence_bleu`` using
	    ``SmoothingFunction().method1``.

	    :param reference_text: The ground truth text (in Yoruba).
	    :param predicted_text: The machine-generated translation text (in Yoruba).
	    :return: BLEU score rounded to two decimal places (float).
	    """
	    print("The Reference Text = ", reference_text)
	    print("The Predicted Text = ",predicted_text)

	    # sentence_bleu expects a list of tokenized references and a single
	    # tokenized hypothesis, hence the extra list around the reference.
	    references = [reference_text.split()]
	    hypothesis = predicted_text.split()

	    # Smoothing keeps the score usable when few n-grams match.
	    score = sentence_bleu(
	        references,
	        hypothesis,
	        smoothing_function=SmoothingFunction().method1,
	    )
	    return round(score, 2)
	# Function to compute Word Error Rate (WER)
	def compute_wer(reference_text, predicted_text):
	    """
	    Compute the Word Error Rate (WER) of one translation against its reference.

	    Both texts are lowercased and stripped of the characters in
	    ``string.punctuation`` before being handed to ``wer``.

	    :param reference_text: The ground truth text (in Yoruba).
	    :param predicted_text: The machine-generated translation text (in Yoruba).
	    :return: WER score rounded to two decimal places (float).
	    """
	    # One translation table is enough for both strings.
	    drop_punctuation = str.maketrans('', '', string.punctuation)

	    def _normalize(text):
	        return text.lower().translate(drop_punctuation)

	    normalized_reference = _normalize(reference_text)
	    normalized_prediction = _normalize(predicted_text)

	    return round(wer(normalized_reference, normalized_prediction), 2)

	# Function to compute METEOR score
	def compute_meteor(reference_text, predicted_text):
	    """
	    Compute the METEOR score of one translation against its reference.

	    Each text is lowercased, stripped of the characters in
	    ``string.punctuation`` and split on whitespace; the token lists are then
	    passed to ``meteor_score`` with the reference wrapped in a list.

	    :param reference_text: The ground truth text (in Yoruba).
	    :param predicted_text: The machine-generated translation text (in Yoruba).
	    :return: METEOR score rounded to two decimal places (float).
	    """
	    drop_punctuation = str.maketrans('', '', string.punctuation)

	    # Normalize and tokenize both sides identically.
	    reference_words, predicted_words = (
	        text.lower().translate(drop_punctuation).split()
	        for text in (reference_text, predicted_text)
	    )

	    score = meteor_score([reference_words], predicted_words)
	    return round(score, 2)

	# Function to compute Mel Cepstral Distance (MCD)
	def compute_mcd(ground_truth_audio_path, predicted_audio_path):
	    """
	    Computes the Mel Cepstral Distance (MCD) between two audio files.

	    Both files are loaded at 16 kHz, 13 MFCCs are extracted per frame, the
	    longer sequence is truncated to the length of the shorter one, and the
	    per-frame Euclidean distances are averaged and scaled by ``10 / ln(10)``.

	    NOTE(review): frames are aligned by truncation only (no time warping) and
	    all 13 coefficients are compared, so values may not be comparable with MCD
	    figures produced by other tools -- confirm before reporting.

	    :param ground_truth_audio_path: Path to the ground truth audio file.
	    :param predicted_audio_path: Path to the predicted audio file.
	    :return: MCD score rounded to two decimal places (float).
	    :raises ValueError: If the sampling rates differ or no frames are available.
	    """
	    # Load audio files, resampling both to the same rate
	    y_true, sr_true = librosa.load(ground_truth_audio_path, sr=16000)
	    y_pred, sr_pred = librosa.load(predicted_audio_path, sr=16000)

	    # Explicit check instead of `assert`, which is stripped under `python -O`
	    if sr_true != sr_pred:
	        raise ValueError("Sampling rates do not match between audio files.")

	    # Compute MFCCs; transpose so that axis 0 indexes frames
	    mfcc_true = librosa.feature.mfcc(y=y_true, sr=sr_true, n_mfcc=13).T
	    mfcc_pred = librosa.feature.mfcc(y=y_pred, sr=sr_pred, n_mfcc=13).T

	    # Align the MFCC frames by truncating to the shorter sequence
	    min_frames = min(len(mfcc_true), len(mfcc_pred))
	    if min_frames == 0:
	        # Previously surfaced as a bare ZeroDivisionError in the averaging step
	        raise ValueError("Cannot compute MCD: no MFCC frames to compare.")

	    # Per-frame Euclidean distance, computed in one vectorized call.
	    # The difference is promoted to float64 so accumulation is done in double precision.
	    frame_diff = (mfcc_true[:min_frames] - mfcc_pred[:min_frames]).astype(np.float64)
	    frame_distances = np.linalg.norm(frame_diff, axis=1)

	    mcd = (10.0 / np.log(10)) * frame_distances.mean()

	    return round(float(mcd), 2)


	# In[5]:


	#Define Translation and Synthesis Function
	def translate_transformers(modelName, sourceLangText):
	    """
	    Translate English text to Yoruba, synthesize it as speech, and score it.

	    Steps:
	      1. Run ``sourceLangText`` through a ``translation_en_to_yo`` pipeline
	         built from ``modelName``.
	      2. Synthesize the translation with ``facebook/mms-tts-yor`` and write the
	         waveform to ``ttsOutput.wav`` in the current working directory.
	      3. Compute BLEU, WER and METEOR against a fixed reference sentence, and
	         MCD between ``gt_ttsOutput.wav`` and ``ttsOutput.wav``.

	    NOTE(review): the text metrics are always computed against the hard-coded
	    religion-domain reference below, whatever ``sourceLangText`` is, so they
	    are only meaningful when the input is the matching English sentence.

	    :param modelName: Hugging Face model id passed to ``pipeline(model=...)``.
	    :param sourceLangText: English text to translate.
	    :return: Tuple ``(translated_text, bleu, wer, meteor, mcd, 'ttsOutput.wav')``.
	             ``mcd`` is ``None`` when it could not be computed (for example
	             when the ground-truth audio file is missing).
	    """
	    translation_pipeline = pipeline('translation_en_to_yo', model = modelName, max_length=500)
	    translated_text_target = translation_pipeline(sourceLangText)[0]['translation_text']

	    # TTS for the translated text
	    ttsModel = VitsModel.from_pretrained("facebook/mms-tts-yor")
	    tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-yor")
	    ttsInputs = tokenizer(translated_text_target, return_tensors="pt")
	    set_seed(555) # make deterministic
	    with torch.no_grad():
	        ttsOutput = ttsModel(**ttsInputs).waveform
	    # Convert the tensor to a numpy array (first item of the batch)
	    ttsWaveform = ttsOutput.numpy()[0]
	    # Save at the rate the model was configured with rather than a
	    # hard-coded number, so the file header always matches the samples.
	    sf.write('ttsOutput.wav', ttsWaveform, ttsModel.config.sampling_rate)

	    # Sample ground truth for Clinical Text
	    #ground_truth_text = "Àrùn jẹjẹrẹ ọmú jẹ́ ọ̀kan pàtàkì lára ohun tó ń ṣàkóbá fún ìlera gbogbo ènìyàn ní Nàìjíríà, ó sì jẹ́ ọ̀kan pàtàkì lára ohun tó ń fa ikú àwọn obìnrin tí àrùn jẹjẹrẹ ń pa lórílẹ̀-èdè náà."

	    # Sample ground truth for News Text
	    #ground_truth_text = "Wọ́n ní ìgbà àkọ́kọ́ nìyí tí irú ìwà ipá bẹ́ẹ̀ máa wáyé ní ìpínlẹ̀ Ondo."

	    # Sample ground truth for Religion Text (the one currently in use)
	    ground_truth_text = "Àwọn aposteli, àwọn wòlíì, àwọn ajíhìnrere, àwọn olùṣọ́-àgùntàn àti àwọn olùkọ́."
	    predicted_text = translated_text_target

	    #Compute bleu_score
	    bleu_score = compute_bleu(ground_truth_text, predicted_text)
	    print(f"Bleu Score (BLEU): {bleu_score:.2f}")

	    #Compute WER
	    wer_score = compute_wer(ground_truth_text, predicted_text)
	    print(f"Word Error Rate (WER): {wer_score:.2f}")

	    #Compute METEOR
	    meteor = compute_meteor(ground_truth_text, predicted_text)
	    print(f"METEOR Score: {meteor:.2f}")

	    # Paths to sample audio files for MCD computation in current directory
	    ground_truth_audio = os.path.join(os.getcwd(), "gt_ttsOutput.wav")
	    predicted_audio = os.path.join(os.getcwd(), "ttsOutput.wav")

	    # Compute Mel Cepstral Distance (MCD). This step is best-effort: a failure
	    # must not discard the translation and the other metrics, so `mcd` falls
	    # back to None instead of being left unbound for the return statement.
	    try:
	        mcd = compute_mcd(ground_truth_audio, predicted_audio)
	    except Exception as e:
	        mcd = None
	        print(f"Error computing MCD: {e}")
	    else:
	        print(f"Mel Cepstral Distance (MCD): {mcd:.2f}")

	    return translated_text_target,bleu_score,wer_score,meteor,mcd,'ttsOutput.wav'


	# In[6]:


	#Define User Interface Function using Gradio and IPython Libraries
	import gradio as gr
	from IPython.display import Audio

	# Hugging Face checkpoints offered in the model dropdown. The trailing
	# "ExpN" tags are the experiment numbers used by the authors.
	translation_model_choices = [
	    "Davlan/byt5-base-eng-yor-mt",               #Exp1
	    "Davlan/m2m100_418M-eng-yor-mt",             #Exp2
	    "Davlan/mbart50-large-eng-yor-mt",           #Exp3
	    "Davlan/mt5_base_eng_yor_mt",                #Exp4
	    "omoekan/opus-tatoeba-eng-yor",              #Exp5
	    "masakhane/afrimt5_en_yor_news",             #Exp6
	    "masakhane/afrimbart_en_yor_news",           #Exp7
	    "masakhane/afribyt5_en_yor_news",            #Exp8
	    "masakhane/byt5_en_yor_news",                #Exp9
	    "masakhane/mt5_en_yor_news",                 #Exp10
	    "masakhane/mbart50_en_yor_news",             #Exp11
	    "masakhane/m2m100_418M_en_yor_news",         #Exp12
	    "masakhane/m2m100_418M_en_yor_rel_news",     #Exp13
	    "masakhane/m2m100_418M_en_yor_rel_news_ft",  #Exp14
	    "masakhane/m2m100_418M_en_yor_rel",          #Exp15
	    "dabagyan/menyo_en2yo",                      #Exp16
	    # Disabled candidates:
	    #   facebook/nllb-200-distilled-600M, facebook/nllb-200-3.3B,
	    #   facebook/nllb-200-1.3B, facebook/nllb-200-distilled-1.3B,
	    #   keithhon/nllb-200-3.3B, CohereForAI/aya-101,
	    #   facebook/m2m100_1.2B, facebook/m2m100-12B-avg-5-ckpt
	    "facebook/m2m100_418M",                      #Exp17
	    "google/mt5-base",                           #Exp20
	    "google/byt5-large",                         #Exp21
	]

	# Input widgets, in the positional order translate_transformers expects:
	# (modelName, sourceLangText).
	model_selector = gr.Dropdown(
	    translation_model_choices,
	    label="Select Finetuned Eng2Yor Translation Model",
	)
	english_text_box = gr.Textbox(
	    lines=2,
	    placeholder="Enter English Text Here...",
	    label="English Text",
	)

	# Output widgets, one per element of the tuple returned by
	# translate_transformers: text, BLEU, WER, METEOR, MCD, audio file path.
	result_components = [
	    gr.Textbox(value="text", label="Translated Yoruba Text"),
	    gr.Textbox(value="number", label="BLEU SCORE"),
	    gr.Textbox(value="number", label="WER(WORD ERROR RATE) SCORE - The Lower the Better"),
	    gr.Textbox(value="number", label="METEOR SCORE"),
	    gr.Textbox(value="number", label="MCD(MEL CESPRAL DISTANCE) SCORE"),
	    gr.Audio(type="filepath", label="Click to Generate Yoruba Speech from the Translated Text"),
	]

	interface = gr.Interface(
	    fn=translate_transformers,
	    inputs=[model_selector, english_text_box],
	    outputs=result_components,
	    title="ASPMIR-MACHINE-TRANSLATION-TESTBED FOR LOW RESOURCED AFRICAN LANGUAGES",
	    description="{This Tool Allows Developers and Researchers to Carry Out Experiments on Low Resourced African Languages with State-of-the-Art Pretrained or Finetuned Models.}",
	)
	#interface.launch(share=True)


	# In[7]:


	# Start the Gradio app only when this file is executed as a script, so that
	# importing the module does not launch a server. share=True is forwarded to
	# Interface.launch (per Gradio's docs this requests a public share link).
	if __name__ == "__main__":
	interface.launch(share=True)