| | |
| | |
| |
|
| | |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | |
| |
|
| |
|
| | |
| | from transformers import pipeline |
| | from jiwer import wer |
| | from transformers import VitsModel, AutoTokenizer, set_seed |
| | import torch |
| | import soundfile as sf |
| | import librosa |
| | from scipy.spatial.distance import euclidean |
| | import numpy as np |
| | import string |
| | import os |
| | from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction |
| | from nltk.translate.meteor_score import meteor_score |
| | import string |
| | import numpy as np |
| | import librosa |
| | from scipy.spatial.distance import euclidean |
| | import string |
| |
|
| |
|
| | |
| |
|
| |
|
# One-time NLTK setup: download the WordNet corpora (used by nltk's
# meteor_score) and extend the data search path with a project-local folder.
import nltk

for _corpus_name in ('wordnet', 'omw-1.4'):
    nltk.download(_corpus_name)

# Echo the search path as it is *before* the local folder is appended.
print(nltk.data.path)
nltk.data.path.append('./nltk_data')
| |
|
| |
|
| | |
| |
|
| |
|
| | |
| | |
def compute_bleu(reference_text, predicted_text):
    """
    Score one predicted translation against one reference with sentence-level BLEU.

    Both strings are echoed to stdout, split on whitespace (no lower-casing or
    punctuation stripping is applied here), and scored with NLTK's
    ``sentence_bleu`` using ``SmoothingFunction().method1``.

    :param reference_text: The ground truth text (in Yoruba).
    :param predicted_text: The machine-generated translation text (in Yoruba).
    :return: BLEU score rounded to two decimal places.
    """
    print("The Reference Text = ", reference_text)
    print("The Predicted Text = ",predicted_text)

    # sentence_bleu expects a *list* of tokenised references, hence the wrapper list.
    references = [reference_text.split()]
    hypothesis = predicted_text.split()

    score = sentence_bleu(
        references,
        hypothesis,
        smoothing_function=SmoothingFunction().method1,
    )
    return round(score, 2)
| | |
def compute_wer(reference_text, predicted_text):
    """
    Compute the Word Error Rate (WER) of one prediction against one reference.

    Both texts are lower-cased and stripped of the ASCII punctuation listed in
    ``string.punctuation`` before being passed to ``jiwer.wer``.

    :param reference_text: The ground truth text (in Yoruba).
    :param predicted_text: The machine-generated translation text (in Yoruba).
    :return: WER score rounded to two decimal places.
    """
    # A single translation table serves both strings.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    normalized_reference = reference_text.lower().translate(strip_punctuation)
    normalized_prediction = predicted_text.lower().translate(strip_punctuation)

    return round(wer(normalized_reference, normalized_prediction), 2)
| |
|
| | |
def compute_meteor(reference_text, predicted_text):
    """
    Compute the METEOR score of one prediction against one reference.

    Each text is lower-cased, stripped of the ASCII punctuation listed in
    ``string.punctuation`` and split on whitespace; the token lists are then
    scored with NLTK's ``meteor_score``.

    :param reference_text: The ground truth text (in Yoruba).
    :param predicted_text: The machine-generated translation text (in Yoruba).
    :return: METEOR score rounded to two decimal places.
    """
    strip_punctuation = str.maketrans('', '', string.punctuation)

    # Normalise and tokenise the reference first, then the prediction.
    reference_words, predicted_words = (
        text.lower().translate(strip_punctuation).split()
        for text in (reference_text, predicted_text)
    )

    # meteor_score takes a list of tokenised references plus one tokenised hypothesis.
    return round(meteor_score([reference_words], predicted_words), 2)
| |
|
| | |
def compute_mcd(ground_truth_audio_path, predicted_audio_path, sample_rate=16000, n_mfcc=13):
    """
    Compute an MFCC-based Mel Cepstral Distance (MCD) between two audio files.

    Both files are loaded with librosa, converted to MFCC frames, and truncated
    to the shorter file's frame count (frames are compared position by position;
    no time alignment is performed). The score is the mean per-frame Euclidean
    distance scaled by ``10 / ln(10)``.

    :param ground_truth_audio_path: Path to the ground truth audio file.
    :param predicted_audio_path: Path to the predicted audio file.
    :param sample_rate: Rate both files are resampled to on load. Passing
        ``None`` keeps each file's native rate, in which case the two rates
        must already match.
    :param n_mfcc: Number of MFCC coefficients extracted per frame.
    :return: MCD score as a built-in ``float`` rounded to two decimal places.
    :raises ValueError: If the sampling rates differ or no frames are available.
    """
    y_true, sr_true = librosa.load(ground_truth_audio_path, sr=sample_rate)
    y_pred, sr_pred = librosa.load(predicted_audio_path, sr=sample_rate)

    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if sr_true != sr_pred:
        raise ValueError("Sampling rates do not match between audio files.")

    # Transpose so that each row is one frame of n_mfcc coefficients.
    mfcc_true = librosa.feature.mfcc(y=y_true, sr=sr_true, n_mfcc=n_mfcc).T
    mfcc_pred = librosa.feature.mfcc(y=y_pred, sr=sr_pred, n_mfcc=n_mfcc).T

    min_frames = min(len(mfcc_true), len(mfcc_pred))
    if min_frames == 0:
        # Previously this surfaced as an opaque ZeroDivisionError.
        raise ValueError("Cannot compute MCD: at least one audio file produced no MFCC frames.")

    # Per-frame Euclidean distance, computed for all frames at once.
    frame_distances = np.linalg.norm(
        mfcc_true[:min_frames] - mfcc_pred[:min_frames], axis=1
    )
    mean_distance = float(np.mean(frame_distances, dtype=np.float64))

    mcd = (10.0 / float(np.log(10))) * mean_distance
    return round(mcd, 2)
| |
|
| |
|
| | |
| |
|
| |
|
| | |
_TTS_MODEL_ID = "facebook/mms-tts-yor"
_TTS_CACHE = {}


def _load_tts():
    """Return the Yoruba TTS (model, tokenizer) pair, loading it on first use only."""
    if "pair" not in _TTS_CACHE:
        _TTS_CACHE["pair"] = (
            VitsModel.from_pretrained(_TTS_MODEL_ID),
            AutoTokenizer.from_pretrained(_TTS_MODEL_ID),
        )
    return _TTS_CACHE["pair"]


def translate_transformers(modelName, sourceLangText):
    """
    Translate English text to Yoruba, synthesise speech for it, and score the result.

    Steps:
      1. Run a ``translation_en_to_yo`` pipeline with the selected model.
      2. Synthesise the translation with the ``facebook/mms-tts-yor`` VITS model
         and write the waveform to ``ttsOutput.wav`` in the working directory.
      3. Compute BLEU, WER and METEOR against a single hard-coded Yoruba
         reference sentence, so those scores are only meaningful when the input
         is the English source of that sentence.
      4. Compute MCD between ``gt_ttsOutput.wav`` and ``ttsOutput.wav`` in the
         current working directory (best effort).

    :param modelName: Hugging Face model id of the translation model.
    :param sourceLangText: English text to translate.
    :return: Tuple ``(translated_text, bleu, wer, meteor, mcd, wav_path)``.
        ``mcd`` is ``None`` when it could not be computed (for example when the
        ground-truth audio file is missing).
    """
    # The translation pipeline is rebuilt per call because the model is
    # user-selected; caching every choice could keep many large models in memory.
    translation_pipeline = pipeline('translation_en_to_yo', model=modelName, max_length=500)
    translated_text = translation_pipeline(sourceLangText)
    translated_text_target = translated_text[0]['translation_text']

    # The TTS model is the same for every request, so load it only once.
    ttsModel, tokenizer = _load_tts()
    ttsInputs = tokenizer(translated_text_target, return_tensors="pt")
    set_seed(555)  # fixed seed so repeated runs produce the same waveform
    with torch.no_grad():
        ttsOutput = ttsModel(**ttsInputs).waveform

    ttsWaveform = ttsOutput.numpy()[0]

    # Write with the rate the model actually generates at, not a hard-coded one.
    # NOTE(review): the fixed file name is shared by all requests; concurrent
    # calls would overwrite each other's audio.
    sf.write('ttsOutput.wav', ttsWaveform, ttsModel.config.sampling_rate)

    ground_truth_text = "Àwọn aposteli, àwọn wòlíì, àwọn ajíhìnrere, àwọn olùṣọ́-àgùntàn àti àwọn olùkọ́."
    predicted_text = translated_text_target

    bleu_score = compute_bleu(ground_truth_text, predicted_text)
    print(f"Bleu Score (BLEU): {bleu_score:.2f}")

    wer_score = compute_wer(ground_truth_text, predicted_text)
    print(f"Word Error Rate (WER): {wer_score:.2f}")

    meteor = compute_meteor(ground_truth_text, predicted_text)
    print(f"METEOR Score: {meteor:.2f}")

    ground_truth_audio = os.path.join(os.getcwd(), "gt_ttsOutput.wav")
    predicted_audio = os.path.join(os.getcwd(), "ttsOutput.wav")

    # MCD is best effort. Default to None so a failure no longer leaves `mcd`
    # unbound, which previously raised UnboundLocalError at the return below.
    mcd = None
    try:
        mcd = compute_mcd(ground_truth_audio, predicted_audio)
    except Exception as e:
        print(f"Error computing MCD: {e}")
    else:
        print(f"Mel Cepstral Distance (MCD): {mcd:.2f}")

    return translated_text_target,bleu_score,wer_score,meteor,mcd,'ttsOutput.wav'
| |
|
| |
|
| | |
| |
|
| |
|
| | |
import gradio as gr
from IPython.display import Audio  # not referenced in the visible code; kept as-is

# Hugging Face model ids offered in the model dropdown.
EN_YO_MODEL_CHOICES = [
    "Davlan/byt5-base-eng-yor-mt",
    "Davlan/m2m100_418M-eng-yor-mt",
    "Davlan/mbart50-large-eng-yor-mt",
    "Davlan/mt5_base_eng_yor_mt",
    "omoekan/opus-tatoeba-eng-yor",
    "masakhane/afrimt5_en_yor_news",
    "masakhane/afrimbart_en_yor_news",
    "masakhane/afribyt5_en_yor_news",
    "masakhane/byt5_en_yor_news",
    "masakhane/mt5_en_yor_news",
    "masakhane/mbart50_en_yor_news",
    "masakhane/m2m100_418M_en_yor_news",
    "masakhane/m2m100_418M_en_yor_rel_news",
    "masakhane/m2m100_418M_en_yor_rel_news_ft",
    "masakhane/m2m100_418M_en_yor_rel",
    "dabagyan/menyo_en2yo",
    "facebook/m2m100_418M",
    "google/mt5-base",
    "google/byt5-large",
]

# Gradio UI: inputs map positionally to translate_transformers(modelName,
# sourceLangText); outputs map positionally to the six values it returns.
interface = gr.Interface(
    fn=translate_transformers,
    inputs=[
        gr.Dropdown(
            EN_YO_MODEL_CHOICES,
            label="Select Finetuned Eng2Yor Translation Model",
        ),
        gr.Textbox(lines=2, placeholder="Enter English Text Here...", label="English Text"),
    ],
    outputs=[
        # NOTE(review): `value` is the initial content shown in each box, so
        # these display "text"/"number" until the first run. Presumably meant
        # as placeholders; confirm.
        gr.Textbox(value="text", label="Translated Yoruba Text"),
        gr.Textbox(value="number", label="BLEU SCORE"),
        gr.Textbox(value="number", label="WER(WORD ERROR RATE) SCORE - The Lower the Better"),
        gr.Textbox(value="number", label="METEOR SCORE"),
        # Label typo fixed: "CESPRAL" -> "CEPSTRAL".
        gr.Textbox(value="number", label="MCD(MEL CEPSTRAL DISTANCE) SCORE"),
        gr.Audio(type="filepath", label="Click to Generate Yoruba Speech from the Translated Text"),
    ],
    title="ASPMIR-MACHINE-TRANSLATION-TESTBED FOR LOW RESOURCED AFRICAN LANGUAGES",
    description="{This Tool Allows Developers and Researchers to Carry Out Experiments on Low Resourced African Languages with State-of-the-Art Pretrained or Finetuned Models.}"
)

# Start the web server only when run as a script; share=True also requests a
# public Gradio share link.
if __name__ == "__main__":
    interface.launch(share=True)
| |
|
| |
|