"""Generate Polish paraphrases with a local plT5 paraphraser model.

Loads a seq2seq paraphrasing model (plT5 fine-tuned for Polish) and, when
run as a script, prints several paraphrases for each test sentence.
"""
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Path to local model weights; replace with a hub id such as
# "Wojtekb30/plt5-paraphraser-pl" to download from the Hugging Face hub.
MODEL_NAME = "./"

print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Use GPU if available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()


def paraphrase(text, num_return_sequences=3):
    """Generate paraphrases for a given Polish input sentence.

    Args:
        text: The Polish sentence to paraphrase.
        num_return_sequences: How many paraphrase candidates to return.

    Returns:
        A list of ``num_return_sequences`` decoded paraphrase strings.
    """
    # The model expects this task prefix on every input.
    input_text = f"Parafrazuj: {text}"

    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        max_length=256,
        truncation=True,
    ).to(device)

    # inference_mode() disables autograd tracking during generation —
    # the original called generate() with gradients enabled, wasting
    # memory and time for a pure-inference script.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_length=256,
            num_return_sequences=num_return_sequences,
            # NOTE(review): beam search combined with sampling below is a
            # legal but unusual configuration ("beam-sample" decoding);
            # kept as-is to preserve the original output distribution.
            num_beams=5,
            do_sample=True,
            temperature=1.0,
            top_k=50,
            top_p=0.95,
        )

    return [
        tokenizer.decode(output, skip_special_tokens=True)
        for output in outputs
    ]


if __name__ == "__main__":
    test_sentences = [
        "W nocy zapowiadane są bardzo silne opady deszczu, dlatego lepiej nie wychodzić z domu.",
        "Pomimo zmęczenia po ciężkim dniu pracy, Janek zdecydował się pójść na długi spacer z psem do lasu.",
    ]

    for sentence in test_sentences:
        print("\nOriginal:", sentence)
        print("Paraphrases:")
        for i, p in enumerate(paraphrase(sentence), 1):
            print(f"{i}. {p}")