The Transformer architecture has revolutionized NLP by enabling models to generate, summarize, and translate text more effectively than traditional approaches like RNNs and LSTMs. With the introduction of self-attention and parallel processing, Transformers have significantly improved the quality, fluency, and speed of text-based applications.
This lecture explores three major applications of Transformers in NLP: text generation, text summarization, and machine translation.
Text generation is typically performed by decoder-only Transformers, such as the GPT family (GPT-2, GPT-3, GPT-4), which use autoregressive language modeling. These models generate text one token at a time, predicting the next token based on previously generated tokens.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import warnings
def generate_text(prompt, max_length=150, model_name="gpt2"):
    """
    Generate text using a pre-trained causal language model.

    The tokenizer and model are loaded on every call, wrapped in a
    "text-generation" pipeline, and run once on ``prompt``.

    Args:
        prompt (str): Starting text to generate from
        max_length (int): Maximum total length of generated text
        model_name (str): Hugging Face model identifier

    Returns:
        str: Generated text, or the unchanged ``prompt`` if the generation
            call itself raises.

    Raises:
        Exception: Errors raised while loading the tokenizer/model or while
            building the pipeline are not caught and propagate to the caller.
    """
    # Silence UserWarnings only for the duration of this call.  A bare
    # warnings.filterwarnings() here would permanently alter the warning
    # filters of the whole process every time the function runs.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)

        # Load tokenizer and model
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)

        # Create text generation pipeline with explicit truncation
        generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_length=max_length,
            truncation=True,  # Explicitly set truncation
            pad_token_id=tokenizer.eos_token_id,  # Reuse the EOS id for padding
        )

        # Generate text
        try:
            generated_texts = generator(prompt)
            return generated_texts[0]['generated_text']
        except Exception as e:
            # Deliberate best-effort: report the failure and hand the
            # prompt back instead of raising.
            print(f"Error during text generation: {e}")
            return prompt
# Example usage
def main():
    """Run text generation on a few sample prompts and print each result."""
    # A handful of openings that showcase text generation
    sample_prompts = (
        "Once upon a time,",
        "In a world where technology revolutionized everything,",
        "The curious scientist discovered",
    )

    # Echo every prompt, then the text produced from it
    for seed in sample_prompts:
        print(f"Prompt: {seed}")
        completion = generate_text(seed, max_length=200)
        print(f"Generated Text: {completion}\n")
if __name__ == "__main__":
    main()

Output:
import textwrap
from transformers import pipeline
def demonstrate_bart_summarization():
    """Summarize three sample passages and print each original/summary pair.

    Builds a "summarization" pipeline for the "facebook/bart-large-cnn"
    checkpoint, then for every sample prints the passage and its summary,
    both wrapped to 80 columns, followed by a dashed divider.
    """
    # Summarization pipeline backed by the BART CNN checkpoint
    bart = pipeline("summarization", model="facebook/bart-large-cnn")

    # Sample passages keyed by the label printed above each one
    samples = {
        "Scientific": (
            "Transformers have significantly changed the landscape of Natural Language Processing (NLP) by introducing the self-attention mechanism. "
            "Unlike traditional Recurrent Neural Networks (RNNs), transformer models can process entire sequences in parallel, leading to more efficient and effective language understanding and generation."
        ),
        "News Article": (
            "A breakthrough in renewable energy technology has been announced by a team of researchers at Stanford University. "
            "The new solar panel design increases energy conversion efficiency by 40% compared to current commercial panels. "
            "This innovation could potentially reduce solar energy costs and make sustainable power more accessible to communities worldwide."
        ),
        "Academic": (
            "The interdisciplinary study of cognitive neuroscience explores the complex relationships between brain functions and cognitive processes. "
            "By integrating methodologies from psychology, biology, and computer science, researchers aim to develop comprehensive models of human perception, memory, and decision-making mechanisms."
        ),
    }

    # The divider is identical for every sample, so build it once
    divider = "\n" + "-" * 80 + "\n"

    print("BART Summarization Examples:\n")
    for label, passage in samples.items():
        print(f"{label} Text:")
        print("Original:")
        print(textwrap.fill(passage, width=80))
        print("\nSummary:")
        # Sampling disabled; summary length limits are min 20 / max 50
        outputs = bart(passage, max_length=50, min_length=20, do_sample=False)
        condensed = outputs[0]['summary_text']
        print(textwrap.fill(condensed, width=80))
        print(divider)
# Run the demonstration
demonstrate_bart_summarization()

# Marian classes used by the translation example
from transformers import MarianMTModel, MarianTokenizer
def _marian_translate(text, source_lang, target_lang):
    """Translate ``text`` with the Helsinki-NLP opus-mt model for one language pair."""
    model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
    # Load tokenizer and model
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    # Tokenize and translate
    inputs = tokenizer(text, return_tensors="pt", padding=True)
    translated = model.generate(**inputs, max_length=100)
    return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]


def translate_text(text, source_lang, target_lang):
    """
    Translate text between languages using Helsinki-NLP's Marian MT models.

    The direct ``source -> target`` model is tried first. If anything in
    that attempt raises and neither language is English, a two-step
    translation through English (``source -> en -> target``) is attempted.

    Args:
        text (str): Text to be translated
        source_lang (str): Source language code (e.g., 'en', 'fr', 'es');
            lower-cased before use
        target_lang (str): Target language code; lower-cased before use

    Returns:
        str: Translated text. On failure no exception is raised; a message
            starting with "Translation failed:" is returned instead.
    """
    try:
        # Format language codes for model name
        source_lang = source_lang.lower()
        target_lang = target_lang.lower()
        return _marian_translate(text, source_lang, target_lang)
    except Exception as e:
        print(f"Detailed error: {type(e).__name__} - {str(e)}")
        # If direct translation failed, try using English as pivot.  When one
        # side already is English the pivot would repeat the same model.
        if source_lang != "en" and target_lang != "en":
            try:
                print("Trying two-step translation via English...")
                # Source -> English
                english_text = _marian_translate(text, source_lang, "en")
                # English -> Target
                return _marian_translate(english_text, "en", target_lang)
            except Exception as pivot_error:
                return f"Translation failed: {str(e)}. Pivot translation also failed: {str(pivot_error)}"
        return f"Translation failed: {str(e)}"
def main():
    """Translate a few sample sentences and print each source/translation pair."""
    # Each job names the sentence plus its source and target language codes
    jobs = [
        {"text": "Hello, how are you?", "source": "en", "target": "fr"},
        {"text": "Bonjour le monde", "source": "fr", "target": "es"},
        {"text": "Machine learning is fascinating", "source": "en", "target": "de"},
    ]

    # Translate every job and show the input next to the output
    for job in jobs:
        sentence, src, tgt = job['text'], job['source'], job['target']
        outcome = translate_text(sentence, src, tgt)
        print(f"Original ({src}): {sentence}")
        print(f"Translated ({tgt}): {outcome}\n")
if __name__ == "__main__":
    main()

Output:
Original (en): Hello, how are you?
Translated (fr): Bonjour, comment allez-vous ?
Original (fr): Bonjour le monde
Translated (es): Hola, mundo.
Original (en): Machine learning is fascinating
Translated (de): Maschinelles Lernen ist faszinierend
While Transformers have made breakthroughs, challenges remain: