import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# Define a simple dataset for training
class ConversationDataset(Dataset):
    def __init__(self, conversations, tokenizer, max_length=512):
        self.conversations = conversations
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.conversations)
    def __getitem__(self, idx):
        input_text, target_text = self.conversations[idx]
        # GPT-2 is a causal language model, so the prompt and response are
        # concatenated into a single sequence and the model learns to continue
        # the prompt with the response.
        full_text = input_text + self.tokenizer.eos_token + target_text + self.tokenizer.eos_token
        encoding = self.tokenizer(
            full_text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        input_ids = encoding['input_ids'].flatten()
        attention_mask = encoding['attention_mask'].flatten()
        # Labels mirror the input ids; padding positions are set to -100 so the
        # loss ignores them.
        labels = input_ids.clone()
        labels[attention_mask == 0] = -100
        return input_ids, attention_mask, labels
# Sample data (normally you would have a much larger dataset)
conversations = [
("What are the benefits of regular exercise?", "Regular exercise improves health, boosts mood, and enhances physical fitness."),
("How do I cook pasta?", "To cook pasta, boil water, add pasta, cook for 8-10 minutes, then drain."),
# Add more conversation pairs here...
]
# Initialize tokenizer and dataset
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default; reuse EOS so padding works
dataset = ConversationDataset(conversations, tokenizer)
data_loader = DataLoader(dataset, batch_size=2, shuffle=True)
# Initialize the GPT-2 model
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.train()
# Define the optimizer; GPT2LMHeadModel computes the cross-entropy loss
# internally when labels are passed, so no separate criterion is needed
optimizer = optim.AdamW(model.parameters(), lr=1e-5)
# Training loop
epochs = 3
for epoch in range(epochs):
    for batch_idx, (input_ids, attention_mask, labels) in enumerate(data_loader):
        optimizer.zero_grad()
        # Forward pass; the model returns the loss when labels are provided
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        # Print loss for monitoring
        if batch_idx % 10 == 0:
            print(f'Epoch {epoch + 1}/{epochs}, Batch {batch_idx}, Loss: {loss.item():.4f}')
# Save the trained model
model.save_pretrained('./trained_conversational_model')
tokenizer.save_pretrained('./trained_conversational_model')
print("Model training complete!")