from transformers import (
    DataCollatorForLanguageModeling,
    GPT2LMHeadModel,
    GPT2Tokenizer,
    Trainer,
    TrainingArguments,
)
# Fine-tune GPT-2 on a cleaned JSON dataset with the Hugging Face Trainer.

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# GPT-2 ships without a padding token; batched training requires one.
# Reusing EOS as PAD avoids resizing the embedding matrix.
tokenizer.pad_token = tokenizer.eos_token

model = GPT2LMHeadModel.from_pretrained('gpt2')
model.config.pad_token_id = model.config.eos_token_id

# `load_data` is defined elsewhere in the project; presumably it returns a
# dataset of tokenized examples usable by Trainer — TODO confirm its schema.
train_data = load_data('path/to/cleaned_data.json')

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
)

# Causal-LM collator: dynamically pads each batch and, with mlm=False,
# copies input_ids into labels so the Trainer can compute the LM loss.
# Without it the Trainer receives no labels and cannot train the model.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    data_collator=data_collator,
)

trainer.train()