
Commit 09f7eb0

Remove tokenizer parameter from SFTTrainer instantiation
1 parent: 5e1314b
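
The change tracks TRL's API: newer releases of `trl` no longer accept a `tokenizer=` argument on `SFTTrainer` (the `transformers` `Trainer` family renamed it to `processing_class=`), and the trainer loads a tokenizer from the model automatically when none is given, so the argument can simply be dropped. A minimal sketch of the updated call follows; the model and dataset ids are illustrative, not taken from this commit:

```python
# Sketch of an SFTTrainer call without tokenizer=, assuming a recent TRL release.
# Model and dataset ids below are placeholders for illustration.
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

dataset = load_dataset("HuggingFaceTB/smoltalk", "everyday-conversations")
config = SFTConfig(output_dir="sft-output", max_steps=100)

trainer = SFTTrainer(
    model="HuggingFaceTB/SmolLM2-135M",  # SFTTrainer loads the model and its tokenizer from the id
    args=config,
    train_dataset=dataset["train"],
    # tokenizer=tokenizer,  <- removed; pass processing_class=... only for a custom tokenizer
)
trainer.train()
```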

19 files changed: +1 −22 lines changed

notebooks/1/4.ipynb

Lines changed: 1 addition & 2 deletions
@@ -1944,7 +1944,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 36,
+"execution_count": null,
 "metadata": {
 "colab": {
 "base_uri": "https://localhost:8080/",
@@ -2384,7 +2384,6 @@
 " model=model,\n",
 " args=sft_config,\n",
 " train_dataset=train_dataset,\n",
-" tokenizer=tokenizer,\n",
 " peft_config=peft_config,\n",
 " packing=True,\n",
 " dataset_kwargs={\n",

units/en/unit1/3.md

Lines changed: 0 additions & 2 deletions
@@ -289,7 +289,6 @@ from trl import SFTTrainer

 trainer = SFTTrainer(
     model=model,
-    tokenizer=tokenizer,
     train_dataset=dataset["train"],
     args=config,
 )
@@ -504,7 +503,6 @@ config = SFTConfig(
 # Train!
 trainer = SFTTrainer(
     model=model,
-    tokenizer=tokenizer,
     train_dataset=dataset["train"],
     args=config,
 )

units/en/unit1/3a.md

Lines changed: 0 additions & 1 deletion
@@ -142,7 +142,6 @@ trainer = SFTTrainer(
     model=model,
     args=SFTConfig(output_dir="lora-adapter", num_train_epochs=1, per_device_train_batch_size=2),
     train_dataset=dataset["train"],
-    tokenizer=tokenizer,
     peft_config=peft_config,
     packing=True,
 )

units/en/unit1/4.md

Lines changed: 0 additions & 2 deletions
@@ -1186,7 +1186,6 @@ from trl import SFTTrainer

 lora_trainer = SFTTrainer(
     model=model,
-    tokenizer=tokenizer,
     train_dataset=formatted_dataset,  # dataset with a "text" field or messages + dataset_text_field in config
     args=training_config,
     peft_config=peft_config,  # << enable LoRA
@@ -1204,7 +1203,6 @@ We instantiate the trainer, capture a pre-training baseline generation, launch `

 trainer = SFTTrainer(
     model=model,
-    tokenizer=tokenizer,
     train_dataset=dataset["train"],
     args=config,
 )

units/en/unit1/5.md

Lines changed: 0 additions & 1 deletion
@@ -64,7 +64,6 @@ config = SFTConfig(
 # Train
 trainer = SFTTrainer(
     model=model,
-    tokenizer=tokenizer,
     train_dataset=dataset["train"],
     args=config,
 )

v1/3_parameter_efficient_finetuning/notebooks/finetune_sft_peft.ipynb

Lines changed: 0 additions & 1 deletion
@@ -279,7 +279,6 @@
 " train_dataset=dataset[\"train\"],\n",
 " peft_config=peft_config, # LoRA configuration\n",
 " max_seq_length=max_seq_length, # Maximum sequence length\n",
-" tokenizer=tokenizer,\n",
 " packing=True, # Enable input packing for efficiency\n",
 " dataset_kwargs={\n",
 " \"add_special_tokens\": False, # Special tokens handled by template\n",

v1/5_vision_language_models/notebooks/vlm_sft_sample.ipynb

Lines changed: 0 additions & 1 deletion
@@ -369,7 +369,6 @@
 " eval_dataset=ds[\"test\"],\n",
 " data_collator=collate_fn,\n",
 " peft_config=peft_config,\n",
-" tokenizer=processor.tokenizer,\n",
 ")\n",
 "\n",
 "# TODO: 🦁 🐕 align the SFTTrainer params with your chosen dataset. For example, if you are using the `bigcode/the-stack-smol` dataset, you will need to choose the `content` column`"

v1/es/1_instruction_tuning/notebooks/sft_finetuning_example.ipynb

Lines changed: 0 additions & 1 deletion
@@ -168,7 +168,6 @@
 " model=model,\n",
 " args=sft_config,\n",
 " train_dataset=ds[\"train\"],\n",
-" tokenizer=tokenizer,\n",
 " eval_dataset=ds[\"test\"],\n",
 ")\n",
 "\n",

v1/es/3_parameter_efficient_finetuning/notebooks/finetune_sft_peft.ipynb

Lines changed: 0 additions & 1 deletion
@@ -279,7 +279,6 @@
 " train_dataset=dataset[\"train\"],\n",
 " peft_config=peft_config, # Configuración LoRA\n",
 " max_seq_length=max_seq_length, # Longitud máxima de la secuencia\n",
-" tokenizer=tokenizer,\n",
 " packing=True, # Habilita el empaquetado de entrada para mayor eficiencia\n",
 " dataset_kwargs={\n",
 " \"add_special_tokens\": False, # Los tokens especiales son manejados por la plantilla\n",

v1/ja/1_instruction_tuning/notebooks/chat_templates_example.ipynb

Lines changed: 0 additions & 1 deletion
@@ -168,7 +168,6 @@
 " model=model,\n",
 " args=sft_config,\n",
 " train_dataset=ds[\"train\"],\n",
-" tokenizer=tokenizer,\n",
 " eval_dataset=ds[\"test\"],\n",
 ")\n",
 "\n",
