* first draft of updated evaluation content
* first draft of copied content
* drop ORPO and make exercise conform to unit 1
* add to toc
* readthrough edits
* improve dpo page 1
* tweak 3.1 more
* improve readability in DPO page 3
* remove learning objectives from dpo 3
* formatting in submission page
* clarity in unit 3 content on preference alignment and DPO
* Update DPO section in unit 3 to enhance mathematical clarity and improve formatting of hyperparameters and next steps.
* Update DPO setup instructions in unit 3 with package version upgrades, correct model references, and enhance clarity in code examples.
* Update model reference in DPO setup instructions in unit 3 to correct base model name.
* update readme with release changes
* [UNIT 2] re-release evaluation unit for 2025 (#231)
* first draft of updated evaluation content
* update vLLM with styling and links
* Update evaluation content in unit 2.1: Corrected capitalization of "model Hub" and updated alternative evaluation tool links for consistency.
* Refactor output formatting in unit 2: Updated code block styles to include language annotations and removed unnecessary HTML tags for improved readability.
* Fix capitalization of "vLLM"
* Enhance unit 2 content: Added hyperlinks to MMLU, TruthfulQA, BBH, WinoGrande
* Update link for Hugging Face Jobs documentation in unit 2
* Change example dataset from SmolTalk2 (#250)
Co-authored-by: burtenshaw <[email protected]>
* Small nits updated (#251)
Co-authored-by: burtenshaw <[email protected]>
* Unit 4 rerelease (#252)
* restore languages from main
* restore changes in notebooks
* restore v1 changes
* restore github changes
* restore unit notebook
* restore unit 1 improvements from main
* restore more improvements
* torch_dtype to dtype (#253)
* fix all torch_dtype refs
---------
Co-authored-by: Carlos Miguel Patiño <[email protected]>
Co-authored-by: Sergio Paniego Blanco <[email protected]>
@@ -1203,7 +1206,7 @@ We instantiate the trainer, capture a pre-training baseline generation, launch `
 trainer = SFTTrainer(
     model=model,
-    train_dataset=dataset["train"],
+    train_dataset=formatted_dataset,
     args=config,
 )
 ```
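The hunk above swaps the raw `dataset["train"]` split for a preprocessed `formatted_dataset`. As a rough illustration of what such a formatting step might do, here is a minimal, self-contained sketch; the `format_example` helper and the `{"messages": [...]}` record schema are assumptions for illustration, not the repository's actual preprocessing code:

```python
# Hypothetical sketch: flatten chat-style records into a plain "text" field,
# the kind of preprocessing that could produce `formatted_dataset`.
def format_example(example):
    # Assumed schema: {"messages": [{"role": ..., "content": ...}, ...]}
    lines = [f"{m['role']}: {m['content']}" for m in example["messages"]]
    return {"text": "\n".join(lines)}

raw = [
    {"messages": [
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello!"},
    ]}
]

# In the real setup this would be a datasets.Dataset.map call; a plain
# list comprehension keeps the sketch dependency-free.
formatted_dataset = [format_example(ex) for ex in raw]
print(formatted_dataset[0]["text"])
```

The formatted records then feed directly into `SFTTrainer` via its `train_dataset` argument, as the diff shows.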
@@ -1249,22 +1252,23 @@ In the previous exercises we've dived deep into using TRL's Python API for fine-
 We can define a command in the TRL CLI to fine-tune a model. We'll be able to run it with the `trl sft` command. The CLI command and Python API share the same configuration options.
 
+We preprocessed the `smoltalk_everyday_convs_reasoning_Qwen3_32B_think` subset of SmolTalk2 so that it is easier to work with when using the TRL CLI.