
Commit bc32b50

clear notebook outputs
1 parent 77d00f0 commit bc32b50

File tree

2 files changed: +34 -178 lines changed
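
Both notebooks keep their code cells but lose their recorded outputs, execution counts, and per-cell ExecuteTime metadata. Below is a minimal sketch of how such a cleanup can be scripted with nbformat (the library Jupyter itself uses to read and write notebooks); whether the author used a script like this or another tool is an assumption:

import nbformat

def clear_notebook(path: str) -> None:
    # Read the notebook, strip outputs and timing info from code cells, write it back.
    nb = nbformat.read(path, as_version=4)
    for cell in nb.cells:
        if cell.cell_type == "code":
            cell.outputs = []                       # drop stream/display/error outputs
            cell.execution_count = None             # becomes "execution_count": null
            cell.metadata.pop("ExecuteTime", None)  # drop per-cell timing metadata
    nbformat.write(nb, path)

# The two notebooks touched by this commit:
for path in ("labml_nn/transformers/LoRA/experiment.ipynb",
             "labml_nn/transformers/LoRA/train.ipynb"):
    clear_notebook(path)

The CLI alternative, jupyter nbconvert --clear-output --inplace <notebook>, clears outputs and execution counts but leaves the ExecuteTime metadata in place, which this diff also removes; hence the explicit pop in the sketch.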


labml_nn/transformers/LoRA/experiment.ipynb

Lines changed: 12 additions & 63 deletions
@@ -1,28 +1,18 @@
 {
 "cells": [
 {
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:22:57.496965Z",
-"start_time": "2024-07-31T12:22:55.151730Z"
-}
-},
+"metadata": {},
 "cell_type": "code",
 "source": [
 "from labml_nn.transformers.LoRA.GPT2 import GPTModel\n",
 "import torch"
 ],
 "id": "cffa3ec341b4905a",
 "outputs": [],
-"execution_count": 1
+"execution_count": null
 },
 {
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:22:57.986397Z",
-"start_time": "2024-07-31T12:22:57.498305Z"
-}
-},
+"metadata": {},
 "cell_type": "code",
 "source": [
 "from transformers import AutoTokenizer\n",
@@ -31,17 +21,13 @@
 ],
 "id": "c2b0b7e18394ea9e",
 "outputs": [],
-"execution_count": 2
+"execution_count": null
 },
 {
 "cell_type": "code",
 "id": "initial_id",
 "metadata": {
-"collapsed": true,
-"ExecuteTime": {
-"end_time": "2024-07-31T12:22:58.562136Z",
-"start_time": "2024-07-31T12:22:57.987296Z"
-}
+"collapsed": true
 },
 "source": [
 "model = GPTModel()\n",
@@ -54,32 +40,11 @@
 "if unexpected_keys:\n",
 " print(f\"Unexpected keys: {unexpected_keys}\")"
 ],
-"outputs": [
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"/tmp/ipykernel_7130/2581223434.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
-" state_dict = torch.load('transformed.pth')\n"
-]
-},
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"Missing keys: ['token_embedding.lora_a', 'token_embedding.lora_b', 'position_embedding.lora_a', 'position_embedding.lora_b', 'blocks.0.attn.c_att.lora_a', 'blocks.0.attn.c_att.lora_b', 'blocks.0.attn.c_proj.lora_a', 'blocks.0.attn.c_proj.lora_b', 'blocks.0.ffn.c_fc.lora_a', 'blocks.0.ffn.c_fc.lora_b', 'blocks.0.ffn.c_proj.lora_a', 'blocks.0.ffn.c_proj.lora_b', 'blocks.1.attn.c_att.lora_a', 'blocks.1.attn.c_att.lora_b', 'blocks.1.attn.c_proj.lora_a', 'blocks.1.attn.c_proj.lora_b', 'blocks.1.ffn.c_fc.lora_a', 'blocks.1.ffn.c_fc.lora_b', 'blocks.1.ffn.c_proj.lora_a', 'blocks.1.ffn.c_proj.lora_b', 'blocks.2.attn.c_att.lora_a', 'blocks.2.attn.c_att.lora_b', 'blocks.2.attn.c_proj.lora_a', 'blocks.2.attn.c_proj.lora_b', 'blocks.2.ffn.c_fc.lora_a', 'blocks.2.ffn.c_fc.lora_b', 'blocks.2.ffn.c_proj.lora_a', 'blocks.2.ffn.c_proj.lora_b', 'blocks.3.attn.c_att.lora_a', 'blocks.3.attn.c_att.lora_b', 'blocks.3.attn.c_proj.lora_a', 'blocks.3.attn.c_proj.lora_b', 'blocks.3.ffn.c_fc.lora_a', 'blocks.3.ffn.c_fc.lora_b', 'blocks.3.ffn.c_proj.lora_a', 'blocks.3.ffn.c_proj.lora_b', 'blocks.4.attn.c_att.lora_a', 'blocks.4.attn.c_att.lora_b', 'blocks.4.attn.c_proj.lora_a', 'blocks.4.attn.c_proj.lora_b', 'blocks.4.ffn.c_fc.lora_a', 'blocks.4.ffn.c_fc.lora_b', 'blocks.4.ffn.c_proj.lora_a', 'blocks.4.ffn.c_proj.lora_b', 'blocks.5.attn.c_att.lora_a', 'blocks.5.attn.c_att.lora_b', 'blocks.5.attn.c_proj.lora_a', 'blocks.5.attn.c_proj.lora_b', 'blocks.5.ffn.c_fc.lora_a', 'blocks.5.ffn.c_fc.lora_b', 'blocks.5.ffn.c_proj.lora_a', 'blocks.5.ffn.c_proj.lora_b', 'blocks.6.attn.c_att.lora_a', 'blocks.6.attn.c_att.lora_b', 'blocks.6.attn.c_proj.lora_a', 'blocks.6.attn.c_proj.lora_b', 'blocks.6.ffn.c_fc.lora_a', 'blocks.6.ffn.c_fc.lora_b', 'blocks.6.ffn.c_proj.lora_a', 'blocks.6.ffn.c_proj.lora_b', 'blocks.7.attn.c_att.lora_a', 'blocks.7.attn.c_att.lora_b', 'blocks.7.attn.c_proj.lora_a', 'blocks.7.attn.c_proj.lora_b', 'blocks.7.ffn.c_fc.lora_a', 'blocks.7.ffn.c_fc.lora_b', 'blocks.7.ffn.c_proj.lora_a', 'blocks.7.ffn.c_proj.lora_b', 'blocks.8.attn.c_att.lora_a', 'blocks.8.attn.c_att.lora_b', 'blocks.8.attn.c_proj.lora_a', 'blocks.8.attn.c_proj.lora_b', 'blocks.8.ffn.c_fc.lora_a', 'blocks.8.ffn.c_fc.lora_b', 'blocks.8.ffn.c_proj.lora_a', 'blocks.8.ffn.c_proj.lora_b', 'blocks.9.attn.c_att.lora_a', 'blocks.9.attn.c_att.lora_b', 'blocks.9.attn.c_proj.lora_a', 'blocks.9.attn.c_proj.lora_b', 'blocks.9.ffn.c_fc.lora_a', 'blocks.9.ffn.c_fc.lora_b', 'blocks.9.ffn.c_proj.lora_a', 'blocks.9.ffn.c_proj.lora_b', 'blocks.10.attn.c_att.lora_a', 'blocks.10.attn.c_att.lora_b', 'blocks.10.attn.c_proj.lora_a', 'blocks.10.attn.c_proj.lora_b', 'blocks.10.ffn.c_fc.lora_a', 'blocks.10.ffn.c_fc.lora_b', 'blocks.10.ffn.c_proj.lora_a', 'blocks.10.ffn.c_proj.lora_b', 'blocks.11.attn.c_att.lora_a', 'blocks.11.attn.c_att.lora_b', 'blocks.11.attn.c_proj.lora_a', 'blocks.11.attn.c_proj.lora_b', 'blocks.11.ffn.c_fc.lora_a', 'blocks.11.ffn.c_fc.lora_b', 'blocks.11.ffn.c_proj.lora_a', 'blocks.11.ffn.c_proj.lora_b', 'lm_head.lora_a', 'lm_head.lora_b']\n"
-]
-}
-],
-"execution_count": 3
+"outputs": [],
+"execution_count": null
 },
 {
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:23:00.447976Z",
-"start_time": "2024-07-31T12:22:58.566527Z"
-}
-},
+"metadata": {},
 "cell_type": "code",
 "source": [
 "prompt = \"hello how are you\"\n",
@@ -96,32 +61,16 @@
 " print(tokenizer.decode(id))"
 ],
 "id": "f4f7826ec3729b66",
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-",\n",
-" to\n",
-" you\n",
-" doing\n"
-]
-}
-],
-"execution_count": 4
+"outputs": [],
+"execution_count": null
 },
 {
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:23:00.452060Z",
-"start_time": "2024-07-31T12:23:00.448904Z"
-}
-},
+"metadata": {},
 "cell_type": "code",
 "source": "",
 "id": "c12776360008a974",
 "outputs": [],
-"execution_count": 4
+"execution_count": null
 }
 ],
 "metadata": {

labml_nn/transformers/LoRA/train.ipynb

Lines changed: 22 additions & 115 deletions
@@ -7,117 +7,75 @@
 "collapsed": true,
 "jupyter": {
 "outputs_hidden": true
-},
-"ExecuteTime": {
-"end_time": "2024-07-31T12:57:37.296030Z",
-"start_time": "2024-07-31T12:57:37.292368Z"
 }
 },
 "source": "# !wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt",
 "outputs": [],
-"execution_count": 1
+"execution_count": null
 },
 {
 "cell_type": "code",
 "id": "3b1e507015ba6b81",
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:57:37.317651Z",
-"start_time": "2024-07-31T12:57:37.313808Z"
-}
-},
+"metadata": {},
 "source": [
 "with open('input.txt', 'r', encoding='utf-8') as f:\n",
 " text = f.read()"
 ],
 "outputs": [],
-"execution_count": 2
+"execution_count": null
 },
 {
 "cell_type": "code",
 "id": "ac8e51ae5bbfcae7",
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:57:40.488939Z",
-"start_time": "2024-07-31T12:57:37.319486Z"
-}
-},
+"metadata": {},
 "source": [
 "from transformers import AutoTokenizer\n",
 "\n",
 "tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n",
 "\n",
 "tokens = tokenizer.encode(text, add_special_tokens=False)"
 ],
-"outputs": [
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"Token indices sequence length is longer than the specified maximum sequence length for this model (338025 > 1024). Running this sequence through the model will result in indexing errors\n"
-]
-}
-],
-"execution_count": 3
+"outputs": [],
+"execution_count": null
 },
 {
 "cell_type": "code",
 "id": "aeefcdf813e427e",
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:57:40.495510Z",
-"start_time": "2024-07-31T12:57:40.490341Z"
-}
-},
+"metadata": {},
 "source": [
 "context_length = 512\n",
 "batch_size = 2"
 ],
 "outputs": [],
-"execution_count": 4
+"execution_count": null
 },
 {
 "cell_type": "code",
 "id": "a384b42274f008a2",
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:57:40.522050Z",
-"start_time": "2024-07-31T12:57:40.496842Z"
-}
-},
+"metadata": {},
 "source": [
 "num_batches = len(tokens) // (batch_size * context_length)\n",
 "tokens = tokens[:num_batches * batch_size * context_length]"
 ],
 "outputs": [],
-"execution_count": 5
+"execution_count": null
 },
 {
 "cell_type": "code",
 "id": "5c4cc78ac1a02c1d",
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:57:40.592272Z",
-"start_time": "2024-07-31T12:57:40.524063Z"
-}
-},
+"metadata": {},
 "source": [
 "import torch\n",
 "\n",
 "input_ids = torch.tensor(tokens).view(-1, context_length)"
 ],
 "outputs": [],
-"execution_count": 6
+"execution_count": null
 },
 {
 "cell_type": "code",
 "id": "7037fd75e2161382",
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:57:40.601199Z",
-"start_time": "2024-07-31T12:57:40.593250Z"
-}
-},
+"metadata": {},
 "source": [
 "from torch.utils.data import DataLoader, TensorDataset\n",
 "from torch.optim import Adam\n",
@@ -137,17 +95,12 @@
 "test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)"
 ],
 "outputs": [],
-"execution_count": 7
+"execution_count": null
 },
 {
 "cell_type": "code",
 "id": "a98b7baa064b8494",
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:57:41.577878Z",
-"start_time": "2024-07-31T12:57:40.602187Z"
-}
-},
+"metadata": {},
 "source": [
 "from labml_nn.transformers.LoRA.GPT2 import GPTModel\n",
 "\n",
@@ -157,33 +110,23 @@
 "_ = model.load_state_dict(state_dict, strict=False)"
 ],
 "outputs": [],
-"execution_count": 8
+"execution_count": null
 },
 {
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:57:43.098187Z",
-"start_time": "2024-07-31T12:57:41.578713Z"
-}
-},
+"metadata": {},
 "cell_type": "code",
 "source": [
 "device = \"cuda\"\n",
 "model = model.to(device=\"cuda\")"
 ],
 "id": "2e0fa8b3082df716",
 "outputs": [],
-"execution_count": 9
+"execution_count": null
 },
 {
 "cell_type": "code",
 "id": "e2f5076894770740",
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:57:57.044755Z",
-"start_time": "2024-07-31T12:57:43.099050Z"
-}
-},
+"metadata": {},
 "source": [
 "from labml import tracker, experiment\n",
 "\n",
@@ -236,49 +179,13 @@
 "\n",
 "print(\"Training complete.\")"
 ],
-"outputs": [
-{
-"data": {
-"text/plain": [
-"<IPython.core.display.HTML object>"
-],
-"text/html": [
-"<pre style=\"overflow-x: scroll;\">\n",
-"<strong><span style=\"text-decoration: underline\">LoRA.GPT2</span></strong>: <span style=\"color: #208FFB\">7a14822c4f3c11efad8354ef33f17c7c</span>\n",
-"\t[dirty]: <strong><span style=\"color: #DDB62B\">\"training loop\"</span></strong>\n",
-"<span style=\"color: #208FFB\">Monitor experiment at </span><a href='http://localhost:5005/run/7a14822c4f3c11efad8354ef33f17c7c' target='blank'>http://localhost:5005/run/7a14822c4f3c11efad8354ef33f17c7c</a>\n",
-"<strong><span style=\"color: #DDB62B\">Still updating labml server, please wait for it to complete...</span></strong></pre>"
-]
-},
-"metadata": {},
-"output_type": "display_data"
-},
-{
-"ename": "KeyboardInterrupt",
-"evalue": "",
-"output_type": "error",
-"traceback": [
-"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
-"\u001B[0;31mKeyboardInterrupt\u001B[0m Traceback (most recent call last)",
-"Cell \u001B[0;32mIn[10], line 25\u001B[0m\n\u001B[1;32m 22\u001B[0m loss \u001B[38;5;241m=\u001B[39m criterion(shift_logits\u001B[38;5;241m.\u001B[39mreshape(\u001B[38;5;241m-\u001B[39m\u001B[38;5;241m1\u001B[39m, shift_logits\u001B[38;5;241m.\u001B[39msize(\u001B[38;5;241m-\u001B[39m\u001B[38;5;241m1\u001B[39m)), shift_labels\u001B[38;5;241m.\u001B[39mreshape(\u001B[38;5;241m-\u001B[39m\u001B[38;5;241m1\u001B[39m))\n\u001B[1;32m 24\u001B[0m optimizer\u001B[38;5;241m.\u001B[39mzero_grad()\n\u001B[0;32m---> 25\u001B[0m loss\u001B[38;5;241m.\u001B[39mbackward()\n\u001B[1;32m 26\u001B[0m optimizer\u001B[38;5;241m.\u001B[39mstep()\n\u001B[1;32m 28\u001B[0m tracker\u001B[38;5;241m.\u001B[39msave(step, {\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mloss\u001B[39m\u001B[38;5;124m'\u001B[39m: loss})\n",
-"File \u001B[0;32m~/miniconda3/lib/python3.12/site-packages/torch/_tensor.py:521\u001B[0m, in \u001B[0;36mTensor.backward\u001B[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001B[0m\n\u001B[1;32m 511\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m has_torch_function_unary(\u001B[38;5;28mself\u001B[39m):\n\u001B[1;32m 512\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m handle_torch_function(\n\u001B[1;32m 513\u001B[0m Tensor\u001B[38;5;241m.\u001B[39mbackward,\n\u001B[1;32m 514\u001B[0m (\u001B[38;5;28mself\u001B[39m,),\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 519\u001B[0m inputs\u001B[38;5;241m=\u001B[39minputs,\n\u001B[1;32m 520\u001B[0m )\n\u001B[0;32m--> 521\u001B[0m torch\u001B[38;5;241m.\u001B[39mautograd\u001B[38;5;241m.\u001B[39mbackward(\n\u001B[1;32m 522\u001B[0m \u001B[38;5;28mself\u001B[39m, gradient, retain_graph, create_graph, inputs\u001B[38;5;241m=\u001B[39minputs\n\u001B[1;32m 523\u001B[0m )\n",
-"File \u001B[0;32m~/miniconda3/lib/python3.12/site-packages/torch/autograd/__init__.py:289\u001B[0m, in \u001B[0;36mbackward\u001B[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001B[0m\n\u001B[1;32m 284\u001B[0m retain_graph \u001B[38;5;241m=\u001B[39m create_graph\n\u001B[1;32m 286\u001B[0m \u001B[38;5;66;03m# The reason we repeat the same comment below is that\u001B[39;00m\n\u001B[1;32m 287\u001B[0m \u001B[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001B[39;00m\n\u001B[1;32m 288\u001B[0m \u001B[38;5;66;03m# calls in the traceback and some print out the last line\u001B[39;00m\n\u001B[0;32m--> 289\u001B[0m _engine_run_backward(\n\u001B[1;32m 290\u001B[0m tensors,\n\u001B[1;32m 291\u001B[0m grad_tensors_,\n\u001B[1;32m 292\u001B[0m retain_graph,\n\u001B[1;32m 293\u001B[0m create_graph,\n\u001B[1;32m 294\u001B[0m inputs,\n\u001B[1;32m 295\u001B[0m allow_unreachable\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m,\n\u001B[1;32m 296\u001B[0m accumulate_grad\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m,\n\u001B[1;32m 297\u001B[0m )\n",
-"File \u001B[0;32m~/miniconda3/lib/python3.12/site-packages/torch/autograd/graph.py:768\u001B[0m, in \u001B[0;36m_engine_run_backward\u001B[0;34m(t_outputs, *args, **kwargs)\u001B[0m\n\u001B[1;32m 766\u001B[0m unregister_hooks \u001B[38;5;241m=\u001B[39m _register_logging_hooks_on_whole_graph(t_outputs)\n\u001B[1;32m 767\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m--> 768\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m Variable\u001B[38;5;241m.\u001B[39m_execution_engine\u001B[38;5;241m.\u001B[39mrun_backward( \u001B[38;5;66;03m# Calls into the C++ engine to run the backward pass\u001B[39;00m\n\u001B[1;32m 769\u001B[0m t_outputs, \u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs\n\u001B[1;32m 770\u001B[0m ) \u001B[38;5;66;03m# Calls into the C++ engine to run the backward pass\u001B[39;00m\n\u001B[1;32m 771\u001B[0m \u001B[38;5;28;01mfinally\u001B[39;00m:\n\u001B[1;32m 772\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m attach_logging_hooks:\n",
-"\u001B[0;31mKeyboardInterrupt\u001B[0m: "
-]
-}
-],
-"execution_count": 10
+"outputs": [],
+"execution_count": null
 },
 {
 "cell_type": "code",
 "id": "da2d4023002648dc",
-"metadata": {
-"ExecuteTime": {
-"end_time": "2024-07-31T12:57:57.046254Z",
-"start_time": "2024-07-31T12:57:57.045954Z"
-}
-},
+"metadata": {},
 "source": [],
 "outputs": [],
 "execution_count": null
