|
9 | 9 | },
|
10 | 10 | {
|
11 | 11 | "cell_type": "code",
|
12 |
| - "execution_count": null, |
| 12 | + "execution_count": 37, |
13 | 13 | "metadata": {},
|
14 | 14 | "outputs": [],
|
15 | 15 | "source": [
|
16 | 16 | "import torch \n",
|
17 | 17 | "import torch.nn as nn\n",
|
| 18 | + "import torch.nn.functional as F\n", |
| 19 | + "import torch.optim as optim\n", |
18 | 20 | "import numpy as np\n",
|
19 | 21 | "from torchtext import data \n",
|
20 | 22 | "import torchtext\n",
|
|
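These imports target the pre-0.9 torchtext API (`data.Field`, `data.LabelField`). For anyone replaying the notebook on a newer release, a hedged compatibility shim (assumption: torchtext 0.9 through 0.11 keep the old classes under `torchtext.legacy`; later releases drop them entirely):

```python
# Compatibility sketch, not part of the commit: resolve the declarative
# torchtext API across the versions before it was removed.
try:
    from torchtext.legacy import data   # torchtext 0.9 - 0.11
except ImportError:
    from torchtext import data          # torchtext < 0.9
```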
30 | 32 | "## Loading & Data Cleaning"
|
31 | 33 | ]
|
32 | 34 | },
|
| 35 | + { |
| 36 | + "cell_type": "code", |
| 37 | + "execution_count": 8, |
| 38 | + "metadata": {}, |
| 39 | + "outputs": [], |
| 40 | + "source": [ |
| 41 | + "device = \"cuda\"\n" |
| 42 | + ] |
| 43 | + }, |
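The new cell pins `device` to `"cuda"`, which fails at tensor-placement time on a CPU-only machine. A minimal sketch of a safer alternative, assuming nothing else depends on the literal string:

```python
import torch

# Sketch: use the GPU when one is visible, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
```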
33 | 44 | {
|
34 | 45 | "cell_type": "code",
|
35 | 46 | "execution_count": null,
|
36 | 47 | "metadata": {},
|
37 | 48 | "outputs": [],
|
38 | 49 | "source": [
|
39 |
| - "device = \"cuda\"\n", |
40 | 50 | "# You'll probably need to use the 'python' engine to load the CSV\n",
|
41 | 51 | "# tweetsDF = pd.read_csv(\"training.1600000.processed.noemoticon.csv\", header=None)\n",
|
42 | 52 | "tweetsDF = pd.read_csv(\"training.1600000.processed.noemoticon.csv\", \n",
|
|
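The `read_csv` call is truncated by the hunk. Based on the 'python' engine comment and the Sentiment140 file name, a plausible full call looks like the sketch below; the column names and the latin-1 encoding are assumptions (the names are inferred from the `fields` list in the next cell):

```python
import pandas as pd

# Sketch: Sentiment140 ships as a headerless CSV; the 'python' engine is
# more tolerant of its embedded quoting.
tweetsDF = pd.read_csv(
    "training.1600000.processed.noemoticon.csv",
    engine="python",
    header=None,
    names=["score", "id", "date", "query", "name", "tweet"],
    encoding="latin-1",  # assumption: the dataset's usual encoding
)
```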
66 | 76 | },
|
67 | 77 | {
|
68 | 78 | "cell_type": "code",
|
69 |
| - "execution_count": null, |
| 79 | + "execution_count": 38, |
70 | 80 | "metadata": {},
|
71 | 81 | "outputs": [],
|
72 | 82 | "source": [
|
73 | 83 | "LABEL = data.LabelField()\n",
|
74 |
| - "TWEET = data.Field(tokenize='spacy', lower=true)\n", |
| 84 | + "TWEET = data.Field(tokenize='spacy', lower=True)\n", |
75 | 85 | "\n",
|
76 | 86 | "fields = [('score',None), ('id',None),('date',None),('query',None),\n",
|
77 | 87 | " ('name',None),\n",
|
|
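`fields` pairs each CSV column with a `Field`, with `None` meaning the column is dropped. The hunk cuts the list off after `name`, but the later `sort_key=lambda x: len(x.tweet)` and `strata_field='label'` imply it ends with `('tweet', TWEET)` and `('label', LABEL)` entries. A hedged sketch of how such a list is usually handed to the legacy `TabularDataset` (the path here is hypothetical):

```python
# Sketch: build the dataset from the cleaned CSV; 'processed-tweets.csv'
# is a stand-in name, not a file from the commit.
twitterDataset = data.TabularDataset(
    path="processed-tweets.csv",
    format="CSV",
    fields=fields,
    skip_header=False,
)
```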
87 | 97 | },
|
88 | 98 | {
|
89 | 99 | "cell_type": "code",
|
90 |
| - "execution_count": null, |
| 100 | + "execution_count": 39, |
91 | 101 | "metadata": {},
|
92 | 102 | "outputs": [],
|
93 | 103 | "source": [
|
|
100 | 110 | },
|
101 | 111 | {
|
102 | 112 | "cell_type": "code",
|
103 |
| - "execution_count": null, |
| 113 | + "execution_count": 40, |
104 | 114 | "metadata": {},
|
105 |
| - "outputs": [], |
| 115 | + "outputs": [ |
| 116 | + { |
| 117 | + "data": { |
| 118 | + "text/plain": [ |
| 119 | + "(6000, 2000, 2000)" |
| 120 | + ] |
| 121 | + }, |
| 122 | + "execution_count": 40, |
| 123 | + "metadata": {}, |
| 124 | + "output_type": "execute_result" |
| 125 | + } |
| 126 | + ], |
106 | 127 | "source": [
|
107 |
| - "(train, test, valid) = twitterDataset.split(split_ratio=[0.8,0.1,0.1])\n", |
| 128 | + "(train, test, valid) = twitterDataset.split(split_ratio=[0.6, 0.2, 0.2], stratified=True, strata_field='label')\n", |
108 | 129 | "\n",
|
109 | 130 | "(len(train),len(test),len(valid))"
|
110 | 131 | ]
|
111 | 132 | },
|
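The revised call stratifies on the `label` field, so the 6000/2000/2000 splits shown in the output keep the same positive/negative ratio as the full dataset. A quick sketch to verify that, using only names defined above:

```python
from collections import Counter

# Sketch: each split should report (roughly) the same label balance.
for name, split in (("train", train), ("test", test), ("valid", valid)):
    print(name, Counter(example.label for example in split))
```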
112 | 133 | {
|
113 | 134 | "cell_type": "code",
|
114 |
| - "execution_count": null, |
| 135 | + "execution_count": 41, |
115 | 136 | "metadata": {},
|
116 |
| - "outputs": [], |
| 137 | + "outputs": [ |
| 138 | + { |
| 139 | + "data": { |
| 140 | + "text/plain": [ |
| 141 | + "[('i', 3742),\n", |
| 142 | + " ('!', 3315),\n", |
| 143 | + " ('.', 3084),\n", |
| 144 | + " (' ', 2175),\n", |
| 145 | + " ('to', 2115),\n", |
| 146 | + " ('the', 2022),\n", |
| 147 | + " (',', 1823),\n", |
| 148 | + " ('a', 1461),\n", |
| 149 | + " ('my', 1205),\n", |
| 150 | + " ('it', 1197)]" |
| 151 | + ] |
| 152 | + }, |
| 153 | + "execution_count": 41, |
| 154 | + "metadata": {}, |
| 155 | + "output_type": "execute_result" |
| 156 | + } |
| 157 | + ], |
117 | 158 | "source": [
|
118 | 159 | "vocab_size = 20000\n",
|
119 | 160 | "TWEET.build_vocab(train, max_size = vocab_size)\n",
|
|
123 | 164 | },
|
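`build_vocab(train, max_size=20000)` keeps the 20,000 most frequent training tokens, and the `Embedding(20002, 300)` printed further down confirms that torchtext adds two specials (`<unk>` and `<pad>`) on top of `max_size`. The frequency listing in the output above is consistent with a cell along these lines (the `LABEL.build_vocab` call is an assumption, though the `LabelField` does need a vocab before iteration):

```python
# Sketch: the labels need a vocab too, and the raw counter behind the
# tweet vocabulary reproduces the execute_result above.
LABEL.build_vocab(train)
print(len(TWEET.vocab))                  # 20002 = max_size + 2 specials
print(TWEET.vocab.freqs.most_common(10))
```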
124 | 165 | {
|
125 | 166 | "cell_type": "code",
|
126 |
| - "execution_count": null, |
| 167 | + "execution_count": 42, |
127 | 168 | "metadata": {},
|
128 | 169 | "outputs": [],
|
129 | 170 | "source": [
|
130 | 171 | "train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(\n",
|
131 | 172 | "(train, valid, test), \n",
|
132 | 173 | "batch_size = 32,\n",
|
133 |
| - "device = device)" |
| 174 | + "device = device,\n", |
| 175 | + "sort_key = lambda x: len(x.tweet),\n", |
| 176 | + "sort_within_batch = False)" |
134 | 177 | ]
|
135 | 178 | },
|
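The added `sort_key` lets `BucketIterator` batch tweets of similar length together, which keeps padding to a minimum. A hedged sanity check of what one batch exposes (attribute names come from the `fields` list):

```python
# Sketch: with the default batch_first=False, batch.tweet is shaped
# [seq_len, batch_size] and batch.label is [batch_size].
batch = next(iter(train_iterator))
print(batch.tweet.shape)   # e.g. torch.Size([seq_len, 32])
print(batch.label.shape)   # torch.Size([32])
```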
136 | 179 | {
|
|
142 | 185 | },
|
143 | 186 | {
|
144 | 187 | "cell_type": "code",
|
145 |
| - "execution_count": null, |
| 188 | + "execution_count": 43, |
146 | 189 | "metadata": {},
|
147 |
| - "outputs": [], |
| 190 | + "outputs": [ |
| 191 | + { |
| 192 | + "data": { |
| 193 | + "text/plain": [ |
| 194 | + "OurFirstLSTM(\n", |
| 195 | + " (embedding): Embedding(20002, 300)\n", |
| 196 | + " (encoder): LSTM(300, 100)\n", |
| 197 | + " (predictor): Linear(in_features=100, out_features=2, bias=True)\n", |
| 198 | + ")" |
| 199 | + ] |
| 200 | + }, |
| 201 | + "execution_count": 43, |
| 202 | + "metadata": {}, |
| 203 | + "output_type": "execute_result" |
| 204 | + } |
| 205 | + ], |
148 | 206 | "source": [
|
149 | 207 | "class OurFirstLSTM(nn.Module):\n",
|
150 | 208 | " def __init__(self, hidden_size, embedding_dim, vocab_size):\n",
|
|
173 | 231 | },
|
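The class body is cut off after `__init__`, but the module tree in the output (`Embedding(20002, 300)`, `LSTM(300, 100)`, `Linear(100, 2)`) pins the layer shapes down. A sketch consistent with that output; the `forward` body is an assumption, not the committed code:

```python
class OurFirstLSTM(nn.Module):
    def __init__(self, hidden_size, embedding_dim, vocab_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.encoder = nn.LSTM(input_size=embedding_dim,
                               hidden_size=hidden_size,
                               num_layers=1)
        self.predictor = nn.Linear(hidden_size, 2)

    def forward(self, seq):
        # Assumption: classify from the LSTM's final hidden state.
        _, (hidden, _) = self.encoder(self.embedding(seq))
        return self.predictor(hidden.squeeze(0))

model = OurFirstLSTM(hidden_size=100, embedding_dim=300,
                     vocab_size=20002).to(device)
```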
174 | 232 | {
|
175 | 233 | "cell_type": "code",
|
176 |
| - "execution_count": null, |
| 234 | + "execution_count": 44, |
177 | 235 | "metadata": {},
|
178 | 236 | "outputs": [],
|
179 | 237 | "source": [
|
|
187 | 245 | " valid_loss = 0.0\n",
|
188 | 246 | " model.train()\n",
|
189 | 247 | " for batch_idx, batch in enumerate(train_iterator):\n",
|
190 |
| - " opt.zero_grad()\n", |
| 248 | + " optimizer.zero_grad()\n", |
191 | 249 | " predict = model(batch.tweet)\n",
|
192 | 250 | " loss = criterion(predict,batch.label)\n",
|
193 | 251 | " loss.backward()\n",
|
|
203 | 261 | " valid_loss += loss.data.item() * batch.tweet.size(0)\n",
|
204 | 262 | " \n",
|
205 | 263 | " valid_loss /= len(valid_iterator)\n",
|
206 |
| - " print('Epoch: {}, Training Loss: {:.2f}, \n", |
207 |
| - " Validation Loss: {:.2f}'.format(epoch, training_loss, valid_loss))" |
| 264 | + " print('Epoch: {}, Training Loss: {:.2f}, Validation Loss: {:.2f}'.format(epoch, training_loss, valid_loss))" |
| 265 | + ] |
| 266 | + }, |
| 267 | + { |
| 268 | + "cell_type": "code", |
| 269 | + "execution_count": 45, |
| 270 | + "metadata": {}, |
| 271 | + "outputs": [ |
| 272 | + { |
| 273 | + "name": "stdout", |
| 274 | + "output_type": "stream", |
| 275 | + "text": [ |
| 276 | + "Epoch: 1, Training Loss: 24.47, Validation Loss: 14.04\n", |
| 277 | + "Epoch: 2, Training Loss: 23.81, Validation Loss: 14.57\n", |
| 278 | + "Epoch: 3, Training Loss: 23.25, Validation Loss: 15.69\n", |
| 279 | + "Epoch: 4, Training Loss: 23.12, Validation Loss: 16.16\n", |
| 280 | + "Epoch: 5, Training Loss: 21.71, Validation Loss: 18.80\n" |
| 281 | + ] |
| 282 | + } |
| 283 | + ], |
| 284 | + "source": [ |
| 285 | + "train(5, model, optimizer, criterion, train_iterator, valid_iterator) " |
208 | 286 | ]
|
209 | 287 | },
|
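The call assumes `model`, `optimizer`, and `criterion` already exist; a sketch of a setup consistent with the loop above (the Adam learning rate is an assumption):

```python
# Sketch: two-class cross-entropy plus Adam, matching the loop's use of
# optimizer.zero_grad() and criterion(predict, batch.label).
optimizer = optim.Adam(model.parameters(), lr=2e-2)
criterion = nn.CrossEntropyLoss()
train(5, model, optimizer, criterion, train_iterator, valid_iterator)
```

Two things worth noting in the printed run: training loss falls while validation loss climbs from epoch 2 onward, the usual overfitting signature on a 6000-tweet training split; and because `batch_first` is left at its default, `batch.tweet.size(0)` is the sequence length, so both reported losses are scaled by tweet length rather than batch size.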
210 | 288 | {
|
|
219 | 297 | "execution_count": null,
|
220 | 298 | "metadata": {},
|
221 | 299 | "outputs": [],
|
| 300 | + "source": [] |
| 301 | + }, |
| 302 | + { |
| 303 | + "cell_type": "code", |
| 304 | + "execution_count": 46, |
| 305 | + "metadata": {}, |
| 306 | + "outputs": [], |
222 | 307 | "source": [
|
223 | 308 | "def classify_tweet(tweet):\n",
|
224 | 309 | " categories = {0: \"Negative\", 1:\"Positive\"}\n",
|
|
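`classify_tweet` is cut off by the end of the diff. A hedged completion using the legacy `Field.process` helper, which runs `preprocess` (tokenization) and numericalization in one step; everything past the `categories` line is an assumption:

```python
# Sketch of a plausible completion for the truncated cell.
def classify_tweet(tweet):
    categories = {0: "Negative", 1: "Positive"}
    processed = TWEET.process([TWEET.preprocess(tweet)]).to(device)
    model.eval()
    return categories[model(processed).argmax().item()]
```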