ScottMcCormack
diff --git a/chapter_4/0401_tf-idf_Gensim.ipynb b/chapter_4/0401_tf-idf_Gensim.ipynb
Lines changed: 51 additions & 55 deletions
@@ -4,7 +4,7 @@
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {
-    "collapsed": true
+    "tags": []
    },
    "outputs": [],
    "source": [
@@ -15,7 +15,7 @@
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {
-    "collapsed": true
+    "tags": []
    },
    "outputs": [],
    "source": [
@@ -31,7 +31,7 @@
    "cell_type": "code",
    "execution_count": 3,
    "metadata": {
-    "collapsed": true
+    "tags": []
    },
    "outputs": [],
    "source": [
@@ -43,7 +43,7 @@
    "cell_type": "code",
    "execution_count": 4,
    "metadata": {
-    "collapsed": true
+    "tags": []
    },
    "outputs": [],
    "source": [
@@ -82,38 +82,38 @@
      "output_type": "stream",
      "text": [
       "Num words in dictionary: 32\n",
-      "27 This\n",
-      "20 That\n",
-      "21 as\n",
-      "8 ran\n",
+      "0 .\n",
+      "1 I\n",
+      "2 love\n",
+      "3 tacos\n",
+      "4 She\n",
+      "5 chicken\n",
+      "6 ran\n",
       "7 the\n",
+      "8 with\n",
+      "9 The\n",
+      "10 a\n",
+      "11 choose\n",
+      "12 chooses\n",
+      "13 do\n",
       "14 me\n",
-      "15 do\n",
-      "12 The\n",
-      "28 pizza\n",
+      "15 n't\n",
+      "16 nap\n",
+      "17 take\n",
+      "18 to\n",
+      "19 That\n",
+      "20 as\n",
+      "21 cream\n",
+      "22 ice\n",
       "23 is\n",
-      "11 n't\n",
-      "0 love\n",
-      "29 affront\n",
-      "17 to\n",
-      "1 .\n",
-      "25 ice\n",
-      "18 chooses\n",
-      "19 nice\n",
-      "9 choose\n",
-      "6 She\n",
-      "24 cream\n",
-      "22 man\n",
-      "16 a\n",
-      "2 tacos\n",
-      "3 I\n",
-      "4 chicken\n",
-      "5 with\n",
-      "10 nap\n",
+      "24 man\n",
+      "25 nice\n",
+      "26 pie\n",
+      "27 This\n",
+      "28 affront\n",
+      "29 an\n",
       "30 nature\n",
-      "13 take\n",
-      "31 an\n",
-      "26 pie\n"
+      "31 pizza\n"
      ]
     }
    ],
@@ -155,7 +155,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "8\n"
+      "6\n"
      ]
     }
    ],
@@ -172,7 +172,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[(0, 3), (2, 1), (3, 1)]\n"
+      "[(1, 1), (2, 3), (3, 1)]\n"
      ]
     }
    ],
@@ -194,7 +194,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[[(0, 1), (1, 1), (2, 1), (3, 1)], [(1, 1), (4, 1), (5, 1), (6, 1), (7, 1), (8, 1)], [(1, 2), (3, 1), (9, 1), (10, 2), (11, 1), (12, 1), (13, 1), (14, 1), (15, 1), (16, 1), (17, 1), (18, 1)], [(1, 1), (5, 1), (19, 1), (20, 1), (21, 1), (22, 1), (23, 1), (24, 1), (25, 1), (26, 1)], [(1, 1), (17, 1), (23, 1), (27, 1), (28, 1), (29, 1), (30, 1), (31, 1)]]\n"
+      "[[(0, 1), (1, 1), (2, 1), (3, 1)], [(0, 1), (4, 1), (5, 1), (6, 1), (7, 1), (8, 1)], [(0, 2), (1, 1), (9, 1), (10, 1), (11, 1), (12, 1), (13, 1), (14, 1), (15, 1), (16, 2), (17, 1), (18, 1)], [(0, 1), (8, 1), (19, 1), (20, 1), (21, 1), (22, 1), (23, 1), (24, 1), (25, 1), (26, 1)], [(0, 1), (18, 1), (23, 1), (27, 1), (28, 1), (29, 1), (30, 1), (31, 1)]]\n"
      ]
     }
    ],
@@ -236,7 +236,7 @@
      "text": [
       "['I', 'love', 'tacos', '.']\n",
       "[(0, 1), (1, 1), (2, 1), (3, 1)]\n",
-      "[(0, 0.6559486886294514), (2, 0.6559486886294514), (3, 0.37344696513776354)]\n"
+      "[(1, 0.37344696513776354), (2, 0.6559486886294514), (3, 0.6559486886294514)]\n"
      ]
     }
    ],
@@ -260,8 +260,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[(0, 1), (1, 1), (3, 1), (28, 1)]\n",
-      "[(0, 0.6559486886294514), (3, 0.37344696513776354), (28, 0.6559486886294514)]\n"
+      "[(0, 1), (1, 1), (2, 1), (31, 1)]\n",
+      "[(1, 0.37344696513776354), (2, 0.6559486886294514), (31, 0.6559486886294514)]\n"
      ]
     }
    ],
@@ -282,15 +282,19 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Similarity index with 5 documents in 0 shards (stored under ~/Documents/nlp-python/similarity)\n"
+      "Similarity index with 5 documents in 0 shards (stored under output/similarity)\n"
      ]
     }
    ],
    "source": [
     "# Create similarity measure object in tf-idf space\n",
     "# First arg is temp external storage\n",
     "# https://radimrehurek.com/gensim/similarities/docsim.html\n",
-    "sims = gensim.similarities.Similarity('~/Documents/nlp-python/similarity', tf_idf[corpus],\n",
+    "import os\n",
+    "os.makedirs('output', exist_ok=True)\n",
+    "\n",
+    "output_obj = os.path.join('output', 'similarity')\n",
+    "sims = gensim.similarities.Similarity(output_obj, tf_idf[corpus],\n",
     "                                     num_features=len(dictionary))\n",
     "print(sims)"
    ]
@@ -305,8 +309,8 @@
      "output_type": "stream",
      "text": [
       "['chicken', 'with', 'taco', 'love']\n",
-      "[(0, 1), (4, 1), (5, 1)]\n",
-      "[(0, 0.6559486886294514), (4, 0.6559486886294514), (5, 0.37344696513776354)]\n"
+      "[(2, 1), (5, 1), (8, 1)]\n",
+      "[(2, 0.6559486886294514), (5, 0.6559486886294514), (8, 0.37344696513776354)]\n"
      ]
     }
    ],
@@ -328,7 +332,8 @@
     {
      "data": {
       "text/plain": [
-       "array([ 0.4302687 ,  0.41768694,  0.        ,  0.07687882,  0.        ], dtype=float32)"
+       "array([0.4302687 , 0.41768694, 0.        , 0.07687882, 0.        ],\n",
+       "      dtype=float32)"
       ]
      },
      "execution_count": 16,
@@ -339,20 +344,11 @@
    "source": [
     "sims[query_doc_tf_idf]"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -366,9 +362,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.2"
+   "version": "3.8.10"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }