|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 | 4 | "cell_type": "code",
|
5 |
| - "execution_count": 3, |
| 5 | + "execution_count": 8, |
6 | 6 | "metadata": {},
|
7 | 7 | "outputs": [],
|
8 | 8 | "source": [
|
9 | 9 | "import util\n",
|
| 10 | + "import os\n", |
| 11 | + "import math\n", |
| 12 | + "import subprocess\n", |
10 | 13 | "import numpy as np\n",
|
11 | 14 | "from tensorflow.keras.datasets import imdb\n",
|
12 | 15 | "(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=1000)"
|
|
54 | 57 | },
|
55 | 58 | {
|
56 | 59 | "cell_type": "code",
|
57 |
| - "execution_count": null, |
| 60 | + "execution_count": 6, |
58 | 61 | "metadata": {},
|
59 |
| - "outputs": [], |
| 62 | + "outputs": [ |
| 63 | + { |
| 64 | + "name": "stdout", |
| 65 | + "output_type": "stream", |
| 66 | + "text": [ |
| 67 | + "Downloading glove embeddings...\n" |
| 68 | + ] |
| 69 | + } |
| 70 | + ], |
60 | 71 | "source": [
|
61 | 72 | "# Load embeddings\n",
|
| 73 | + "if not os.path.exists(\"glove.6B.100d.txt\"):\n", |
| 74 | + " print(\"Downloading glove embeddings...\")\n", |
| 75 | + " subprocess.check_output(\n", |
| 76 | + " \"curl -OL http://nlp.stanford.edu/data/glove.6B.zip && unzip glove.6B.zip\", shell=True)\n", |
62 | 77 | "embeddings_index = dict()\n",
|
63 | 78 | "f = open('glove.6B.100d.txt')\n",
|
| 79 | + "print(\"Loading glove embeddings...\")\n", |
64 | 80 | "for line in f:\n",
|
65 | 81 | " values = line.split()\n",
|
66 | 82 | " word = values[0]\n",
|
|
71 | 87 | },
|
72 | 88 | {
|
73 | 89 | "cell_type": "code",
|
74 |
| - "execution_count": null, |
| 90 | + "execution_count": 17, |
75 | 91 | "metadata": {},
|
76 |
| - "outputs": [], |
| 92 | + "outputs": [ |
| 93 | + { |
| 94 | + "data": { |
| 95 | + "text/plain": [ |
| 96 | + "0.21388251764217375" |
| 97 | + ] |
| 98 | + }, |
| 99 | + "execution_count": 17, |
| 100 | + "metadata": {}, |
| 101 | + "output_type": "execute_result" |
| 102 | + } |
| 103 | + ], |
77 | 104 | "source": [
|
78 | 105 | "def cosine_sim(v1,v2):\n",
|
79 | 106 | " \"compute cosine similarity of v1 to v2: (v1 dot v2)/(||v1||*||v2||)\"\n",
|
|
90 | 117 | "car = embeddings_index[\"car\"]\n",
|
91 | 118 | "truck = embeddings_index[\"truck\"]\n",
|
92 | 119 | "plane = embeddings_index[\"plane\"]\n",
|
93 |
| - "cosine_sim(plane, book)" |
| 120 | + "cosine_sim(film, truck)" |
94 | 121 | ]
|
95 | 122 | },
|
96 | 123 | {
|
97 | 124 | "cell_type": "code",
|
98 |
| - "execution_count": 6, |
| 125 | + "execution_count": 19, |
99 | 126 | "metadata": {},
|
100 | 127 | "outputs": [
|
101 | 128 | {
|
102 |
| - "ename": "NameError", |
103 |
| - "evalue": "name 'embeddings_index' is not defined", |
104 |
| - "output_type": "error", |
105 |
| - "traceback": [ |
106 |
| - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
107 |
| - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", |
108 |
| - "\u001b[0;32m<ipython-input-6-5ea6505cec73>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0membeddings_index\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m14\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", |
109 |
| - "\u001b[0;31mNameError\u001b[0m: name 'embeddings_index' is not defined" |
110 |
| - ] |
| 129 | + "data": { |
| 130 | + "text/plain": [ |
| 131 | + "array([-1.9744e-01, 4.4831e-01, 1.3689e-01, -1.5595e-01, 9.3600e-01,\n", |
| 132 | + " 7.2986e-01, 3.4099e-01, -3.3896e-01, -8.9569e-02, -4.7706e-01,\n", |
| 133 | + " 3.5112e-01, -4.2198e-01, -1.2221e-01, -6.3375e-02, -4.5820e-01,\n", |
| 134 | + " 7.8723e-01, 9.4045e-01, 8.1101e-02, -2.3224e-01, 4.0778e-01,\n", |
| 135 | + " 3.3258e-01, -4.4458e-01, -4.7117e-01, 1.4852e-01, 9.6308e-01,\n", |
| 136 | + " -6.5267e-02, -5.3661e-02, -6.7474e-01, -4.2364e-01, 9.4392e-02,\n", |
| 137 | + " -3.8668e-01, 1.8237e-01, -1.2846e-01, -2.1952e-01, -5.8993e-01,\n", |
| 138 | + " 7.3602e-01, -2.4009e-01, 3.2392e-01, -2.4663e-01, -4.0684e-01,\n", |
| 139 | + " -5.2468e-01, 4.6174e-01, -1.4936e-01, -1.1999e-01, -1.3990e-01,\n", |
| 140 | + " -4.4944e-01, -2.6565e-01, -7.0061e-01, 3.0188e-01, -1.1209e-01,\n", |
| 141 | + " 6.6323e-01, 3.9698e-01, 6.9158e-01, 8.3442e-01, -5.2717e-01,\n", |
| 142 | + " -2.5314e+00, 1.3281e-01, 3.0253e-01, 1.1062e+00, 7.2221e-03,\n", |
| 143 | + " 2.6031e-01, 1.1584e+00, -7.9330e-02, -7.6659e-01, 1.2623e+00,\n", |
| 144 | + " -6.2071e-01, 5.9821e-01, 7.3539e-01, 3.8573e-01, -4.0293e-01,\n", |
| 145 | + " -3.1440e-02, 7.7863e-01, 3.1525e-01, 1.9003e-01, -6.5821e-01,\n", |
| 146 | + " 4.0548e-01, 5.3596e-03, 5.5274e-02, -1.2238e+00, -4.8912e-02,\n", |
| 147 | + " -3.0511e-01, 4.4473e-01, -3.3826e-01, -2.2133e-01, -1.3214e+00,\n", |
| 148 | + " -6.4761e-01, -4.4021e-01, -1.4910e+00, -2.2495e-02, 6.0346e-02,\n", |
| 149 | + " 1.4833e-01, 4.4162e-01, 7.9787e-01, -2.8076e-01, -2.9400e-02,\n", |
| 150 | + " -1.5656e-01, -1.2650e-01, -5.6968e-01, 1.5374e-03, 6.6600e-01],\n", |
| 151 | + " dtype=float32)" |
| 152 | + ] |
| 153 | + }, |
| 154 | + "execution_count": 19, |
| 155 | + "metadata": {}, |
| 156 | + "output_type": "execute_result" |
111 | 157 | }
|
112 | 158 | ],
|
113 | 159 | "source": [
|
114 |
| - "embeddings_index[14]" |
| 160 | + "embeddings_index[\"book\"]" |
115 | 161 | ]
|
116 | 162 | },
|
117 | 163 | {
|
|
0 commit comments