Skip to content

Commit 1a32d17

Browse files
Updates to Chapter 5 notebooks
1 parent e4e1a38 commit 1a32d17

File tree

3 files changed

+50
-66
lines changed

3 files changed

+50
-66
lines changed

chapter_5/0501_Dependency_Parsing.ipynb

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
"cell_type": "code",
55
"execution_count": 1,
66
"metadata": {
7-
"collapsed": true
7+
"tags": []
88
},
99
"outputs": [],
1010
"source": [
11-
"from spacy.en import English\n",
12-
"parser = English()"
11+
"import spacy\n",
12+
"parser = spacy.load(\"en_core_web_sm\")"
1313
]
1414
},
1515
{
@@ -20,7 +20,8 @@
2020
{
2121
"data": {
2222
"text/plain": [
23-
"['__bytes__',\n",
23+
"['_',\n",
24+
" '__bytes__',\n",
2425
" '__class__',\n",
2526
" '__delattr__',\n",
2627
" '__dir__',\n",
@@ -32,6 +33,7 @@
3233
" '__gt__',\n",
3334
" '__hash__',\n",
3435
" '__init__',\n",
36+
" '__init_subclass__',\n",
3537
" '__le__',\n",
3638
" '__len__',\n",
3739
" '__lt__',\n",
@@ -58,40 +60,52 @@
5860
" 'ent_id_',\n",
5961
" 'ent_iob',\n",
6062
" 'ent_iob_',\n",
63+
" 'ent_kb_id',\n",
64+
" 'ent_kb_id_',\n",
6165
" 'ent_type',\n",
6266
" 'ent_type_',\n",
63-
" 'has_repvec',\n",
67+
" 'get_extension',\n",
68+
" 'has_dep',\n",
69+
" 'has_extension',\n",
70+
" 'has_head',\n",
71+
" 'has_morph',\n",
6472
" 'has_vector',\n",
6573
" 'head',\n",
6674
" 'i',\n",
6775
" 'idx',\n",
76+
" 'iob_strings',\n",
6877
" 'is_alpha',\n",
6978
" 'is_ancestor',\n",
70-
" 'is_ancestor_of',\n",
7179
" 'is_ascii',\n",
7280
" 'is_bracket',\n",
81+
" 'is_currency',\n",
7382
" 'is_digit',\n",
7483
" 'is_left_punct',\n",
7584
" 'is_lower',\n",
7685
" 'is_oov',\n",
7786
" 'is_punct',\n",
7887
" 'is_quote',\n",
7988
" 'is_right_punct',\n",
89+
" 'is_sent_end',\n",
90+
" 'is_sent_start',\n",
8091
" 'is_space',\n",
8192
" 'is_stop',\n",
8293
" 'is_title',\n",
94+
" 'is_upper',\n",
8395
" 'lang',\n",
8496
" 'lang_',\n",
8597
" 'left_edge',\n",
8698
" 'lefts',\n",
8799
" 'lemma',\n",
88100
" 'lemma_',\n",
101+
" 'lex',\n",
89102
" 'lex_id',\n",
90103
" 'like_email',\n",
91104
" 'like_num',\n",
92105
" 'like_url',\n",
93106
" 'lower',\n",
94107
" 'lower_',\n",
108+
" 'morph',\n",
95109
" 'n_lefts',\n",
96110
" 'n_rights',\n",
97111
" 'nbor',\n",
@@ -105,19 +119,23 @@
105119
" 'prefix_',\n",
106120
" 'prob',\n",
107121
" 'rank',\n",
108-
" 'repvec',\n",
122+
" 'remove_extension',\n",
109123
" 'right_edge',\n",
110124
" 'rights',\n",
125+
" 'sent',\n",
126+
" 'sent_start',\n",
111127
" 'sentiment',\n",
128+
" 'set_extension',\n",
129+
" 'set_morph',\n",
112130
" 'shape',\n",
113131
" 'shape_',\n",
114132
" 'similarity',\n",
115-
" 'string',\n",
116133
" 'subtree',\n",
117134
" 'suffix',\n",
118135
" 'suffix_',\n",
119136
" 'tag',\n",
120137
" 'tag_',\n",
138+
" 'tensor',\n",
121139
" 'text',\n",
122140
" 'text_with_ws',\n",
123141
" 'vector',\n",
@@ -132,7 +150,7 @@
132150
}
133151
],
134152
"source": [
135-
"# Recell that spaCy gives us a lot of information about each token when it parses\n",
153+
"# Recall that spaCy gives us a lot of information about each token when it parses\n",
136154
"tokens = parser('She ran')\n",
137155
"dir(tokens[0])"
138156
]
@@ -141,7 +159,7 @@
141159
"cell_type": "code",
142160
"execution_count": 3,
143161
"metadata": {
144-
"collapsed": true
162+
"tags": []
145163
},
146164
"outputs": [],
147165
"source": [
@@ -177,7 +195,7 @@
177195
"cell_type": "code",
178196
"execution_count": 5,
179197
"metadata": {
180-
"collapsed": true
198+
"tags": []
181199
},
182200
"outputs": [],
183201
"source": [
@@ -207,20 +225,11 @@
207225
"source": [
208226
"show_dep(\"She hit the wall.\")"
209227
]
210-
},
211-
{
212-
"cell_type": "code",
213-
"execution_count": null,
214-
"metadata": {
215-
"collapsed": true
216-
},
217-
"outputs": [],
218-
"source": []
219228
}
220229
],
221230
"metadata": {
222231
"kernelspec": {
223-
"display_name": "Python 3",
232+
"display_name": "Python 3 (ipykernel)",
224233
"language": "python",
225234
"name": "python3"
226235
},
@@ -234,9 +243,9 @@
234243
"name": "python",
235244
"nbconvert_exporter": "python",
236245
"pygments_lexer": "ipython3",
237-
"version": "3.5.2"
246+
"version": "3.8.10"
238247
}
239248
},
240249
"nbformat": 4,
241-
"nbformat_minor": 2
250+
"nbformat_minor": 4
242251
}

chapter_5/0502_Head_of_a_Sentence.ipynb

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
"cell_type": "code",
55
"execution_count": 1,
66
"metadata": {
7-
"collapsed": true
7+
"tags": []
88
},
99
"outputs": [],
1010
"source": [
1111
"# Recall that we can show the part of speech, the dependency, and the head of each token\n",
12-
"from spacy.en import English\n",
13-
"parser = English()\n",
12+
"import spacy\n",
13+
"parser = spacy.load(\"en_core_web_sm\")\n",
1414
"def show_dep(text):\n",
1515
" tokens = parser(text)\n",
1616
" for token in tokens:\n",
@@ -45,14 +45,14 @@
4545
"cell_type": "code",
4646
"execution_count": 3,
4747
"metadata": {
48-
"collapsed": true
48+
"tags": []
4949
},
5050
"outputs": [],
5151
"source": [
5252
"def get_head_of_sentence(text):\n",
5353
" tokens = parser(text)\n",
5454
" for token in tokens:\n",
55-
" if token.head is token:\n",
55+
" if token.head == token:\n",
5656
" return token\n",
5757
" return None"
5858
]
@@ -76,20 +76,11 @@
7676
"source": [
7777
"get_head_of_sentence(\"She hit the wall.\")"
7878
]
79-
},
80-
{
81-
"cell_type": "code",
82-
"execution_count": null,
83-
"metadata": {
84-
"collapsed": true
85-
},
86-
"outputs": [],
87-
"source": []
8879
}
8980
],
9081
"metadata": {
9182
"kernelspec": {
92-
"display_name": "Python 3",
83+
"display_name": "Python 3 (ipykernel)",
9384
"language": "python",
9485
"name": "python3"
9586
},
@@ -103,9 +94,9 @@
10394
"name": "python",
10495
"nbconvert_exporter": "python",
10596
"pygments_lexer": "ipython3",
106-
"version": "3.5.2"
97+
"version": "3.8.10"
10798
}
10899
},
109100
"nbformat": 4,
110-
"nbformat_minor": 2
101+
"nbformat_minor": 4
111102
}

chapter_5/0503_Entity_Recognition.ipynb

Lines changed: 10 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,17 @@
33
{
44
"cell_type": "code",
55
"execution_count": 1,
6-
"metadata": {
7-
"collapsed": true
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
11-
"from spacy.en import English\n",
12-
"parser = English()"
9+
"import spacy\n",
10+
"parser = spacy.load(\"en_core_web_sm\")"
1311
]
1412
},
1513
{
1614
"cell_type": "code",
1715
"execution_count": 2,
18-
"metadata": {
19-
"collapsed": true
20-
},
16+
"metadata": {},
2117
"outputs": [],
2218
"source": [
2319
"def show_ents(text):\n",
@@ -82,7 +78,7 @@
8278
"$ \n",
8379
"50 MONEY\n",
8480
"and \n",
85-
"50 CARDINAL\n",
81+
"50 \n",
8682
"dollars \n",
8783
"on \n",
8884
"March DATE\n",
@@ -92,13 +88,12 @@
9288
"\n",
9389
"Just showing the entities ....\n",
9490
"MONEY 50\n",
95-
"CARDINAL 50\n",
9691
"DATE March 12 , 2016\n"
9792
]
9893
}
9994
],
10095
"source": [
101-
"# Another example\n",
96+
"# Another example (Note: \"50\" doesn't show as a CARDINAL entity in this version of spacy)\n",
10297
"show_ents(\"I paid $50 and 50 dollars on March 12, 2016\")"
10398
]
10499
},
@@ -134,9 +129,7 @@
134129
{
135130
"cell_type": "code",
136131
"execution_count": 6,
137-
"metadata": {
138-
"collapsed": true
139-
},
132+
"metadata": {},
140133
"outputs": [],
141134
"source": [
142135
"# I O B format\n",
@@ -170,20 +163,11 @@
170163
"source": [
171164
"show_ent_IOB(\"I went to New Orleans to speak French.\")"
172165
]
173-
},
174-
{
175-
"cell_type": "code",
176-
"execution_count": null,
177-
"metadata": {
178-
"collapsed": true
179-
},
180-
"outputs": [],
181-
"source": []
182166
}
183167
],
184168
"metadata": {
185169
"kernelspec": {
186-
"display_name": "Python 3",
170+
"display_name": "Python 3 (ipykernel)",
187171
"language": "python",
188172
"name": "python3"
189173
},
@@ -197,9 +181,9 @@
197181
"name": "python",
198182
"nbconvert_exporter": "python",
199183
"pygments_lexer": "ipython3",
200-
"version": "3.5.2"
184+
"version": "3.8.10"
201185
}
202186
},
203187
"nbformat": 4,
204-
"nbformat_minor": 2
188+
"nbformat_minor": 4
205189
}

0 commit comments

Comments
 (0)