Skip to content

Commit 8bc4f04

Browse files
committed
sp classification of turkish
1 parent 102f418 commit 8bc4f04

File tree

1 file changed

+93
-53
lines changed

1 file changed

+93
-53
lines changed

nn-turkish.ipynb

Lines changed: 93 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
},
4040
{
4141
"cell_type": "code",
42-
"execution_count": 30,
42+
"execution_count": 3,
4343
"metadata": {},
4444
"outputs": [],
4545
"source": [
@@ -57,7 +57,7 @@
5757
},
5858
{
5959
"cell_type": "code",
60-
"execution_count": 3,
60+
"execution_count": 4,
6161
"metadata": {},
6262
"outputs": [
6363
{
@@ -81,7 +81,7 @@
8181
},
8282
{
8383
"cell_type": "code",
84-
"execution_count": 4,
84+
"execution_count": 5,
8585
"metadata": {},
8686
"outputs": [
8787
{
@@ -116,28 +116,19 @@
116116
"data = (TextList.from_folder(dest, processor=[OpenFileProcessor(), SPProcessor()])\n",
117117
" .split_by_rand_pct(0.1, seed=42)\n",
118118
" .label_for_lm()\n",
119-
" .databunch(bs=bs, num_workers=1))"
119+
" .databunch(bs=bs, num_workers=1))\n",
120+
"\n",
121+
"data.save(f'{lang}_databunch')\n",
122+
"len(data.vocab.itos),len(data.train_ds)"
120123
]
121124
},
122125
{
123126
"cell_type": "code",
124-
"execution_count": 9,
127+
"execution_count": null,
125128
"metadata": {},
126-
"outputs": [
127-
{
128-
"data": {
129-
"text/plain": [
130-
"(30000, 44905)"
131-
]
132-
},
133-
"execution_count": 9,
134-
"metadata": {},
135-
"output_type": "execute_result"
136-
}
137-
],
129+
"outputs": [],
138130
"source": [
139-
"data.save(f'{lang}_databunch')\n",
140-
"len(data.vocab.itos),len(data.train_ds)"
131+
"data = load_data(f'{lang}_databunch', bs=bs)"
141132
]
142133
},
143134
{
@@ -191,15 +182,6 @@
191182
"data.show_batch()"
192183
]
193184
},
194-
{
195-
"cell_type": "code",
196-
"execution_count": null,
197-
"metadata": {},
198-
"outputs": [],
199-
"source": [
200-
"# data = load_data(path, f'{lang}_databunch', bs=bs)"
201-
]
202-
},
203185
{
204186
"cell_type": "code",
205187
"execution_count": 13,
@@ -359,7 +341,7 @@
359341
},
360342
{
361343
"cell_type": "code",
362-
"execution_count": 17,
344+
"execution_count": 6,
363345
"metadata": {},
364346
"outputs": [
365347
{
@@ -368,10 +350,11 @@
368350
"[PosixPath('/home/jhoward/.fastai/data/trwiki/movies/tr_polarity.neg'),\n",
369351
" PosixPath('/home/jhoward/.fastai/data/trwiki/movies/tr_polarity.pos'),\n",
370352
" PosixPath('/home/jhoward/.fastai/data/trwiki/movies/tmp'),\n",
371-
" PosixPath('/home/jhoward/.fastai/data/trwiki/movies/models')]"
353+
" PosixPath('/home/jhoward/.fastai/data/trwiki/movies/models'),\n",
354+
" PosixPath('/home/jhoward/.fastai/data/trwiki/movies/tr_data_lm')]"
372355
]
373356
},
374-
"execution_count": 17,
357+
"execution_count": 6,
375358
"metadata": {},
376359
"output_type": "execute_result"
377360
}
@@ -383,7 +366,7 @@
383366
},
384367
{
385368
"cell_type": "code",
386-
"execution_count": 18,
369+
"execution_count": 7,
387370
"metadata": {},
388371
"outputs": [
389372
{
@@ -450,7 +433,7 @@
450433
"4 özgürlük denilince aklima gelen ilk film.bir b... 1"
451434
]
452435
},
453-
"execution_count": 18,
436+
"execution_count": 7,
454437
"metadata": {},
455438
"output_type": "execute_result"
456439
}
@@ -464,7 +447,7 @@
464447
},
465448
{
466449
"cell_type": "code",
467-
"execution_count": 19,
450+
"execution_count": 8,
468451
"metadata": {},
469452
"outputs": [
470453
{
@@ -531,7 +514,7 @@
531514
"4 milliyetçi bir film tavsiye etmiyorum.... \\n 0"
532515
]
533516
},
534-
"execution_count": 19,
517+
"execution_count": 8,
535518
"metadata": {},
536519
"output_type": "execute_result"
537520
}
@@ -545,7 +528,7 @@
545528
},
546529
{
547530
"cell_type": "code",
548-
"execution_count": 20,
531+
"execution_count": 9,
549532
"metadata": {},
550533
"outputs": [],
551534
"source": [
@@ -554,23 +537,31 @@
554537
},
555538
{
556539
"cell_type": "code",
557-
"execution_count": 117,
540+
"execution_count": 11,
558541
"metadata": {},
559542
"outputs": [],
560543
"source": [
561-
"spp = SPProcessor(sp_model=dest/'tmp'/'spm.model', sp_vocab=dest/'tmp'/'spm.vocab')\n",
562-
"\n",
563-
"data_lm = (TextList.from_df(df, path_clas, cols='text', processor=[OpenFileProcessor(), spp], vocab=data.vocab)\n",
544+
"data_lm = (TextList.from_df(df, path_clas, cols='text', processor=[\n",
545+
" OpenFileProcessor(), SPProcessor.load(dest)], vocab=data.vocab)\n",
564546
" .split_by_rand_pct(0.1, seed=42)\n",
565547
" .label_for_lm() \n",
566548
" .databunch(bs=bs, num_workers=1))\n",
567549
"\n",
568-
"data_lm.save(f'{lang}_data_lm')"
550+
"data_lm.save(f'{lang}_clas_databunch')"
569551
]
570552
},
571553
{
572554
"cell_type": "code",
573-
"execution_count": 119,
555+
"execution_count": null,
556+
"metadata": {},
557+
"outputs": [],
558+
"source": [
559+
"data_lm = load_data(f'{lang}_clas_databunch', bs=bs)"
560+
]
561+
},
562+
{
563+
"cell_type": "code",
564+
"execution_count": 21,
574565
"metadata": {},
575566
"outputs": [
576567
{
@@ -586,23 +577,23 @@
586577
" <tbody>\n",
587578
" <tr>\n",
588579
" <td>0</td>\n",
589-
" <td>▁özgürlük ▁as ki ni ▁ve ▁i ngilizlerin ▁ ne ▁kadar ▁ vah set ▁oldu klar ini ▁gözler ▁önüne ▁ser en ▁bir ▁film ▁ve ▁tabi ▁ki ▁as k ▁xxrep ▁4 ▁ . ▁xxbos ▁gerçekten ▁tarihi ▁sava s ▁filmleri ▁ara si nda ▁tar tis ma siz ▁en ▁iyi si ▁ , ▁12 ▁ yi l ▁boyunca ▁ac aba ▁ikincisi ▁çek ir imi ▁diye ▁bekledi gi m ▁bir ▁film ▁ , bel ki</td>\n",
580+
" <td>bl il r = ) . ▁xxbos ▁haftada ▁bir ▁bu ▁filme ▁bak i yorum . . . ▁yorum ▁ya pil cak ▁bi ▁film ▁ di il ▁çünkü ▁mükemmel . ▁is le digi ▁as k ▁konusu yla , ▁özgürlük ▁konusu yla , ▁intikam ▁ve ▁ hir si yla ▁ve ▁tabi ki ▁ müz ig iyle ▁mükemmel ▁bir ▁film ▁ol mus . . ▁the ▁best ▁film ▁on ▁the ▁world ▁for ▁me .</td>\n",
590581
" </tr>\n",
591582
" <tr>\n",
592583
" <td>1</td>\n",
593-
" <td>ne ▁yok ▁ di yebilir im . . ▁xxbos ▁böyle ▁güzel ▁bir ▁ya pit ▁olamaz ▁filmde ▁her ▁sey ▁var ▁insani ▁dünya dan ▁ali p ▁götürü yor ▁bask a ▁diyar lara ▁film ▁bitti kten ▁sonra ▁epey ▁süre ▁geçmesi ▁gerekiyor ▁tekrar ▁dünya ▁ya ▁dönmek ▁için ▁dikkat ! . ▁xxbos ▁ ye sil ▁yol , bra ve heart , ti tan ic , ▁xxrep ▁4 ▁ . ▁bu ▁filmler ▁için ▁ ne ▁</td>\n",
584+
" <td>▁benim ▁göz ya s lar im ▁olur ▁her ▁defa si nda ▁xxrep ▁4 ▁ . ▁san i rim ▁izlemeye n ▁yoktur ▁fazla ▁bir ▁söz ▁istemez . . ▁mü this ! !! . ▁xxbos ▁hayati min ▁filmi ▁ di yebilir im . ▁10 ▁numara ▁bir ▁film . ▁ele sti re lere ▁kap ali ▁bir ▁film ▁olma li . ▁çünkü ▁kötü ▁bir ▁yan ▁göre miyorum . . ▁xxbos ▁özgür ▁olma yi ,</td>\n",
594585
" </tr>\n",
595586
" <tr>\n",
596587
" <td>2</td>\n",
597588
" <td>▁i sk ence ▁edilerek ▁idam ▁edilmesi . . . ve ▁sonunda ▁özgürlük ▁diye ▁hay kir isi . . . ha lan ▁unut a miyorum ▁xxrep ▁4 ▁ . ▁xxbos ▁ilk ▁bu ▁filmi ▁sinemada ▁izledi m ▁ve ▁insan in ▁inan di ktan ▁sonra ▁ ne leri ▁yap abi le ce gi ni ▁fark ▁etti m . ▁gerçekten ▁süper ▁film di . ▁halen ▁içi m den ▁geldi kçe ▁takip ▁izleri m ▁ve</td>\n",
598589
" </tr>\n",
599590
" <tr>\n",
600591
" <td>3</td>\n",
601-
" <td>▁istemez ▁oraya ▁götürü yor ▁filmin ▁uzun lu gun a ▁al dan ip ta ▁filmi ▁izlemek ten ▁vazgeçme yin ▁xxrep ▁4 ▁ . ▁xxbos ▁mükemmel ▁ötesi . . ▁ . ▁xxbos ▁bu ▁filmi ▁izlemeye n ▁kal di mi ? ▁sonu ▁iyi ▁bit me se de ▁acil i mini ▁yap miyorum ▁izle mi yen ler ▁icin . ▁xxbos ▁hiç ▁ a bart miyorum ▁hayat im da ▁izledi gi m ▁en ▁iyi ▁filmlerden</td>\n",
592+
" <td>▁al dan ip ta ▁filmi ▁izlemek ten ▁vazgeçme yin ▁xxrep ▁4 ▁ . ▁xxbos ▁harika ▁bir ▁film di ▁xxrep ▁5 ▁ . ▁xxbos ▁mükemmel ▁ötesi . . ▁ . ▁xxbos ▁hiç ▁ a bart miyorum ▁hayat im da ▁izledi gi m ▁en ▁iyi ▁filmlerden ▁biri ▁ di yebilir im . tam ▁bir ▁bas ya pit ▁nite ligi nde . o scar ▁al digi na ▁hiç ▁ sa si rma dim</td>\n",
602593
" </tr>\n",
603594
" <tr>\n",
604595
" <td>4</td>\n",
605-
" <td>di . ▁xxbos ▁tarantino nun ▁en ▁iyi ▁filmi ▁ben ce ▁her ke zin ▁izleme si ▁gereken ▁bi ▁film ▁xxrep ▁8 ▁ . ▁xxbos ▁tarantino nun ▁bu ▁filmi ▁kendini ▁belli ▁etti r iyor . hat ta ▁ben ce ▁tarantino nun ▁en ▁iyi ▁filmidir . kendi ne ▁has ▁anlat imi ▁ile ▁bu ▁film ▁hak ka ten ▁sinema ▁sever lerin ▁izleme si ▁gereken ▁bir ▁film . ben ▁10 ▁üzerinden ▁7 ▁verdi m ▁bu</td>\n",
596+
" <td>. ▁herkes ▁izleme li . . . ▁xxbos ▁tek ▁kelime yle ▁bas ▁ya pit , ta ran tino ▁o ▁bir ▁dahi . iste ▁ tü t k ▁sinema si ▁bölgesel ▁ya da ▁yerel ▁drama lardan , mel od ram lardan ▁kurtul up , bir az cik ▁tarantino ▁kurgusu nu ▁ve ▁esp iri ▁an la yi sini ▁kavrama li . . ▁xxbos ▁müzikleri ▁konusu ▁ di y ologlar i ▁ki sa</td>\n",
606597
" </tr>\n",
607598
" </tbody>\n",
608599
"</table>"
@@ -621,7 +612,7 @@
621612
},
622613
{
623614
"cell_type": "code",
624-
"execution_count": 124,
615+
"execution_count": null,
625616
"metadata": {},
626617
"outputs": [],
627618
"source": [
@@ -768,21 +759,20 @@
768759
},
769760
{
770761
"cell_type": "code",
771-
"execution_count": 136,
762+
"execution_count": 17,
772763
"metadata": {},
773764
"outputs": [],
774765
"source": [
775-
"spp = SPProcessor(sp_model=dest/'tmp'/'spm.model', sp_vocab=dest/'tmp'/'spm.vocab')\n",
776-
"\n",
777-
"data_clas = (TextList.from_df(df, path_clas, cols='text', processor=[OpenFileProcessor(), spp], vocab=data_lm.vocab)\n",
766+
"data_clas = (TextList.from_df(df, path_clas, cols='text', processor=[\n",
767+
" OpenFileProcessor(), SPProcessor.load(dest)], vocab=data_lm.vocab)\n",
778768
" .split_by_rand_pct(0.1, seed=42)\n",
779769
" .label_from_df(cols='pos')\n",
780770
" .databunch(bs=bs, num_workers=1))"
781771
]
782772
},
783773
{
784774
"cell_type": "code",
785-
"execution_count": 152,
775+
"execution_count": 18,
786776
"metadata": {},
787777
"outputs": [],
788778
"source": [
@@ -793,14 +783,64 @@
793783
},
794784
{
795785
"cell_type": "code",
796-
"execution_count": 153,
786+
"execution_count": 19,
797787
"metadata": {},
798788
"outputs": [],
799789
"source": [
800790
"lr=2e-2\n",
801791
"lr *= bs/48"
802792
]
803793
},
794+
{
795+
"cell_type": "code",
796+
"execution_count": 20,
797+
"metadata": {
798+
"scrolled": false
799+
},
800+
"outputs": [
801+
{
802+
"data": {
803+
"text/html": [
804+
"<table border=\"1\" class=\"dataframe\">\n",
805+
" <thead>\n",
806+
" <tr style=\"text-align: left;\">\n",
807+
" <th>epoch</th>\n",
808+
" <th>train_loss</th>\n",
809+
" <th>valid_loss</th>\n",
810+
" <th>accuracy</th>\n",
811+
" <th>time</th>\n",
812+
" </tr>\n",
813+
" </thead>\n",
814+
" <tbody>\n",
815+
" <tr>\n",
816+
" <td>0</td>\n",
817+
" <td>0.460636</td>\n",
818+
" <td>0.599994</td>\n",
819+
" <td>0.744841</td>\n",
820+
" <td>00:02</td>\n",
821+
" </tr>\n",
822+
" <tr>\n",
823+
" <td>1</td>\n",
824+
" <td>0.420206</td>\n",
825+
" <td>0.548175</td>\n",
826+
" <td>0.749531</td>\n",
827+
" <td>00:02</td>\n",
828+
" </tr>\n",
829+
" </tbody>\n",
830+
"</table>"
831+
],
832+
"text/plain": [
833+
"<IPython.core.display.HTML object>"
834+
]
835+
},
836+
"metadata": {},
837+
"output_type": "display_data"
838+
}
839+
],
840+
"source": [
841+
"learn_c.fit_one_cycle(2, lr, moms=(0.8,0.7))"
842+
]
843+
},
804844
{
805845
"cell_type": "code",
806846
"execution_count": 154,

0 commit comments

Comments
 (0)