|
39 | 39 | },
|
40 | 40 | {
|
41 | 41 | "cell_type": "code",
|
42 |
| - "execution_count": 30, |
| 42 | + "execution_count": 3, |
43 | 43 | "metadata": {},
|
44 | 44 | "outputs": [],
|
45 | 45 | "source": [
|
|
57 | 57 | },
|
58 | 58 | {
|
59 | 59 | "cell_type": "code",
|
60 |
| - "execution_count": 3, |
| 60 | + "execution_count": 4, |
61 | 61 | "metadata": {},
|
62 | 62 | "outputs": [
|
63 | 63 | {
|
|
81 | 81 | },
|
82 | 82 | {
|
83 | 83 | "cell_type": "code",
|
84 |
| - "execution_count": 4, |
| 84 | + "execution_count": 5, |
85 | 85 | "metadata": {},
|
86 | 86 | "outputs": [
|
87 | 87 | {
|
|
116 | 116 | "data = (TextList.from_folder(dest, processor=[OpenFileProcessor(), SPProcessor()])\n",
|
117 | 117 | " .split_by_rand_pct(0.1, seed=42)\n",
|
118 | 118 | " .label_for_lm()\n",
|
119 |
| - " .databunch(bs=bs, num_workers=1))" |
| 119 | + " .databunch(bs=bs, num_workers=1))\n", |
| 120 | + "\n", |
| 121 | + "data.save(f'{lang}_databunch')\n", |
| 122 | + "len(data.vocab.itos),len(data.train_ds)" |
120 | 123 | ]
|
121 | 124 | },
|
122 | 125 | {
|
123 | 126 | "cell_type": "code",
|
124 |
| - "execution_count": 9, |
| 127 | + "execution_count": null, |
125 | 128 | "metadata": {},
|
126 |
| - "outputs": [ |
127 |
| - { |
128 |
| - "data": { |
129 |
| - "text/plain": [ |
130 |
| - "(30000, 44905)" |
131 |
| - ] |
132 |
| - }, |
133 |
| - "execution_count": 9, |
134 |
| - "metadata": {}, |
135 |
| - "output_type": "execute_result" |
136 |
| - } |
137 |
| - ], |
| 129 | + "outputs": [], |
138 | 130 | "source": [
|
139 |
| - "data.save(f'{lang}_databunch')\n", |
140 |
| - "len(data.vocab.itos),len(data.train_ds)" |
| 131 | + "data = load_data(f'{lang}_databunch', bs=bs)" |
141 | 132 | ]
|
142 | 133 | },
|
143 | 134 | {
|
|
191 | 182 | "data.show_batch()"
|
192 | 183 | ]
|
193 | 184 | },
|
194 |
| - { |
195 |
| - "cell_type": "code", |
196 |
| - "execution_count": null, |
197 |
| - "metadata": {}, |
198 |
| - "outputs": [], |
199 |
| - "source": [ |
200 |
| - "# data = load_data(path, f'{lang}_databunch', bs=bs)" |
201 |
| - ] |
202 |
| - }, |
203 | 185 | {
|
204 | 186 | "cell_type": "code",
|
205 | 187 | "execution_count": 13,
|
|
359 | 341 | },
|
360 | 342 | {
|
361 | 343 | "cell_type": "code",
|
362 |
| - "execution_count": 17, |
| 344 | + "execution_count": 6, |
363 | 345 | "metadata": {},
|
364 | 346 | "outputs": [
|
365 | 347 | {
|
|
368 | 350 | "[PosixPath('/home/jhoward/.fastai/data/trwiki/movies/tr_polarity.neg'),\n",
|
369 | 351 | " PosixPath('/home/jhoward/.fastai/data/trwiki/movies/tr_polarity.pos'),\n",
|
370 | 352 | " PosixPath('/home/jhoward/.fastai/data/trwiki/movies/tmp'),\n",
|
371 |
| - " PosixPath('/home/jhoward/.fastai/data/trwiki/movies/models')]" |
| 353 | + " PosixPath('/home/jhoward/.fastai/data/trwiki/movies/models'),\n", |
| 354 | + " PosixPath('/home/jhoward/.fastai/data/trwiki/movies/tr_data_lm')]" |
372 | 355 | ]
|
373 | 356 | },
|
374 |
| - "execution_count": 17, |
| 357 | + "execution_count": 6, |
375 | 358 | "metadata": {},
|
376 | 359 | "output_type": "execute_result"
|
377 | 360 | }
|
|
383 | 366 | },
|
384 | 367 | {
|
385 | 368 | "cell_type": "code",
|
386 |
| - "execution_count": 18, |
| 369 | + "execution_count": 7, |
387 | 370 | "metadata": {},
|
388 | 371 | "outputs": [
|
389 | 372 | {
|
|
450 | 433 | "4 özgürlük denilince aklima gelen ilk film.bir b... 1"
|
451 | 434 | ]
|
452 | 435 | },
|
453 |
| - "execution_count": 18, |
| 436 | + "execution_count": 7, |
454 | 437 | "metadata": {},
|
455 | 438 | "output_type": "execute_result"
|
456 | 439 | }
|
|
464 | 447 | },
|
465 | 448 | {
|
466 | 449 | "cell_type": "code",
|
467 |
| - "execution_count": 19, |
| 450 | + "execution_count": 8, |
468 | 451 | "metadata": {},
|
469 | 452 | "outputs": [
|
470 | 453 | {
|
|
531 | 514 | "4 milliyetçi bir film tavsiye etmiyorum.... \\n 0"
|
532 | 515 | ]
|
533 | 516 | },
|
534 |
| - "execution_count": 19, |
| 517 | + "execution_count": 8, |
535 | 518 | "metadata": {},
|
536 | 519 | "output_type": "execute_result"
|
537 | 520 | }
|
|
545 | 528 | },
|
546 | 529 | {
|
547 | 530 | "cell_type": "code",
|
548 |
| - "execution_count": 20, |
| 531 | + "execution_count": 9, |
549 | 532 | "metadata": {},
|
550 | 533 | "outputs": [],
|
551 | 534 | "source": [
|
|
554 | 537 | },
|
555 | 538 | {
|
556 | 539 | "cell_type": "code",
|
557 |
| - "execution_count": 117, |
| 540 | + "execution_count": 11, |
558 | 541 | "metadata": {},
|
559 | 542 | "outputs": [],
|
560 | 543 | "source": [
|
561 |
| - "spp = SPProcessor(sp_model=dest/'tmp'/'spm.model', sp_vocab=dest/'tmp'/'spm.vocab')\n", |
562 |
| - "\n", |
563 |
| - "data_lm = (TextList.from_df(df, path_clas, cols='text', processor=[OpenFileProcessor(), spp], vocab=data.vocab)\n", |
| 544 | + "data_lm = (TextList.from_df(df, path_clas, cols='text', processor=[\n", |
| 545 | + " OpenFileProcessor(), SPProcessor.load(dest)], vocab=data.vocab)\n", |
564 | 546 | " .split_by_rand_pct(0.1, seed=42)\n",
|
565 | 547 | " .label_for_lm() \n",
|
566 | 548 | " .databunch(bs=bs, num_workers=1))\n",
|
567 | 549 | "\n",
|
568 |
| - "data_lm.save(f'{lang}_data_lm')" |
| 550 | + "data_lm.save(f'{lang}_clas_databunch')" |
569 | 551 | ]
|
570 | 552 | },
|
571 | 553 | {
|
572 | 554 | "cell_type": "code",
|
573 |
| - "execution_count": 119, |
| 555 | + "execution_count": null, |
| 556 | + "metadata": {}, |
| 557 | + "outputs": [], |
| 558 | + "source": [ |
| 559 | + "data_lm = load_data(f'{lang}_clas_databunch', bs=bs)" |
| 560 | + ] |
| 561 | + }, |
| 562 | + { |
| 563 | + "cell_type": "code", |
| 564 | + "execution_count": 21, |
574 | 565 | "metadata": {},
|
575 | 566 | "outputs": [
|
576 | 567 | {
|
|
586 | 577 | " <tbody>\n",
|
587 | 578 | " <tr>\n",
|
588 | 579 | " <td>0</td>\n",
|
589 |
| - " <td>▁özgürlük ▁as ki ni ▁ve ▁i ngilizlerin ▁ ne ▁kadar ▁ vah set ▁oldu klar ini ▁gözler ▁önüne ▁ser en ▁bir ▁film ▁ve ▁tabi ▁ki ▁as k ▁xxrep ▁4 ▁ . ▁xxbos ▁gerçekten ▁tarihi ▁sava s ▁filmleri ▁ara si nda ▁tar tis ma siz ▁en ▁iyi si ▁ , ▁12 ▁ yi l ▁boyunca ▁ac aba ▁ikincisi ▁çek ir imi ▁diye ▁bekledi gi m ▁bir ▁film ▁ , bel ki</td>\n", |
| 580 | + " <td>bl il r = ) . ▁xxbos ▁haftada ▁bir ▁bu ▁filme ▁bak i yorum . . . ▁yorum ▁ya pil cak ▁bi ▁film ▁ di il ▁çünkü ▁mükemmel . ▁is le digi ▁as k ▁konusu yla , ▁özgürlük ▁konusu yla , ▁intikam ▁ve ▁ hir si yla ▁ve ▁tabi ki ▁ müz ig iyle ▁mükemmel ▁bir ▁film ▁ol mus . . ▁the ▁best ▁film ▁on ▁the ▁world ▁for ▁me .</td>\n", |
590 | 581 | " </tr>\n",
|
591 | 582 | " <tr>\n",
|
592 | 583 | " <td>1</td>\n",
|
593 |
| - " <td>ne ▁yok ▁ di yebilir im . . ▁xxbos ▁böyle ▁güzel ▁bir ▁ya pit ▁olamaz ▁filmde ▁her ▁sey ▁var ▁insani ▁dünya dan ▁ali p ▁götürü yor ▁bask a ▁diyar lara ▁film ▁bitti kten ▁sonra ▁epey ▁süre ▁geçmesi ▁gerekiyor ▁tekrar ▁dünya ▁ya ▁dönmek ▁için ▁dikkat ! . ▁xxbos ▁ ye sil ▁yol , bra ve heart , ti tan ic , ▁xxrep ▁4 ▁ . ▁bu ▁filmler ▁için ▁ ne ▁</td>\n", |
| 584 | + " <td>▁benim ▁göz ya s lar im ▁olur ▁her ▁defa si nda ▁xxrep ▁4 ▁ . ▁san i rim ▁izlemeye n ▁yoktur ▁fazla ▁bir ▁söz ▁istemez . . ▁mü this ! !! . ▁xxbos ▁hayati min ▁filmi ▁ di yebilir im . ▁10 ▁numara ▁bir ▁film . ▁ele sti re lere ▁kap ali ▁bir ▁film ▁olma li . ▁çünkü ▁kötü ▁bir ▁yan ▁göre miyorum . . ▁xxbos ▁özgür ▁olma yi ,</td>\n", |
594 | 585 | " </tr>\n",
|
595 | 586 | " <tr>\n",
|
596 | 587 | " <td>2</td>\n",
|
597 | 588 | " <td>▁i sk ence ▁edilerek ▁idam ▁edilmesi . . . ve ▁sonunda ▁özgürlük ▁diye ▁hay kir isi . . . ha lan ▁unut a miyorum ▁xxrep ▁4 ▁ . ▁xxbos ▁ilk ▁bu ▁filmi ▁sinemada ▁izledi m ▁ve ▁insan in ▁inan di ktan ▁sonra ▁ ne leri ▁yap abi le ce gi ni ▁fark ▁etti m . ▁gerçekten ▁süper ▁film di . ▁halen ▁içi m den ▁geldi kçe ▁takip ▁izleri m ▁ve</td>\n",
|
598 | 589 | " </tr>\n",
|
599 | 590 | " <tr>\n",
|
600 | 591 | " <td>3</td>\n",
|
601 |
| - " <td>▁istemez ▁oraya ▁götürü yor ▁filmin ▁uzun lu gun a ▁al dan ip ta ▁filmi ▁izlemek ten ▁vazgeçme yin ▁xxrep ▁4 ▁ . ▁xxbos ▁mükemmel ▁ötesi . . ▁ . ▁xxbos ▁bu ▁filmi ▁izlemeye n ▁kal di mi ? ▁sonu ▁iyi ▁bit me se de ▁acil i mini ▁yap miyorum ▁izle mi yen ler ▁icin . ▁xxbos ▁hiç ▁ a bart miyorum ▁hayat im da ▁izledi gi m ▁en ▁iyi ▁filmlerden</td>\n", |
| 592 | + " <td>▁al dan ip ta ▁filmi ▁izlemek ten ▁vazgeçme yin ▁xxrep ▁4 ▁ . ▁xxbos ▁harika ▁bir ▁film di ▁xxrep ▁5 ▁ . ▁xxbos ▁mükemmel ▁ötesi . . ▁ . ▁xxbos ▁hiç ▁ a bart miyorum ▁hayat im da ▁izledi gi m ▁en ▁iyi ▁filmlerden ▁biri ▁ di yebilir im . tam ▁bir ▁bas ya pit ▁nite ligi nde . o scar ▁al digi na ▁hiç ▁ sa si rma dim</td>\n", |
602 | 593 | " </tr>\n",
|
603 | 594 | " <tr>\n",
|
604 | 595 | " <td>4</td>\n",
|
605 |
| - " <td>di . ▁xxbos ▁tarantino nun ▁en ▁iyi ▁filmi ▁ben ce ▁her ke zin ▁izleme si ▁gereken ▁bi ▁film ▁xxrep ▁8 ▁ . ▁xxbos ▁tarantino nun ▁bu ▁filmi ▁kendini ▁belli ▁etti r iyor . hat ta ▁ben ce ▁tarantino nun ▁en ▁iyi ▁filmidir . kendi ne ▁has ▁anlat imi ▁ile ▁bu ▁film ▁hak ka ten ▁sinema ▁sever lerin ▁izleme si ▁gereken ▁bir ▁film . ben ▁10 ▁üzerinden ▁7 ▁verdi m ▁bu</td>\n", |
| 596 | + " <td>. ▁herkes ▁izleme li . . . ▁xxbos ▁tek ▁kelime yle ▁bas ▁ya pit , ta ran tino ▁o ▁bir ▁dahi . iste ▁ tü t k ▁sinema si ▁bölgesel ▁ya da ▁yerel ▁drama lardan , mel od ram lardan ▁kurtul up , bir az cik ▁tarantino ▁kurgusu nu ▁ve ▁esp iri ▁an la yi sini ▁kavrama li . . ▁xxbos ▁müzikleri ▁konusu ▁ di y ologlar i ▁ki sa</td>\n", |
606 | 597 | " </tr>\n",
|
607 | 598 | " </tbody>\n",
|
608 | 599 | "</table>"
|
|
621 | 612 | },
|
622 | 613 | {
|
623 | 614 | "cell_type": "code",
|
624 |
| - "execution_count": 124, |
| 615 | + "execution_count": null, |
625 | 616 | "metadata": {},
|
626 | 617 | "outputs": [],
|
627 | 618 | "source": [
|
|
768 | 759 | },
|
769 | 760 | {
|
770 | 761 | "cell_type": "code",
|
771 |
| - "execution_count": 136, |
| 762 | + "execution_count": 17, |
772 | 763 | "metadata": {},
|
773 | 764 | "outputs": [],
|
774 | 765 | "source": [
|
775 |
| - "spp = SPProcessor(sp_model=dest/'tmp'/'spm.model', sp_vocab=dest/'tmp'/'spm.vocab')\n", |
776 |
| - "\n", |
777 |
| - "data_clas = (TextList.from_df(df, path_clas, cols='text', processor=[OpenFileProcessor(), spp], vocab=data_lm.vocab)\n", |
| 766 | + "data_clas = (TextList.from_df(df, path_clas, cols='text', processor=[\n", |
| 767 | + " OpenFileProcessor(), SPProcessor.load(dest)], vocab=data_lm.vocab)\n", |
778 | 768 | " .split_by_rand_pct(0.1, seed=42)\n",
|
779 | 769 | " .label_from_df(cols='pos')\n",
|
780 | 770 | " .databunch(bs=bs, num_workers=1))"
|
781 | 771 | ]
|
782 | 772 | },
|
783 | 773 | {
|
784 | 774 | "cell_type": "code",
|
785 |
| - "execution_count": 152, |
| 775 | + "execution_count": 18, |
786 | 776 | "metadata": {},
|
787 | 777 | "outputs": [],
|
788 | 778 | "source": [
|
|
793 | 783 | },
|
794 | 784 | {
|
795 | 785 | "cell_type": "code",
|
796 |
| - "execution_count": 153, |
| 786 | + "execution_count": 19, |
797 | 787 | "metadata": {},
|
798 | 788 | "outputs": [],
|
799 | 789 | "source": [
|
800 | 790 | "lr=2e-2\n",
|
801 | 791 | "lr *= bs/48"
|
802 | 792 | ]
|
803 | 793 | },
|
| 794 | + { |
| 795 | + "cell_type": "code", |
| 796 | + "execution_count": 20, |
| 797 | + "metadata": { |
| 798 | + "scrolled": false |
| 799 | + }, |
| 800 | + "outputs": [ |
| 801 | + { |
| 802 | + "data": { |
| 803 | + "text/html": [ |
| 804 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 805 | + " <thead>\n", |
| 806 | + " <tr style=\"text-align: left;\">\n", |
| 807 | + " <th>epoch</th>\n", |
| 808 | + " <th>train_loss</th>\n", |
| 809 | + " <th>valid_loss</th>\n", |
| 810 | + " <th>accuracy</th>\n", |
| 811 | + " <th>time</th>\n", |
| 812 | + " </tr>\n", |
| 813 | + " </thead>\n", |
| 814 | + " <tbody>\n", |
| 815 | + " <tr>\n", |
| 816 | + " <td>0</td>\n", |
| 817 | + " <td>0.460636</td>\n", |
| 818 | + " <td>0.599994</td>\n", |
| 819 | + " <td>0.744841</td>\n", |
| 820 | + " <td>00:02</td>\n", |
| 821 | + " </tr>\n", |
| 822 | + " <tr>\n", |
| 823 | + " <td>1</td>\n", |
| 824 | + " <td>0.420206</td>\n", |
| 825 | + " <td>0.548175</td>\n", |
| 826 | + " <td>0.749531</td>\n", |
| 827 | + " <td>00:02</td>\n", |
| 828 | + " </tr>\n", |
| 829 | + " </tbody>\n", |
| 830 | + "</table>" |
| 831 | + ], |
| 832 | + "text/plain": [ |
| 833 | + "<IPython.core.display.HTML object>" |
| 834 | + ] |
| 835 | + }, |
| 836 | + "metadata": {}, |
| 837 | + "output_type": "display_data" |
| 838 | + } |
| 839 | + ], |
| 840 | + "source": [ |
| 841 | + "learn_c.fit_one_cycle(2, lr, moms=(0.8,0.7))" |
| 842 | + ] |
| 843 | + }, |
804 | 844 | {
|
805 | 845 | "cell_type": "code",
|
806 | 846 | "execution_count": 154,
|
|
0 commit comments