Skip to content

Commit d54e147

Browse files
author
l2k2
committed
new video
1 parent 74de7a8 commit d54e147

File tree

3 files changed

+106
-0
lines changed

3 files changed

+106
-0
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import os
2+
3+
def load_imdb():
    """Load the ACL IMDB sentiment dataset from ./aclImdb/.

    Reads every .txt review under train/{pos,neg} and test/{pos,neg}.
    Positive reviews are labeled 1, negative reviews 0.

    Returns:
        ((X_train, y_train), (X_test, y_test)) where X_* are lists of
        review strings and y_* are parallel lists of 0/1 int labels.
    """

    def _read_dir(path, label, texts, labels):
        # One pass per directory.  Sorted for a deterministic ordering;
        # os.listdir order is platform-dependent.
        names = sorted(f for f in os.listdir(path) if f.endswith('.txt'))
        for name in names:
            # Context manager closes every handle (the original leaked
            # them); the IMDB dataset is UTF-8 encoded.
            with open(os.path.join(path, name), encoding='utf-8') as fh:
                texts.append(fh.read())
        # Label count matches the files actually read instead of a
        # hard-coded 12500, so partial/custom datasets still line up.
        labels.extend([label] * len(names))

    X_train, y_train = [], []
    _read_dir('./aclImdb/train/pos/', 1, X_train, y_train)
    _read_dir('./aclImdb/train/neg/', 0, X_train, y_train)

    X_test, y_test = [], []
    _read_dir('./aclImdb/test/pos/', 1, X_test, y_test)
    _read_dir('./aclImdb/test/neg/', 0, X_test, y_test)

    return (X_train, y_train), (X_test, y_test)

videos/lstm-classifier/imdb-lstm.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Train a Conv1D + LSTM sentiment classifier on the IMDB review dataset,
# logging metrics to Weights & Biases.
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding, LSTM, Bidirectional
# MaxPooling1D was missing from the original imports -> NameError when
# the model was built.
from keras.layers import Conv1D, Flatten, MaxPooling1D
# Aliased so the local loader below keeps the bare name `imdb` (in the
# original, `import imdb` silently shadowed keras.datasets.imdb).
from keras.datasets import imdb as keras_imdb
import wandb
from wandb.keras import WandbCallback
import imdb  # local ./imdb.py loader for the raw ACL IMDB text files
import numpy as np
from keras.preprocessing import text

# Initialize a W&B run; hyperparameters live on wandb.config so they are
# tracked alongside the run's metrics.
wandb.init()
config = wandb.config

# set parameters:
config.vocab_size = 1000     # keep only the 1000 most frequent words
config.maxlen = 300          # truncate/pad every review to 300 tokens
config.batch_size = 32
config.embedding_dims = 50
config.filters = 10
config.kernel_size = 3
config.hidden_dims = 10
config.epochs = 10

(X_train, y_train), (X_test, y_test) = imdb.load_imdb()

# Fit the tokenizer on the training text only, then map each review to a
# sequence of word indices.  The original used texts_to_matrix(), which
# yields a (n_samples, vocab_size) bag-of-words 0/1 matrix; feeding that
# to an Embedding layer treats column positions as word ids, which is
# wrong -- texts_to_sequences() is the correct pipeline for Embedding.
tokenizer = text.Tokenizer(num_words=config.vocab_size)
tokenizer.fit_on_texts(X_train)
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)

X_train = sequence.pad_sequences(X_train, maxlen=config.maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=config.maxlen)

# Conv1D front-end extracts local n-gram features, pooling halves the
# sequence length, then an LSTM summarizes it for a sigmoid binary head.
model = Sequential()
model.add(Embedding(config.vocab_size,
                    config.embedding_dims,
                    input_length=config.maxlen))
model.add(Conv1D(config.filters,
                 config.kernel_size,
                 padding='valid',
                 activation='relu'))
model.add(MaxPooling1D())
model.add(LSTM(config.hidden_dims, activation="sigmoid"))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(X_train, y_train,
          batch_size=config.batch_size,
          epochs=config.epochs,
          validation_data=(X_test, y_test), callbacks=[WandbCallback()])

videos/lstm-classifier/imdb.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import os
2+
3+
def load_imdb():
    """Read the ACL IMDB dataset from ./aclImdb/ into Python lists.

    Walks train/{pos,neg} and test/{pos,neg}, reading every *.txt review.
    pos -> label 1, neg -> label 0.

    Returns:
        ((X_train, y_train), (X_test, y_test)): lists of review strings
        and parallel 0/1 integer label lists.
    """
    splits = {}
    for split in ('train', 'test'):
        texts, labels = [], []
        for subdir, label in (('pos', 1), ('neg', 0)):
            directory = './aclImdb/{}/{}/'.format(split, subdir)
            # Sorted for determinism; os.listdir order is arbitrary.
            for name in sorted(os.listdir(directory)):
                if not name.endswith('.txt'):
                    continue
                # `with` closes each handle (the original opened files
                # without ever closing them); the dataset is UTF-8.
                with open(directory + name, encoding='utf-8') as fh:
                    texts.append(fh.read())
                # One label per file actually read, rather than assuming
                # exactly 12500 reviews per directory.
                labels.append(label)
        splits[split] = (texts, labels)
    return splits['train'], splits['test']

0 commit comments

Comments
 (0)