Skip to content

Commit 46120c8

Browse files
committed
update
1 parent ceef0bf commit 46120c8

File tree

18 files changed

+4854
-0
lines changed

18 files changed

+4854
-0
lines changed

Untitled.ipynb

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"pip 20.1 from /Users/yihyap/anaconda3/envs/sandbox36/lib/python3.6/site-packages/pip (python 3.6)\r\n"
13+
]
14+
}
15+
],
16+
"source": [
17+
"!pip --version"
18+
]
19+
}
20+
],
21+
"metadata": {
22+
"kernelspec": {
23+
"display_name": "smv2",
24+
"language": "python",
25+
"name": "smv2"
26+
},
27+
"language_info": {
28+
"codemirror_mode": {
29+
"name": "ipython",
30+
"version": 3
31+
},
32+
"file_extension": ".py",
33+
"mimetype": "text/x-python",
34+
"name": "python",
35+
"nbconvert_exporter": "python",
36+
"pygments_lexer": "ipython3",
37+
"version": "3.8.5"
38+
}
39+
},
40+
"nbformat": 4,
41+
"nbformat_minor": 4
42+
}

script-mode-pytorch/docker/Dockerfile

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Part of the implementation of this container is based on the Amazon SageMaker Apache MXNet container.
2+
# https://github.com/aws/sagemaker-mxnet-container
3+
4+
FROM ubuntu:16.04
5+
6+
LABEL maintainer="Giuseppe A. Porcelli"
7+
8+
# Defining some variables used at build time to install Python3
9+
ARG PYTHON=python3
10+
ARG PYTHON_PIP=python3-pip
11+
ARG PIP=pip3
12+
ARG PYTHON_VERSION=3.6.6
13+
14+
# Install some handy libraries like curl, wget, git, build-essential, zlib
15+
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common && \
16+
add-apt-repository ppa:deadsnakes/ppa -y && \
17+
apt-get update && apt-get install -y --no-install-recommends \
18+
build-essential \
19+
ca-certificates \
20+
curl \
21+
wget \
22+
git \
23+
libopencv-dev \
24+
openssh-client \
25+
openssh-server \
26+
vim \
27+
zlib1g-dev && \
28+
rm -rf /var/lib/apt/lists/*
29+
30+
# Installing Python3
31+
# The development headers must be present BEFORE ./configure runs so that the
# optional extension modules that need them (ssl, sqlite3, bz2, readline,
# tkinter, ...) are compiled in a single build instead of building CPython twice.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libreadline-gplv2-dev \
        libncursesw5-dev \
        libssl-dev \
        libsqlite3-dev \
        tk-dev \
        libgdbm-dev \
        libc6-dev \
        libbz2-dev && \
    rm -rf /var/lib/apt/lists/* && \
    wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \
    tar -xvf Python-$PYTHON_VERSION.tgz && cd Python-$PYTHON_VERSION && \
    ./configure && make && make install && \
    cd .. && rm -rf Python-$PYTHON_VERSION* && \
    ln -s /usr/local/bin/pip3 /usr/bin/pip
37+
38+
# Upgrading pip and creating symbolic link for python3
39+
RUN ${PIP} --no-cache-dir install --upgrade pip
40+
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python
41+
42+
WORKDIR /
43+
44+
# Installing numpy, pandas, scikit-learn, scipy
45+
RUN ${PIP} install --no-cache --upgrade \
46+
numpy==1.14.5 \
47+
pandas==0.24.1 \
48+
scikit-learn==0.20.3 \
49+
requests==2.21.0 \
50+
scipy==1.2.1 \
51+
torch \
52+
torchaudio
53+
54+
# Setting some environment variables.
55+
ENV PYTHONDONTWRITEBYTECODE=1 \
56+
PYTHONUNBUFFERED=1 \
57+
LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib" \
58+
PYTHONIOENCODING=UTF-8 \
59+
LANG=C.UTF-8 \
60+
LC_ALL=C.UTF-8
61+
62+
RUN ${PIP} install --no-cache --upgrade \
63+
sagemaker-containers
64+
65+
# Copies code under /opt/ml/code where sagemaker-containers expects to find the script to run
66+
COPY code/* /opt/ml/code/
67+
68+
# Defines train.py as script entry point
69+
ENV SAGEMAKER_PROGRAM train.py
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import torch
2+
import torch.nn as nn
3+
import torch.nn.functional as F
4+
import os
5+
6+
7+
class Net(nn.Module):
    """Single linear layer followed by a log-softmax over three outputs."""

    def __init__(self, input_features):
        """Create the ``fc1`` layer mapping ``input_features`` inputs to 3 outputs."""
        super().__init__()
        self.fc1 = nn.Linear(input_features, 3)

    def forward(self, x):
        """Return ``log_softmax(fc1(x))`` taken along dimension 1."""
        logits = self.fc1(x)
        return F.log_softmax(logits, dim=1)
16+
17+
18+
def model_fn(model_dir):
    """Load the trained ``Net`` stored as ``model.pth`` inside ``model_dir``.

    The file is expected to hold a ``state_dict``. The network's input width
    is recovered from the saved ``fc1.weight`` tensor because ``Net`` cannot
    be constructed without ``input_features``.

    Args:
        model_dir: Directory that contains ``model.pth``.

    Returns:
        The model with its weights loaded, placed on CUDA when available and
        on the CPU otherwise. It is wrapped in ``nn.DataParallel`` when more
        than one CUDA device is visible.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # map_location lets a checkpoint written on a GPU host load on a CPU-only one.
    with open(os.path.join(model_dir, "model.pth"), "rb") as f:
        state_dict = torch.load(f, map_location=device)

    # Checkpoints saved from an nn.DataParallel wrapper prefix every key with
    # "module."; strip it so the keys match a plain Net.
    prefix = "module."
    state_dict = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }

    # fc1.weight has shape (out_features, in_features).
    model = Net(state_dict["fc1.weight"].shape[1])
    model.load_state_dict(state_dict)
    model = model.to(device)

    # Wrap only after the weights are loaded so the state_dict keys line up.
    if torch.cuda.device_count() > 1:
        print("Gpu count: {}".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)
    return model
28+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
torchaudio
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
"""
2+
Test:
3+
Local test on train.py
4+
python train.py --train "../../test_data/train/" --validation "../../test_data/val/" --model-dir "../../test_data/"
5+
6+
vscode launch.json
7+
{
8+
"version": "0.2.0",
9+
"configurations": [
10+
{
11+
"name": "Python: Current File",
12+
"type": "python",
13+
"request": "launch",
14+
"program": "${file}",
15+
"console": "integratedTerminal",
16+
"cwd": "${fileDirname}",
17+
"args": [
18+
"--train",
19+
"../../test_data/train/",
20+
"--validation",
21+
"../../test_data/val/",
22+
"--model-dir",
23+
"../../test_data/"
24+
]
25+
}
26+
]
27+
}
28+
29+
"""
30+
from sklearn.datasets import make_classification
31+
import argparse
32+
import torch
33+
import torch.nn as nn
34+
import torch.nn.functional as F
35+
import torch.optim as optim
36+
from torch.optim.lr_scheduler import StepLR
37+
from torch.utils.data import Dataset, DataLoader
38+
import os
39+
from utils import print_files_in_path
40+
import torchaudio
41+
42+
43+
class MyDataset(Dataset):
    """Synthetic classification dataset generated with ``make_classification``."""

    def __init__(self, n_samples, n_features, n_classes):
        """Generate ``n_samples`` rows with ``n_features`` columns and ``n_classes`` labels."""
        self.n_samples = n_samples
        generator_options = {
            "n_samples": n_samples,
            "n_features": n_features,
            "n_redundant": 0,
            "n_informative": 2,
            "n_clusters_per_class": 1,
            "n_classes": n_classes,
        }
        self.X, self.Y = make_classification(**generator_options)

    def __len__(self):
        """Return the number of samples requested at construction time."""
        return self.n_samples

    def __getitem__(self, x):
        """Return row ``x`` as a ``(float32 features, long label)`` tensor pair."""
        # The model expects float32 inputs; the label is an integer class index.
        features = torch.tensor(self.X[x, :], dtype=torch.float32)
        label = torch.tensor(self.Y[x], dtype=torch.long)
        return features, label
61+
62+
63+
class Net(nn.Module):
    """Single linear layer followed by a log-softmax over three outputs."""

    def __init__(self, input_features):
        """Create the ``fc1`` layer mapping ``input_features`` inputs to 3 outputs."""
        super().__init__()
        self.fc1 = nn.Linear(input_features, 3)

    def forward(self, x):
        """Return ``log_softmax(fc1(x))`` taken along dimension 1."""
        logits = self.fc1(x)
        return F.log_softmax(logits, dim=1)
72+
73+
74+
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Each batch is moved to ``device``, the NLL loss of the model output is
    back-propagated and ``optimizer`` takes a step. Progress is printed for
    every batch whose index is a multiple of ``args.log_interval``.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target.long())
        loss.backward()
        optimizer.step()

        if batch_idx % args.log_interval != 0:
            continue

        # Progress figures are only computed for the batches that are logged.
        samples_seen = batch_idx * len(data)
        dataset_size = len(train_loader.dataset)
        percent_done = 100.0 * batch_idx / len(train_loader)
        print(
            "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch, samples_seen, dataset_size, percent_done, loss.item()
            )
        )
93+
94+
95+
def test(model, device, test_loader):
96+
model.eval()
97+
test_loss = 0
98+
correct = 0
99+
with torch.no_grad():
100+
for data, target in test_loader:
101+
data, target = data.to(device), target.to(device)
102+
output = model(data)
103+
test_loss += F.nll_loss(output, target.long(), reduction="sum").item() # sum up batch loss
104+
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
105+
correct += pred.eq(target.view_as(pred)).sum().item()
106+
107+
test_loss /= len(test_loader.dataset)
108+
109+
print(
110+
"\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
111+
test_loss, correct, len(test_loader.dataset), 100.0 * correct / len(test_loader.dataset)
112+
)
113+
)
114+
115+
116+
# Sagemaker
117+
def model_fn(model_dir):
    """Load the trained ``Net`` stored as ``model.pth`` inside ``model_dir``.

    The file is expected to hold a ``state_dict``. The network's input width
    is recovered from the saved ``fc1.weight`` tensor because ``Net`` cannot
    be constructed without ``input_features``.

    Args:
        model_dir: Directory that contains ``model.pth``.

    Returns:
        The model with its weights loaded, placed on CUDA when available and
        on the CPU otherwise. It is wrapped in ``nn.DataParallel`` when more
        than one CUDA device is visible.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # map_location lets a checkpoint written on a GPU host load on a CPU-only one.
    with open(os.path.join(model_dir, "model.pth"), "rb") as f:
        state_dict = torch.load(f, map_location=device)

    # Checkpoints saved from an nn.DataParallel wrapper prefix every key with
    # "module."; strip it so the keys match a plain Net.
    prefix = "module."
    state_dict = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }

    # fc1.weight has shape (out_features, in_features).
    model = Net(state_dict["fc1.weight"].shape[1])
    model.load_state_dict(state_dict)
    model = model.to(device)

    # Wrap only after the weights are loaded so the state_dict keys line up.
    if torch.cuda.device_count() > 1:
        print("Gpu count: {}".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)
    return model
127+
128+
129+
# Sagemaker
130+
def save_model(model, model_dir):
    """Write the model's ``state_dict`` to ``model.pth`` inside ``model_dir``."""
    torch.save(model.state_dict(), os.path.join(model_dir, "model.pth"))
133+
134+
135+
def main(args):
    """Train ``Net`` on a synthetic dataset, evaluate it and optionally save it.

    The files found in the train and validation channels are listed, a
    1000-sample ``MyDataset`` is split 70/30, the model is trained for
    ``args.epochs`` epochs with Adam and evaluated once at the end. The model
    is written to ``args.model_dir`` only when ``args.save_model`` is set.

    Example SageMaker channel paths (note: no trailing slash):
        SM_CHANNEL_TRAIN=/opt/ml/input/data/train
        SM_CHANNEL_VALIDATION=/opt/ml/input/data/validation

    Example training job name:
        script-mode-container-xgb-2020-08-10-13-29-15-756
    """
    train_channel = args.train
    validation_channel = args.validation
    model_dir = args.model_dir

    print("\nList of files in train channel: ")
    print_files_in_path(train_channel)

    print("\nList of files in validation channel: ")
    print_files_in_path(validation_channel)

    use_cuda = torch.cuda.is_available()
    torch.manual_seed(args.seed)

    if use_cuda:
        device = torch.device("cuda")
        loader_options = {"num_workers": 8, "pin_memory": True}
    else:
        device = torch.device("cpu")
        loader_options = {}
    print("Device:", device)

    # Synthetic data: 1000 samples, 5 features, 3 classes, split 70/30.
    input_features = 5
    n_samples = 1000
    dataset = MyDataset(n_samples, input_features, 3)
    train_len = int(n_samples * 0.7)
    split_sizes = [train_len, n_samples - train_len]
    train_set, val_set = torch.utils.data.random_split(dataset, split_sizes)

    train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, **loader_options)
    test_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=True, **loader_options)

    model = Net(input_features).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    last_epoch = args.epochs + 1
    for epoch in range(1, last_epoch):
        train(args, model, device, train_loader, optimizer, epoch)

    test(model, device, test_loader)

    if args.save_model:
        save_model(model, model_dir)
180+
181+
182+
if __name__ == "__main__":

    # Training settings. Help texts use argparse's %(default)s placeholder so
    # the documented default can never drift from the real one.
    parser = argparse.ArgumentParser()
    parser.add_argument("--seed", type=int, default=1, metavar="S", help="random seed (default: %(default)s)")
    parser.add_argument(
        "--epochs", type=int, default=10, metavar="N", help="number of epochs to train (default: %(default)s)"
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=64,
        metavar="N",
        help="input batch size for training (default: %(default)s)",
    )
    parser.add_argument("--lr", type=float, default=0.1, metavar="LR", help="learning rate (default: %(default)s)")
    parser.add_argument("--save-model", action="store_true", default=False, help="For Saving the current Model")
    parser.add_argument(
        "--log-interval",
        type=int,
        default=10,
        metavar="N",
        help="how many batches to wait before logging training status",
    )

    # This is a way to pass additional arguments when running as a script
    # and use sagemaker-containers defaults to set their values when not specified.
    parser.add_argument("--train", type=str, default=os.getenv("SM_CHANNEL_TRAIN", None))
    parser.add_argument("--validation", type=str, default=os.getenv("SM_CHANNEL_VALIDATION", None))
    parser.add_argument("--model-dir", type=str, default=os.getenv("SM_MODEL_DIR", None))

    args = parser.parse_args()
    print(args)
    main(args)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import os
2+
from os import path
3+
4+
5+
def save_model_artifacts(model_artifacts_path, net):
    """Write a placeholder ``model.dummy`` file into ``model_artifacts_path``.

    Nothing is written when ``model_artifacts_path`` does not exist.

    Args:
        model_artifacts_path: Directory that receives ``model.dummy``; it may
            be given with or without a trailing path separator.
        net: Unused; kept so existing callers do not break.
    """
    if not path.exists(model_artifacts_path):
        return

    # os.path.join adds the separator when the caller omitted it; plain string
    # concatenation would create "<dir>model.dummy" next to the directory.
    target = os.path.join(model_artifacts_path, "model.dummy")
    # The context manager closes the handle even if the write fails.
    with open(target, "w") as model_file:
        model_file.write("Dummy model.")
10+
11+
12+
def print_files_in_path(path):
    """Print every file found under ``path``, one full path per line.

    When ``path`` is ``None`` a notice is printed instead and nothing is
    walked.
    """
    if path is None:
        print("SM_CHANNEL is not set")
        return

    # os.walk yields (directory, sub-directories, file names) for each level.
    discovered = [
        os.path.join(directory, file_name)
        for directory, _subdirs, file_names in os.walk(path)
        for file_name in file_names
    ]
    for entry in discovered:
        print(entry)

0 commit comments

Comments
 (0)