
Commit d5ec5c1

Author: Raul Puri
Commit message: starting licensing update
Parent: 185847b

8 files changed: +216 −28 lines

LICENSE

Lines changed: 36 additions & 1 deletion

@@ -22,4 +22,39 @@
 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+------------------ LICENSE FOR fairseq(transformer) repository --------------------
+
+
+BSD License
+
+For fairseq software
+
+Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+ * Neither the name Facebook nor the names of its contributors may be used to
+   endorse or promote products derived from this software without specific
+   prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

arguments.py

Lines changed: 8 additions & 0 deletions

@@ -1,3 +1,11 @@
+###############################################################################
+# BSD 3-Clause License
+#
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Author & Contact: Raul Puri ([email protected])
+###############################################################################
+
 from configure_data import configure_data

 def add_general_args(parser):

configure_data.py

Lines changed: 24 additions & 22 deletions

@@ -120,48 +120,50 @@ def get_split(opt):
 def configure_data(parser):
     """add cmdline flags for configuring datasets"""
     main_parser = parser
-    parser = parser.add_argument_group('data options')
-    parser.add_argument('--data', nargs='+', default=['./data/imdb/unsup.json'],
+    group = parser.add_argument_group('data options')
+    group.add_argument('--data', nargs='+', default=['./data/imdb/unsup.json'],
                         help="""Filename for training""")
-    parser.add_argument('--valid', nargs='*', default=None,
+    group.add_argument('--valid', nargs='*', default=None,
                         help="""Filename for validation""")
-    parser.add_argument('--test', nargs='*', default=None,
+    group.add_argument('--test', nargs='*', default=None,
                         help="""Filename for testing""")
-    parser.add_argument('--process-fn', type=str, default='process_str', choices=['process_str', 'process_tweet'],
+    group.add_argument('--process-fn', type=str, default='process_str', choices=['process_str', 'process_tweet'],
                         help='what preprocessing function to use to process text. One of [process_str, process_tweet].')
-    parser.add_argument('--batch-size', type=int, default=128,
+    group.add_argument('--batch-size', type=int, default=128,
                         help='Data Loader batch size')
-    parser.add_argument('--eval-batch-size', type=int, default=0,
+    group.add_argument('--eval-batch-size', type=int, default=0,
                         help='Data Loader batch size for evaluation datasets')
-    parser.add_argument('--data-size', type=int, default=256,
+    group.add_argument('--data-size', type=int, default=256,
                         help='number of tokens in data')
-    parser.add_argument('--loose-json', action='store_true',
+    group.add_argument('--loose-json', action='store_true',
                         help='Use loose json (one json-formatted string per newline), instead of tight json (data file is one json string)')
-    parser.add_argument('--preprocess', action='store_true',
+    group.add_argument('--preprocess', action='store_true',
                         help='force preprocessing of datasets')
-    parser.add_argument('--delim', default=',',
+    group.add_argument('--delim', default=',',
                         help='delimiter used to parse csv testfiles')
-    parser.add_argument('--non-binary-cols', nargs='*', default=None,
+    group.add_argument('--non-binary-cols', nargs='*', default=None,
                         help='labels for columns to non-binary dataset [only works for csv datasets]')
-    parser.add_argument('--split', default='1.',
+    group.add_argument('--split', default='1.',
                         help='comma-separated list of proportions for training, validation, and test split')
-    parser.add_argument('--text-key', default='sentence',
+    group.add_argument('--text-key', default='sentence',
                         help='key to use to extract text from json/csv')
-    parser.add_argument('--label-key', default='label',
+    group.add_argument('--label-key', default='label',
                         help='key to use to extract labels from json/csv')
-    parser.add_argument('--eval-text-key', default=None,
+    group.add_argument('--eval-text-key', default=None,
                         help='key to use to extract text from json/csv evaluation datasets')
-    parser.add_argument('--eval-label-key', default=None,
+    group.add_argument('--eval-label-key', default=None,
                         help='key to use to extract labels from json/csv evaluation datasets')
     # tokenizer arguments
-    parser.add_argument('--tokenizer-type', type=str, default='CharacterLevelTokenizer', choices=['CharacterLevelTokenizer', 'SentencePieceTokenizer'],
+    group.add_argument('--tokenizer-type', type=str, default='CharacterLevelTokenizer', choices=['CharacterLevelTokenizer', 'SentencePieceTokenizer'],
                         help='what type of tokenizer to use')
-    parser.add_argument('--tokenizer-model-type', type=str, default='bpe', choices=['bpe', 'char', 'unigram', 'word'],
+    group.add_argument('--tokenizer-model-type', type=str, default='bpe', choices=['bpe', 'char', 'unigram', 'word'],
                         help='Model type to use for sentencepiece tokenization')
-    parser.add_argument('--vocab-size', type=int, default=256,
+    group.add_argument('--vocab-size', type=int, default=256,
                         help='vocab size to use for non-character-level tokenization')
-    parser.add_argument('--tokenizer-path', type=str, default='tokenizer.model',
+    group.add_argument('--tokenizer-path', type=str, default='tokenizer.model',
                         help='path used to save/load sentencepiece tokenization models')
+    # These are options that are relevant to data loading functionality, but are not meant to be exposed to the command line user.
+    # These options are intended to be set in code by specific scripts.
     defaults = {
         'world_size': 1,
         'rank': -1,

@@ -174,4 +176,4 @@ def configure_data(parser):
         'eval_seq_length': 256,
         'samples_per_shard': 1000
     }
-    return DataConfig(main_parser, defaults=defaults), parser
+    return DataConfig(main_parser, defaults=defaults), group
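
The rename from parser to group in this diff fixes a subtle shadowing bug: add_argument_group returns a group object, not a parser, so the old code's final `return ..., parser` handed back the group under a misleading name. A minimal standalone sketch of the pattern, with hypothetical flags rather than this repository's real options:

import argparse

parser = argparse.ArgumentParser()
# A group only affects --help layout; parsed values still land in one namespace.
group = parser.add_argument_group('data options')
group.add_argument('--data', default='unsup.json', help='training file')
group.add_argument('--batch-size', type=int, default=128, help='batch size')

args = parser.parse_args(['--batch-size', '64'])
print(args.data, args.batch_size)  # unsup.json 64

Returning the group, as the new code does, lets callers keep attaching related flags to the same section of the help text.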

model/checkpoint.py

Lines changed: 8 additions & 0 deletions

@@ -1,3 +1,11 @@
+###############################################################################
+# BSD 3-Clause License
+#
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Author & Contact: Raul Puri ([email protected])
+###############################################################################
+
 from __future__ import absolute_import, division, print_function, unicode_literals
 import torch
 import warnings

model/sentiment_classifier_old.py

Lines changed: 107 additions & 0 deletions

@@ -0,0 +1,107 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+import numpy as np
+from model import RNNFeaturizer
+
+class BinaryClassifier(nn.Module):
+    def __init__(self, num_features=4096):
+        super().__init__()
+
+        self.dense0 = nn.Linear(num_features, 1)
+        self.neurons = None
+
+    def forward(self, X, **kwargs):
+        return torch.sigmoid(self.linear(X)).float()
+        #return F.sigmoid(self.linear(X), dim=-1).float()
+
+    def linear(self, X):
+        weight = self.dense0.weight
+        if self.neurons is not None:
+            #weight = weight[torch.arange(weight.size(0)).unsqueeze(1), self.neurons].contiguous()
+            weight = weight[:, self.neurons].contiguous()
+            if X.size(-1) == self.dense0.weight.size(-1):
+                X = X[:, self.neurons].contiguous()
+            torch.cuda.synchronize()
+        return F.linear(X, weight, self.dense0.bias)
+
+    def set_neurons(self, num_neurons=None):
+        if num_neurons is None:
+            self.neurons = None
+            return self.get_neurons()
+        neurons, values = self.get_neurons(num_neurons=num_neurons)
+        self.neurons = neurons
+        return neurons, values
+
+    def get_neurons(self, num_neurons=None):
+        if num_neurons is None:
+            return self.dense0.weight
+        values, neurons = torch.topk(self.dense0.weight.abs().float(), num_neurons, 1)
+        neurons = neurons[0]
+        values = self.dense0.weight[:, neurons]
+        return neurons, values
+
+    def state_dict(self, destination=None, prefix='', keep_vars=False):
+        sd = self.dense0.state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars)
+        sd['neurons'] = self.neurons
+        return sd
+
+    def load_state_dict(self, state_dict, strict=True):
+        if 'neurons' in state_dict:
+            self.neurons = state_dict['neurons']
+
+        sd = {}
+        for k, v in state_dict.items():
+            if k != 'neurons':
+                sd[k] = v
+
+        self.dense0.load_state_dict(sd, strict=strict)
+
+
+class SentimentClassifier(nn.Module):
+    """Container module with an encoder, a recurrent module, and a decoder."""
+
+    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, all_layers=False):
+        super().__init__()
+        self.encoder = RNNFeaturizer(rnn_type, ntoken, ninp, nhid, nlayers, dropout=dropout, all_layers=all_layers)
+        self.classifier = BinaryClassifier(num_features=self.encoder.output_size)
+
+        self.neurons_ = None
+
+    def forward(self, input, seq_len=None, get_hidden=False):
+        self.encoder.rnn.reset_hidden(input.size(1))
+        hidden = self.encoder(input, seq_len=seq_len, get_hidden=get_hidden)
+        if get_hidden:
+            hidden = hidden[0]
+        if self.neurons is not None:
+            hidden = hidden[:, self.neurons].contiguous()
+        return self.classifier(hidden)
+
+    def state_dict(self, destination=None, prefix='', keep_vars=False):
+        sd = {}
+        sd['encoder'] = self.encoder.state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars)
+        sd['classifier'] = self.classifier.state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars)
+        return sd
+
+    def load_state_dict(self, state_dict, strict=True):
+        self.encoder.load_state_dict(state_dict['encoder'], strict=strict)
+        self.classifier.load_state_dict(state_dict['classifier'], strict=strict)
+        self.neurons = self.classifier.neurons
+
+    def get_neurons(self, **kwargs):
+        return self.classifier.get_neurons(**kwargs)
+
+    def set_neurons(self, num_neurons=None):
+        rtn = self.classifier.set_neurons(num_neurons=num_neurons)
+        self.neurons_ = self.classifier.neurons
+        return rtn
+
+    @property
+    def neurons(self):
+        return self.neurons_
+
+    @neurons.setter
+    def neurons(self, val):
+        self.neurons_ = val
+        self.classifier.neurons = val
model/transformer.py

Lines changed: 13 additions & 0 deletions

@@ -1,3 +1,16 @@
+###############################################################################
+# BSD 3-Clause License
+#
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Copyright (c) 2017, Facebook, Inc. All rights reserved.
+###############################################################################
+'''
+Code adapted from https://github.com/pytorch/fairseq/blob/master/fairseq/models/transformer.py
+Introduced optimal gradient checkpointing for intermediate layers
+'''
+
+
 import math
 import torch
 import torch.nn as nn
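
The "optimal gradient checkpointing" noted in this header trades compute for memory: activations of intermediate layers are dropped in the forward pass and recomputed during backward. A minimal sketch of the general technique using stock torch.utils.checkpoint (illustrative only; not necessarily the exact scheme this file implements):

import torch
import torch.nn as nn
from torch.utils.checkpoint import checkpoint

class CheckpointedStack(nn.Module):
    def __init__(self, dim=512, nlayers=6):
        super().__init__()
        self.layers = nn.ModuleList(nn.Linear(dim, dim) for _ in range(nlayers))

    def forward(self, x):
        for layer in self.layers:
            # Activations inside `layer` are recomputed on backward instead of stored.
            x = checkpoint(layer, x)
        return x

x = torch.randn(4, 512, requires_grad=True)  # input must require grad for gradients to flow through checkpoints
CheckpointedStack()(x).sum().backward()

Checkpointing every sqrt(n)-th layer rather than every layer is the classic O(sqrt(n))-memory optimum, which is presumably what "optimal" refers to here.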

model/transformer_utils.py

Lines changed: 12 additions & 0 deletions

@@ -1,3 +1,15 @@
+###############################################################################
+# BSD 3-Clause License
+#
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Copyright (c) 2017, Facebook, Inc. All rights reserved.
+###############################################################################
+'''
+Code adapted from https://github.com/pytorch/fairseq/blob/master/fairseq/models/transformer.py
+Introduced optimal gradient checkpointing for intermediate layers in ./transformer.py
+'''
+
 import math
 import torch
 import torch.nn as nn

transfer.py

Lines changed: 8 additions & 5 deletions

@@ -68,14 +68,17 @@ def get_model(args):
     sd = sd['sd']
     if 'lm_encoder' in sd:
         sd = sd['lm_encoder']
-
     try:
         model.load_state_dict(sd)
     except:
         # if state dict has weight normalized parameters apply and remove weight norm to model while loading sd
-        apply_weight_norm(model)
+        if hasattr(model, 'rnn'):
+            apply_weight_norm(model.rnn)
+        else:
+            apply_weight_norm(model)
         model.load_state_dict(sd)
         remove_weight_norm(model)
+
     return model

 def transform(model, text, args):

@@ -109,7 +112,7 @@ def get_outs(text_batch, length_batch):
     if args.model.lower() == 'transformer' or args.model.lower() == 'bert':
         cell_out, lm_or_encoder_out = model(text_batch, length_batch, args.get_hidden)
     else:
-        model.lm_encoder.rnn.reset_hidden(args.batch_size)
+        model.rnn.reset_hidden(args.batch_size)
         for _ in range(1 + args.num_hidden_warmup):
             cell_out, lm_or_encoder_out = model(text_batch, length_batch, args.get_hidden)
     return cell_out, lm_or_encoder_out

@@ -369,11 +372,11 @@ def main():
     clf_sd = {'weight': torch.from_numpy(logreg_model.coef_).half(), 'bias': torch.from_numpy(logreg_model.intercept_).half()}
     sd['classifier'] = clf_sd
     model.float().cpu()
-    sd['encoder'] = model.state_dict()
+    sd['lm_encoder'] = model.state_dict()
     with open(os.path.join(save_root, 'classifier.pt'), 'wb') as f:
         torch.save(sd, f)
     model.half()
-    sd['encoder'] = model.state_dict()
+    sd['lm_encoder'] = model.state_dict()
     with open(os.path.join(save_root, 'classifier.pt.16'), 'wb') as f:
         torch.save(sd, f)
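
The fallback in the first hunk exists because a checkpoint saved from a weight-normalized model stores weight_g/weight_v parameters instead of weight, so a plain load_state_dict raises; the fix additionally scopes the reparameterization to model.rnn when that submodule exists. A small sketch of the underlying pattern with PyTorch's built-in utilities (this repository's apply_weight_norm/remove_weight_norm helpers are presumably recursive wrappers over these):

import torch.nn as nn

model = nn.Linear(4, 4)
# Simulate a checkpoint saved under weight norm: its keys are bias, weight_g, weight_v.
sd = nn.utils.weight_norm(nn.Linear(4, 4)).state_dict()

try:
    model.load_state_dict(sd)            # fails: 'weight' missing, weight_g/v unexpected
except RuntimeError:
    nn.utils.weight_norm(model)          # reparameterize so the key sets match,
    model.load_state_dict(sd)            # load the normalized parameters,
    nn.utils.remove_weight_norm(model)   # then fold them back into a plain weight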
