
Commit 1039e00

Add GitHub action and nn.SiLU
1 parent c1ee2d1 commit 1039e00

3 files changed: 30 additions, 19 deletions

.github/workflows/main.yml

Lines changed: 5 additions & 0 deletions
```diff
@@ -0,0 +1,5 @@
+- name: Publish a Python distribution to PyPI
+  uses: pypa/gh-action-pypi-publish@release/v1
+  with:
+    user: __token__
+    password: ${{ secrets.PYPI_API_TOKEN }}
```
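This step uses `pypa/gh-action-pypi-publish`, the official PyPA publishing action. The literal username `__token__` tells PyPI to authenticate with an API token, which must be stored in the repository secret `PYPI_API_TOKEN`.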

efficientnet_pytorch/model.py

Lines changed: 8 additions & 7 deletions
```diff
@@ -50,7 +50,7 @@ class MBConvBlock(nn.Module):
     def __init__(self, block_args, global_params, image_size=None):
         super().__init__()
         self._block_args = block_args
-        self._bn_mom = 1 - global_params.batch_norm_momentum # pytorch's difference from tensorflow
+        self._bn_mom = 1 - global_params.batch_norm_momentum  # pytorch's difference from tensorflow
         self._bn_eps = global_params.batch_norm_epsilon
         self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1)
         self.id_skip = block_args.id_skip  # whether to use skip connection and drop connect
```
```diff
@@ -196,7 +196,7 @@ def __init__(self, blocks_args=None, global_params=None):
             # The first block needs to take care of stride and filter size increase.
             self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
             image_size = calculate_output_image_size(image_size, block_args.stride)
-            if block_args.num_repeat > 1: # modify block_args to keep same output size
+            if block_args.num_repeat > 1:  # modify block_args to keep same output size
                 block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
             for _ in range(block_args.num_repeat - 1):
                 self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
```
```diff
@@ -261,15 +261,15 @@ def extract_endpoints(self, inputs):
         for idx, block in enumerate(self._blocks):
             drop_connect_rate = self._global_params.drop_connect_rate
             if drop_connect_rate:
-                drop_connect_rate *= float(idx) / len(self._blocks) # scale drop connect_rate
+                drop_connect_rate *= float(idx) / len(self._blocks)  # scale drop connect_rate
             x = block(x, drop_connect_rate=drop_connect_rate)
             if prev_x.size(2) > x.size(2):
-                endpoints['reduction_{}'.format(len(endpoints)+1)] = prev_x
+                endpoints['reduction_{}'.format(len(endpoints) + 1)] = prev_x
             prev_x = x

         # Head
         x = self._swish(self._bn1(self._conv_head(x)))
-        endpoints['reduction_{}'.format(len(endpoints)+1)] = x
+        endpoints['reduction_{}'.format(len(endpoints) + 1)] = x

         return endpoints

```
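For readers following along, a minimal usage sketch of `extract_endpoints`; the model name and input size here are assumptions for illustration, and exact shapes depend on the variant:

```python
import torch
from efficientnet_pytorch import EfficientNet

model = EfficientNet.from_name('efficientnet-b0')
model.eval()
with torch.no_grad():
    endpoints = model.extract_endpoints(torch.randn(1, 3, 224, 224))
for name, feat in endpoints.items():
    # reduction_1, reduction_2, ... at progressively halved resolutions
    print(name, tuple(feat.shape))
```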

```diff
@@ -290,7 +290,7 @@ def extract_features(self, inputs):
         for idx, block in enumerate(self._blocks):
             drop_connect_rate = self._global_params.drop_connect_rate
             if drop_connect_rate:
-                drop_connect_rate *= float(idx) / len(self._blocks) # scale drop connect_rate
+                drop_connect_rate *= float(idx) / len(self._blocks)  # scale drop connect_rate
             x = block(x, drop_connect_rate=drop_connect_rate)

         # Head
```
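For concreteness, the scaling above implements linearly increasing stochastic depth: with `drop_connect_rate = 0.2` and 16 blocks, block 0 gets an effective rate of 0.0 while block 15 gets 0.2 × 15/16 ≈ 0.19, so later blocks are dropped more often.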
```diff
@@ -373,7 +373,8 @@ def from_pretrained(cls, model_name, weights_path=None, advprop=False,
             A pretrained efficientnet model.
         """
         model = cls.from_name(model_name, num_classes=num_classes, **override_params)
-        load_pretrained_weights(model, model_name, weights_path=weights_path, load_fc=(num_classes == 1000), advprop=advprop)
+        load_pretrained_weights(model, model_name, weights_path=weights_path,
+                                load_fc=(num_classes == 1000), advprop=advprop)
         model._change_in_channels(in_channels)
         return model

```
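A short usage sketch of the call being re-wrapped above (the class count is a hypothetical value for illustration); because of `load_fc=(num_classes == 1000)`, a custom `num_classes` loads the backbone weights but leaves the final fully connected layer randomly initialized:

```python
from efficientnet_pytorch import EfficientNet

# _fc is re-initialized here because num_classes != 1000.
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=10)
```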

efficientnet_pytorch/utils.py

Lines changed: 17 additions & 12 deletions
```diff
@@ -17,7 +17,7 @@


 ################################################################################
-### Help functions for model architecture
+# Help functions for model architecture
 ################################################################################

 # GlobalParams and BlockArgs: Two namedtuples
```
```diff
@@ -50,11 +50,14 @@
 GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
 BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)

-
-# An ordinary implementation of Swish function
-class Swish(nn.Module):
-    def forward(self, x):
-        return x * torch.sigmoid(x)
+# Swish activation function
+if hasattr(nn, 'SiLU'):
+    Swish = nn.SiLU
+else:
+    # For compatibility with old PyTorch versions
+    class Swish(nn.Module):
+        def forward(self, x):
+            return x * torch.sigmoid(x)


 # A memory-efficient implementation of Swish function
```
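A quick sanity check that the alias is behavior-preserving on PyTorch ≥ 1.7, where `nn.SiLU` was introduced:

```python
import torch
import torch.nn as nn

x = torch.randn(4, 8)
manual = x * torch.sigmoid(x)          # the old fallback implementation
assert torch.allclose(nn.SiLU()(x), manual)
```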
```diff
@@ -97,10 +100,10 @@ def round_filters(filters, global_params):
     divisor = global_params.depth_divisor
     min_depth = global_params.min_depth
     filters *= multiplier
-    min_depth = min_depth or divisor # pay attention to this line when using min_depth
+    min_depth = min_depth or divisor  # pay attention to this line when using min_depth
     # follow the formula transferred from official TensorFlow implementation
     new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
-    if new_filters < 0.9 * filters: # prevent rounding by more than 10%
+    if new_filters < 0.9 * filters:  # prevent rounding by more than 10%
         new_filters += divisor
     return int(new_filters)

```
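As a worked example of the rounding rule in `round_filters`, here is a standalone re-statement with plain scalars in place of `global_params` (the multiplier and divisor values are assumptions for illustration):

```python
def round_filters_sketch(filters, multiplier=1.1, divisor=8, min_depth=None):
    # Same arithmetic as round_filters above, with scalar arguments.
    filters *= multiplier
    min_depth = min_depth or divisor
    new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
    if new_filters < 0.9 * filters:  # prevent rounding down by more than 10%
        new_filters += divisor
    return int(new_filters)

print(round_filters_sketch(32))  # 32 * 1.1 = 35.2 -> 32 (nearest multiple of 8)
print(round_filters_sketch(40))  # 40 * 1.1 = 44.0 -> 48
```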

```diff
@@ -234,7 +237,7 @@ def forward(self, x):
         ih, iw = x.size()[-2:]
         kh, kw = self.weight.size()[-2:]
         sh, sw = self.stride
-        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) # change the output size according to stride ! ! !
+        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)  # change the output size according to stride ! ! !
         pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
         pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
         if pad_h > 0 or pad_w > 0:
```
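A worked instance of the 'SAME' padding arithmetic above, with assumed sizes:

```python
import math

ih, kh, sh, dh = 5, 3, 2, 1  # input height, kernel, stride, dilation
oh = math.ceil(ih / sh)      # TF 'SAME' output size: ceil(5 / 2) = 3
pad_h = max((oh - 1) * sh + (kh - 1) * dh + 1 - ih, 0)  # 4 + 2 + 1 - 5 = 2
print(oh, pad_h)             # 3 2 -> later split as 1 row on top, 1 on bottom
```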
```diff
@@ -312,6 +315,7 @@ def forward(self, x):
         return F.max_pool2d(x, self.kernel_size, self.stride, self.padding,
                             self.dilation, self.ceil_mode, self.return_indices)

+
 class MaxPool2dStaticSamePadding(nn.MaxPool2d):
     """2D MaxPooling like TensorFlow's 'SAME' mode, with the given input image size.
     The padding module is calculated in the construction function, then used in forward.
```
```diff
@@ -344,7 +348,7 @@ def forward(self, x):


 ################################################################################
-### Helper functions for loading model params
+# Helper functions for loading model params
 ################################################################################

 # BlockDecoder: A Class for encoding and decoding BlockArgs
```
```diff
@@ -577,7 +581,7 @@ def get_model_params(model_name, override_params):
 # TODO: add the pretrained weights url map of 'efficientnet-l2'


-def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False):
+def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False, verbose=True):
     """Loads pretrained weights from weights path or download using url.

     Args:
```
```diff
@@ -608,4 +612,5 @@ def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True,
             ['_fc.weight', '_fc.bias']), 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys)
     assert not ret.unexpected_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.unexpected_keys)

-    print('Loaded pretrained weights for {}'.format(model_name))
+    if verbose:
+        print('Loaded pretrained weights for {}'.format(model_name))
```
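A minimal sketch of the new flag; calling `load_pretrained_weights` directly with `verbose=False` suppresses the confirmation message:

```python
from efficientnet_pytorch import EfficientNet
from efficientnet_pytorch.utils import load_pretrained_weights

model = EfficientNet.from_name('efficientnet-b0')
load_pretrained_weights(model, 'efficientnet-b0', verbose=False)  # silent load
```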
