Skip to content

Commit 8e3cfef

Browse files
committed
WIP: fix derive logic and redundant hash calculation
Removed all file counting; the derive is now simply queued once at the end, after all uploads have finished.
1 parent 55431a5 commit 8e3cfef

File tree

2 files changed

+12
-76
lines changed

2 files changed

+12
-76
lines changed

internetarchive/item.py

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
from requests.exceptions import HTTPError
5050

5151
from internetarchive.utils import IdentifierListAsItems, get_md5, chunk_generator, \
52-
IterableToFileAdapter, iter_directory, recursive_file_count, norm_filepath
52+
IterableToFileAdapter, iter_directory, norm_filepath
5353
from internetarchive.files import File
5454
from internetarchive.iarequest import MetadataRequest, S3Request
5555
from internetarchive.auth import S3Auth
@@ -1165,45 +1165,32 @@ def upload(self, files,
11651165
"""
11661166
queue_derive = True if queue_derive is None else queue_derive
11671167
remote_dir_name = None
1168-
total_files = None
1168+
11691169
if isinstance(files, dict):
11701170
if files.get('name'):
11711171
files = [files]
1172-
total_files = 1
11731172
else:
11741173
files = list(files.items())
11751174
if not isinstance(files, (list, tuple)):
11761175
files = [files]
1177-
if all(isinstance(f, dict) and f.get('name') for f in files):
1178-
total_files = len(files)
11791176

11801177
responses = []
11811178
file_index = 0
1182-
if queue_derive and total_files is None:
1183-
if checksum:
1184-
total_files = recursive_file_count(files, item=self, checksum=True)
1185-
else:
1186-
total_files = recursive_file_count(files, item=self, checksum=False)
11871179
file_metadata = None
11881180
for f in files:
1181+
11891182
if isinstance(f, dict):
11901183
if f.get('name'):
11911184
file_metadata = f.copy()
11921185
del file_metadata['name']
11931186
f = f['name']
1187+
11941188
if (isinstance(f, string_types) and is_dir(f)) \
11951189
or (isinstance(f, tuple) and is_dir(f[-1])):
11961190
if isinstance(f, tuple):
11971191
remote_dir_name = f[0].strip('/')
11981192
f = f[-1]
11991193
for filepath, key in iter_directory(f):
1200-
file_index += 1
1201-
# Set derive header if queue_derive is True,
1202-
# and this is the last request being made.
1203-
if queue_derive is True and file_index >= total_files:
1204-
_queue_derive = True
1205-
else:
1206-
_queue_derive = False
12071194
if not f.endswith('/'):
12081195
if remote_dir_name:
12091196
key = '{0}{1}/{2}'.format(remote_dir_name, f, key)
@@ -1219,7 +1206,7 @@ def upload(self, files,
12191206
headers=headers,
12201207
access_key=access_key,
12211208
secret_key=secret_key,
1222-
queue_derive=_queue_derive,
1209+
queue_derive=False,
12231210
verbose=verbose,
12241211
verify=verify,
12251212
checksum=checksum,
@@ -1231,15 +1218,6 @@ def upload(self, files,
12311218
request_kwargs=request_kwargs)
12321219
responses.append(resp)
12331220
else:
1234-
file_index += 1
1235-
# Set derive header if queue_derive is True,
1236-
# and this is the last request being made.
1237-
# if queue_derive is True and file_index >= len(files):
1238-
if queue_derive is True and file_index >= total_files:
1239-
_queue_derive = True
1240-
else:
1241-
_queue_derive = False
1242-
12431221
if not isinstance(f, (list, tuple)):
12441222
key, body = (None, f)
12451223
else:
@@ -1253,7 +1231,7 @@ def upload(self, files,
12531231
headers=headers,
12541232
access_key=access_key,
12551233
secret_key=secret_key,
1256-
queue_derive=_queue_derive,
1234+
queue_derive=False,
12571235
verbose=verbose,
12581236
verify=verify,
12591237
checksum=checksum,
@@ -1264,6 +1242,12 @@ def upload(self, files,
12641242
validate_identifier=validate_identifier,
12651243
request_kwargs=request_kwargs)
12661244
responses.append(resp)
1245+
1246+
if queue_derive:
1247+
# Came this far without any exceptions raised, so all uploads
1248+
# probably completed successfully. Derive now.
1249+
self.derive()
1250+
12671251
return responses
12681252

12691253

internetarchive/utils.py

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -222,54 +222,6 @@ def iter_directory(directory):
222222
yield (filepath, key)
223223

224224

225-
def recursive_file_count(files, item=None, checksum=False):
226-
"""Given a filepath or list of filepaths, return the total number of files."""
227-
if not isinstance(files, (list, set)):
228-
files = [files]
229-
total_files = 0
230-
if checksum is True:
231-
md5s = [f.get('md5') for f in item.files]
232-
else:
233-
md5s = list()
234-
if isinstance(files, dict):
235-
# make sure to use local filenames.
236-
_files = files.values()
237-
else:
238-
if isinstance(files[0], tuple):
239-
_files = dict(files).values()
240-
else:
241-
_files = files
242-
for f in _files:
243-
try:
244-
is_dir = os.path.isdir(f)
245-
except TypeError:
246-
try:
247-
f = f[0]
248-
is_dir = os.path.isdir(f)
249-
except (AttributeError, TypeError):
250-
is_dir = False
251-
if is_dir:
252-
for x, _ in iter_directory(f):
253-
if checksum is True:
254-
with open(x, 'rb') as fh:
255-
lmd5 = get_md5(fh)
256-
if lmd5 in md5s:
257-
continue
258-
total_files += 1
259-
else:
260-
if checksum is True:
261-
try:
262-
with open(f, 'rb') as fh:
263-
lmd5 = get_md5(fh)
264-
except TypeError:
265-
# Support file-like objects.
266-
lmd5 = get_md5(f)
267-
if lmd5 in md5s:
268-
continue
269-
total_files += 1
270-
return total_files
271-
272-
273225
def is_dir(obj):
274226
"""Special is_dir function to handle file-like object cases that
275227
cannot be stat'd"""

0 commit comments

Comments
 (0)