File tree Expand file tree Collapse file tree 2 files changed +44
-0
lines changed Expand file tree Collapse file tree 2 files changed +44
-0
lines changed Original file line number Diff line number Diff line change @@ -59,3 +59,30 @@ def _modify_iter_params(self, params):
59
59
if offset :
60
60
params ['start' ] = '{}/{}' .format (self .key , offset )
61
61
return params
62
+
63
def iter_by_chunks(self, chunksize=10000, *args, **kwargs):
    """Iterate over the items in chunks, yielding one list per chunk.

    This is a convenient method for cases when processing a large amount of
    items from a job isn't ideal in one go due to the large memory needed.
    Instead, this allows you to process it chunk by chunk.

    You can reduce I/O overheads by increasing the chunk size but that
    would also increase the memory consumption.

    :param chunksize: maximum number of items in each yielded list; must
        be a positive integer.
    :param args: extra positional arguments passed on to ``self.iter``.
    :param kwargs: extra keyword arguments passed on to ``self.iter``.
        ``count`` and ``start`` are supplied by this method, so they must
        not be given here.
    :return: a generator of non-empty lists, each holding at most
        ``chunksize`` items.
    :rtype: :class:`types.GeneratorType`
    :raises ValueError: if ``chunksize`` is smaller than 1. As this is a
        generator, the error surfaces when iteration starts.
    """
    # A non-positive chunk size could never satisfy the "short chunk"
    # stop condition below and would loop forever.
    if chunksize < 1:
        raise ValueError('chunksize must be a positive integer')

    processed = 0
    while True:
        # Each chunk starts right after the last item already yielded.
        next_key = '{}/{}'.format(self.key, processed)
        items = list(self.iter(
            *args, count=chunksize, start=next_key, **kwargs))
        # Nothing left (empty collection, or the total was an exact
        # multiple of chunksize): stop without yielding an empty chunk.
        if not items:
            break
        yield items
        # A short chunk means the end was reached; skip the extra request.
        if len(items) < chunksize:
            break
        processed += len(items)
Original file line number Diff line number Diff line change @@ -36,3 +36,20 @@ def test_items_list(spider, json_and_msgpack):
36
36
assert o [0 ] == {'id' : 0 , 'data' : 'data0' }
37
37
assert o [1 ] == {'id' : 1 , 'data' : 'data1' }
38
38
assert o [2 ] == {'id' : 2 , 'data' : 'data2' }
39
+
40
+
41
def test_items_iter_by_chunks(spider, json_and_msgpack):
    """Check that ``iter_by_chunks`` splits the job items into chunks.

    With a chunk size of 2, the expected chunks are one full chunk of two
    items followed by a final short chunk holding the remaining item.
    """
    job = spider.jobs.run(meta={'state': 'running'})
    _add_test_items(job)

    o = job.items.iter_by_chunks(2)
    assert next(o) == [
        {'id': 0, 'data': 'data0'},
        {'id': 1, 'data': 'data1'},
    ]
    assert next(o) == [
        {'id': 2, 'data': 'data2'},
    ]
    # The short chunk above was the last one, so the generator must now be
    # exhausted; the very next call has to raise StopIteration.
    with pytest.raises(StopIteration):
        next(o)
You can’t perform that action at this time.
0 commit comments