Skip to content

Commit 78d3bff

Browse files
committed
update_index: don't use workers to remove stale records
There was only minimal gain from this because, unlike indexing, removal is a simple bulk operation limited by the search engine. See django-haystack#1194 and django-haystack#1201.
1 parent cb87e9b commit 78d3bff

File tree

1 file changed

+20
-40
lines changed

1 file changed

+20
-40
lines changed

haystack/management/commands/update_index.py

Lines changed: 20 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ def worker(bits):
7171
if func == 'do_update':
7272
qs = index.build_queryset(start_date=start_date, end_date=end_date)
7373
do_update(backend, index, qs, start, end, total, verbosity=verbosity, commit=commit)
74-
elif bits[0] == 'do_remove':
75-
do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=verbosity, commit=commit)
74+
else:
75+
raise NotImplementedError('Unknown function %s' % func)
7676

7777

7878
def do_update(backend, index, qs, start, end, total, verbosity=1, commit=True):
@@ -94,26 +94,6 @@ def do_update(backend, index, qs, start, end, total, verbosity=1, commit=True):
9494
reset_queries()
9595

9696

97-
def do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=1, commit=True):
98-
# Retrieve PKs from the index. Note that this cannot be a range query because keys can be non-numeric
99-
# UUIDs or other custom values.
100-
# To reduce load on the search engine, we only retrieve the pk field, which will be checked against the
101-
# full list obtained from the database, and the id field, which will be used to delete the record should
102-
# it be found to be stale.
103-
index_pks = SearchQuerySet(using=backend.connection_alias).models(model).values_list('pk', 'id')
104-
105-
# Compare the pks from the index to the list obtained from the database:
106-
for pk, rec_id in index_pks[start:upper_bound]:
107-
if smart_bytes(pk) in pks_seen:
108-
continue
109-
110-
# Since the PK was not in the database list, we'll delete the record from the search index:
111-
if verbosity >= 2:
112-
print(" removing %s." % rec_id)
113-
114-
backend.remove(rec_id, commit=commit)
115-
116-
11797
class Command(LabelCommand):
11898
help = "Freshens the index for the given app(s)."
11999
base_options = (
@@ -256,10 +236,6 @@ def update_backend(self, label, using):
256236
pool.join()
257237

258238
if self.remove:
259-
# Close the database connection to avoid a “MySQL has gone away” error
260-
# when using workers:
261-
db.close_connection()
262-
263239
if self.start_date or self.end_date or total <= 0:
264240
# They're using a reduced set, which may not incorporate
265241
# all pks. Rebuild the list with everything.
@@ -270,9 +246,6 @@ def update_backend(self, label, using):
270246
else:
271247
pks_seen = set(smart_bytes(pk) for pk in qs.values_list('pk', flat=True))
272248

273-
if self.workers > 0:
274-
ghetto_queue = []
275-
276249
# Since records may still be in the search index but not the local database
277250
# we'll use that to create batches for processing.
278251
# See https://github.com/django-haystack/django-haystack/issues/1186
@@ -281,14 +254,21 @@ def update_backend(self, label, using):
281254
for start in range(0, index_total, batch_size):
282255
upper_bound = start + batch_size
283256

284-
if self.workers == 0:
285-
do_remove(backend, index, model, pks_seen, start, upper_bound,
286-
verbosity=self.verbosity, commit=self.commit)
287-
else:
288-
ghetto_queue.append(('do_remove', model, pks_seen, start, upper_bound, using,
289-
self.verbosity, self.commit))
290-
291-
if self.workers > 0:
292-
pool = multiprocessing.Pool(self.workers)
293-
pool.map(worker, ghetto_queue)
294-
pool.terminate()
257+
# Retrieve PKs from the index. Note that this cannot be a range query because keys can be
258+
# non-numeric UUIDs or other custom values. To reduce load on the search engine, we only
259+
# retrieve the pk field, which will be checked against the full list obtained from the
260+
# database, and the id field, which will be used to delete the record should it be found
261+
# to be stale.
262+
index_pks = SearchQuerySet(using=backend.connection_alias).models(model).values_list('pk',
263+
'id')
264+
265+
# Compare the pks from the index to the list obtained from the database:
266+
for pk, rec_id in index_pks[start:upper_bound]:
267+
if smart_bytes(pk) in pks_seen:
268+
continue
269+
270+
# Since the PK was not in the database list, we'll delete the record from the search index:
271+
if self.verbosity >= 2:
272+
print(" removing %s." % rec_id)
273+
274+
backend.remove(rec_id, commit=self.commit)

0 commit comments

Comments
 (0)