4
4
from django .core .management .base import AppCommand , CommandError
5
5
from django .db import reset_queries
6
6
from django .utils .encoding import smart_str
7
+ from haystack .query import SearchQuerySet
7
8
try :
8
9
from django .utils import importlib
9
10
except ImportError :
10
11
from haystack .utils import importlib
12
+ try :
13
+ set
14
+ except NameError :
15
+ from sets import Set as set
11
16
12
17
13
18
DEFAULT_BATCH_SIZE = getattr (settings , 'HAYSTACK_BATCH_SIZE' , 1000 )
@@ -28,6 +33,9 @@ class Command(AppCommand):
28
33
make_option ('-s' , '--site' , action = 'store' , dest = 'site' ,
29
34
type = 'string' , help = 'The site object to use when reindexing (like `search_sites.mysite`).'
30
35
),
36
+ make_option ('-r' , '--remove' , action = 'store_true' , dest = 'remove' ,
37
+ default = False , help = 'Remove objects from the index that are no longer present in the database.'
38
+ ),
31
39
)
32
40
option_list = AppCommand .option_list + base_options
33
41
@@ -51,6 +59,7 @@ def handle(self, *apps, **options):
51
59
self .batchsize = options .get ('batchsize' , DEFAULT_BATCH_SIZE )
52
60
self .age = options .get ('age' , DEFAULT_AGE )
53
61
self .site = options .get ('site' )
62
+ self .remove = options .get ('remove' , False )
54
63
55
64
if not apps :
56
65
from django .db .models import get_app
@@ -111,16 +120,52 @@ def handle_app(self, app, **options):
111
120
if self .verbosity >= 1 :
112
121
print "Indexing %d %s." % (total , smart_str (model ._meta .verbose_name_plural ))
113
122
123
+ pks_seen = set ()
124
+
114
125
for start in range (0 , total , self .batchsize ):
115
126
end = min (start + self .batchsize , total )
116
127
117
- if self .verbosity >= 2 :
118
- print " indexing %s - %d of %d." % (start + 1 , end , total )
119
-
120
128
# Get a clone of the QuerySet so that the cache doesn't bloat up
121
129
# in memory. Useful when reindexing large amounts of data.
122
130
small_cache_qs = qs .all ()
123
- index .backend .update (index , small_cache_qs [start :end ])
131
+ current_qs = small_cache_qs [start :end ]
132
+
133
+ for obj in current_qs :
134
+ pks_seen .add (smart_str (obj .pk ))
135
+
136
+ if self .verbosity >= 2 :
137
+ print " indexing %s - %d of %d." % (start + 1 , end , total )
138
+
139
+ index .backend .update (index , current_qs )
124
140
125
141
# Clear out the DB connections queries because it bloats up RAM.
126
142
reset_queries ()
143
+
144
+ if self .remove :
145
+ if self .age or total <= 0 :
146
+ # They're using a reduced set, which may not incorporate
147
+ # all pks. Rebuild the list with everything.
148
+ pks_seen = set ()
149
+ qs = index .get_queryset ().values_list ('pk' , flat = True )
150
+ total = qs .count ()
151
+
152
+ for pk in qs :
153
+ pks_seen .add (smart_str (pk ))
154
+
155
+ for start in range (0 , total , self .batchsize ):
156
+ upper_bound = start + self .batchsize
157
+
158
+ # Fetch a list of results.
159
+ # Can't do pk range, because id's are strings (thanks comments
160
+ # & UUIDs!).
161
+ stuff_in_the_index = SearchQuerySet ().models (model )[start :upper_bound ]
162
+
163
+ # Iterate over those results.
164
+ for result in stuff_in_the_index :
165
+ # Be careful not to hit the DB.
166
+ if not smart_str (result .pk ) in pks_seen :
167
+ # The pk is NOT in pks_seen, issue a delete.
168
+ if self .verbosity >= 2 :
169
+ print " removing %s." % result .pk
170
+
171
+ index .backend .remove ("." .join ([result .app_label , result .model_name , result .pk ]))
0 commit comments