Skip to content

Commit fd593cd

Browse files
craigdsacdha
authored and committed
fix: queryset slicing and reduced code duplication
Now pagination will not lazy-load all earlier pages before returning the result. Thanks to @craigds for the patch. Closes django-haystack#1269. Closes django-haystack#960.
1 parent 20cee68 commit fd593cd

File tree

4 files changed

+109
-239
lines changed

4 files changed

+109
-239
lines changed

haystack/query.py

Lines changed: 90 additions & 180 deletions
Original file line numberDiff line numberDiff line change
@@ -154,36 +154,6 @@ def _manual_iter(self):
154154
if not self._fill_cache(current_position, current_position + ITERATOR_LOAD_PER_QUERY):
155155
raise StopIteration
156156

157-
def _fill_cache(self, start, end, **kwargs):
158-
# Tell the query where to start from and how many we'd like.
159-
self.query._reset()
160-
self.query.set_limits(start, end)
161-
results = self.query.get_results(**kwargs)
162-
163-
if results is None or len(results) == 0:
164-
return False
165-
166-
# Setup the full cache now that we know how many results there are.
167-
# We need the ``None``s as placeholders to know what parts of the
168-
# cache we have/haven't filled.
169-
# Using ``None`` like this takes up very little memory. In testing,
170-
# an array of 100,000 ``None``s consumed less than .5 Mb, which ought
171-
# to be an acceptable loss for consistent and more efficient caching.
172-
if len(self._result_cache) == 0:
173-
self._result_cache = [None] * self.query.get_count()
174-
175-
if start is None:
176-
start = 0
177-
178-
if end is None:
179-
end = self.query.get_count()
180-
181-
to_cache = self.post_process_results(results)
182-
183-
# Assign by slice.
184-
self._result_cache[start:start + len(to_cache)] = to_cache
185-
return True
186-
187157
def post_process_results(self, results):
188158
to_cache = []
189159

@@ -198,15 +168,7 @@ def post_process_results(self, results):
198168

199169
# Load the objects for each model in turn.
200170
for model in models_pks:
201-
try:
202-
ui = connections[self.query._using].get_unified_index()
203-
index = ui.get_index(model)
204-
objects = index.read_queryset(using=self.query._using)
205-
loaded_objects[model] = objects.in_bulk(models_pks[model])
206-
except NotHandled:
207-
self.log.warning("Model '%s' not handled by the routers", model)
208-
# Revert to old behaviour
209-
loaded_objects[model] = model._default_manager.in_bulk(models_pks[model])
171+
loaded_objects[model] = self._load_model_objects(model, models_pks[model])
210172

211173
for result in results:
212174
if self._load_all:
@@ -223,12 +185,86 @@ def post_process_results(self, results):
223185
# The object was either deleted since we indexed or should
224186
# be ignored; fail silently.
225187
self._ignored_result_count += 1
188+
189+
# avoid an unfilled None at the end of the result cache
190+
self._result_cache.pop()
226191
continue
227192

228193
to_cache.append(result)
229194

230195
return to_cache
231196

197+
def _load_model_objects(self, model, pks):
198+
try:
199+
ui = connections[self.query._using].get_unified_index()
200+
index = ui.get_index(model)
201+
objects = index.read_queryset(using=self.query._using)
202+
return objects.in_bulk(pks)
203+
except NotHandled:
204+
self.log.warning("Model '%s' not handled by the routers.", model)
205+
# Revert to old behaviour
206+
return model._default_manager.in_bulk(pks)
207+
208+
def _fill_cache(self, start, end, **kwargs):
209+
# Tell the query where to start from and how many we'd like.
210+
self.query._reset()
211+
212+
if start is None:
213+
start = 0
214+
215+
query_start = start
216+
query_start += self._ignored_result_count
217+
query_end = end
218+
if query_end is not None:
219+
query_end += self._ignored_result_count
220+
221+
self.query.set_limits(query_start, query_end)
222+
results = self.query.get_results(**kwargs)
223+
224+
if results is None or len(results) == 0:
225+
# trim missing stuff from the result cache
226+
self._result_cache = self._result_cache[:start]
227+
return False
228+
229+
# Setup the full cache now that we know how many results there are.
230+
# We need the ``None``s as placeholders to know what parts of the
231+
# cache we have/haven't filled.
232+
# Using ``None`` like this takes up very little memory. In testing,
233+
# an array of 100,000 ``None``s consumed less than .5 Mb, which ought
234+
# to be an acceptable loss for consistent and more efficient caching.
235+
if len(self._result_cache) == 0:
236+
self._result_cache = [None] * self.query.get_count()
237+
238+
fill_start, fill_end = start, end
239+
if fill_end is None:
240+
fill_end = self.query.get_count()
241+
cache_start = fill_start
242+
243+
while True:
244+
to_cache = self.post_process_results(results)
245+
246+
# Assign by slice.
247+
self._result_cache[cache_start:cache_start + len(to_cache)] = to_cache
248+
249+
if None in self._result_cache[start:end]:
250+
fill_start = fill_end
251+
fill_end += ITERATOR_LOAD_PER_QUERY
252+
cache_start += len(to_cache)
253+
254+
# Tell the query where to start from and how many we'd like.
255+
self.query._reset()
256+
self.query.set_limits(fill_start, fill_end)
257+
results = self.query.get_results()
258+
259+
if results is None or len(results) == 0:
260+
# No more results. Trim missing stuff from the result cache
261+
self._result_cache = self._result_cache[:cache_start]
262+
break
263+
else:
264+
break
265+
266+
return True
267+
232268
def __getitem__(self, k):
233269
"""
234270
Retrieves an item or slice from the set of results.
@@ -665,151 +701,30 @@ def post_process_results(self, results):
665701
class RelatedSearchQuerySet(SearchQuerySet):
666702
"""
667703
A variant of the SearchQuerySet that can handle `load_all_queryset`s.
668-
669-
This is predominantly different in the `_fill_cache` method, as it is
670-
far less efficient but needs to fill the cache before it to maintain
671-
consistency.
672704
"""
673705

674706
def __init__(self, *args, **kwargs):
675707
super(RelatedSearchQuerySet, self).__init__(*args, **kwargs)
676708
self._load_all_querysets = {}
677709
self._result_cache = []
678710

679-
def _cache_is_full(self):
680-
return len(self._result_cache) >= len(self)
681-
682-
def _manual_iter(self):
683-
# If we're here, our cache isn't fully populated.
684-
# For efficiency, fill the cache as we go if we run out of results.
685-
# Also, this can't be part of the __iter__ method due to Python's rules
686-
# about generator functions.
687-
current_position = 0
688-
current_cache_max = 0
689-
690-
while True:
691-
current_cache_max = len(self._result_cache)
692-
693-
while current_position < current_cache_max:
694-
yield self._result_cache[current_position]
695-
current_position += 1
696-
697-
if self._cache_is_full():
698-
raise StopIteration
699-
700-
# We've run out of results and haven't hit our limit.
701-
# Fill more of the cache.
702-
start = current_position + self._ignored_result_count
703-
704-
if not self._fill_cache(start, start + ITERATOR_LOAD_PER_QUERY):
705-
raise StopIteration
706-
707-
def _fill_cache(self, start, end):
708-
# Tell the query where to start from and how many we'd like.
709-
self.query._reset()
710-
self.query.set_limits(start, end)
711-
results = self.query.get_results()
712-
713-
if len(results) == 0:
714-
return False
715-
716-
if start is None:
717-
start = 0
718-
719-
if end is None:
720-
end = self.query.get_count()
721-
722-
# Check if we wish to load all objects.
723-
if self._load_all:
724-
models_pks = {}
725-
loaded_objects = {}
726-
727-
# Remember the search position for each result so we don't have to resort later.
728-
for result in results:
729-
models_pks.setdefault(result.model, []).append(result.pk)
730-
731-
# Load the objects for each model in turn.
732-
for model in models_pks:
733-
if model in self._load_all_querysets:
734-
# Use the overriding queryset.
735-
loaded_objects[model] = self._load_all_querysets[model].in_bulk(models_pks[model])
736-
else:
737-
# Check the SearchIndex for the model for an override.
738-
try:
739-
index = connections[self.query._using].get_unified_index().get_index(model)
740-
qs = index.load_all_queryset()
741-
loaded_objects[model] = qs.in_bulk(models_pks[model])
742-
except NotHandled:
743-
# The model returned doesn't seem to be handled by the
744-
# routers. We should silently fail and populate
745-
# nothing for those objects.
746-
loaded_objects[model] = []
747-
748-
if len(results) + len(self._result_cache) < len(self) and len(results) < ITERATOR_LOAD_PER_QUERY:
749-
self._ignored_result_count += ITERATOR_LOAD_PER_QUERY - len(results)
750-
751-
for result in results:
752-
if self._load_all:
753-
# We have to deal with integer keys being cast from strings; if this
754-
# fails we've got a character pk.
755-
try:
756-
result.pk = int(result.pk)
757-
except ValueError:
758-
pass
759-
try:
760-
result._object = loaded_objects[result.model][result.pk]
761-
except (KeyError, IndexError):
762-
# The object was either deleted since we indexed or should
763-
# be ignored; fail silently.
764-
self._ignored_result_count += 1
765-
continue
766-
767-
self._result_cache.append(result)
768-
769-
return True
770-
771-
def __getitem__(self, k):
772-
"""
773-
Retrieves an item or slice from the set of results.
774-
"""
775-
if not isinstance(k, (slice, six.integer_types)):
776-
raise TypeError
777-
778-
assert ((not isinstance(k, slice) and (k >= 0))
779-
or (isinstance(k, slice) and (k.start is None or k.start >= 0)
780-
and (k.stop is None or k.stop >= 0))), \
781-
"Negative indexing is not supported."
782-
783-
# Remember if it's a slice or not. We're going to treat everything as
784-
# a slice to simply the logic and will `.pop()` at the end as needed.
785-
if isinstance(k, slice):
786-
is_slice = True
787-
start = k.start
788-
789-
if k.stop is not None:
790-
bound = int(k.stop)
791-
else:
792-
bound = None
711+
def _load_model_objects(self, model, pks):
712+
if model in self._load_all_querysets:
713+
# Use the overriding queryset.
714+
return self._load_all_querysets[model].in_bulk(pks)
793715
else:
794-
is_slice = False
795-
start = k
796-
bound = k + 1
716+
# Check the SearchIndex for the model for an override.
797717

798-
# We need check to see if we need to populate more of the cache.
799-
if len(self._result_cache) <= 0 or not self._cache_is_full():
800718
try:
801-
while len(self._result_cache) < bound and not self._cache_is_full():
802-
current_max = len(self._result_cache) + self._ignored_result_count
803-
self._fill_cache(current_max, current_max + ITERATOR_LOAD_PER_QUERY)
804-
except StopIteration:
805-
# There's nothing left, even though the bound is higher.
806-
pass
807-
808-
# Cache should be full enough for our needs.
809-
if is_slice:
810-
return self._result_cache[start:bound]
811-
else:
812-
return self._result_cache[start]
719+
ui = connections[self.query._using].get_unified_index()
720+
index = ui.get_index(model)
721+
qs = index.load_all_queryset()
722+
return qs.in_bulk(pks)
723+
except NotHandled:
724+
# The model returned doesn't seem to be handled by the
725+
# routers. We should silently fail and populate
726+
# nothing for those objects.
727+
return {}
813728

814729
def load_all_queryset(self, model, queryset):
815730
"""
@@ -824,11 +739,6 @@ def load_all_queryset(self, model, queryset):
824739
return clone
825740

826741
def _clone(self, klass=None):
827-
if klass is None:
828-
klass = self.__class__
829-
830-
query = self.query._clone()
831-
clone = klass(query=query)
832-
clone._load_all = self._load_all
742+
clone = super(RelatedSearchQuerySet, self)._clone(klass=klass)
833743
clone._load_all_querysets = self._load_all_querysets
834744
return clone

test_haystack/elasticsearch_tests/test_elasticsearch_backend.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -921,26 +921,26 @@ def test_related_iter(self):
921921
sqs = self.rsqs.all()
922922
results = set([int(result.pk) for result in sqs])
923923
self.assertEqual(results, set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20]))
924-
self.assertEqual(len(connections['elasticsearch'].queries), 4)
924+
self.assertEqual(len(connections['elasticsearch'].queries), 3)
925925

926926
def test_related_slice(self):
927927
reset_search_queries()
928928
self.assertEqual(len(connections['elasticsearch'].queries), 0)
929929
results = self.rsqs.all().order_by('pub_date')
930930
self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11])
931-
self.assertEqual(len(connections['elasticsearch'].queries), 3)
931+
self.assertEqual(len(connections['elasticsearch'].queries), 1)
932932

933933
reset_search_queries()
934934
self.assertEqual(len(connections['elasticsearch'].queries), 0)
935935
results = self.rsqs.all().order_by('pub_date')
936936
self.assertEqual(int(results[21].pk), 22)
937-
self.assertEqual(len(connections['elasticsearch'].queries), 4)
937+
self.assertEqual(len(connections['elasticsearch'].queries), 1)
938938

939939
reset_search_queries()
940940
self.assertEqual(len(connections['elasticsearch'].queries), 0)
941941
results = self.rsqs.all().order_by('pub_date')
942942
self.assertEqual(set([int(result.pk) for result in results[20:30]]), set([21, 22, 23]))
943-
self.assertEqual(len(connections['elasticsearch'].queries), 4)
943+
self.assertEqual(len(connections['elasticsearch'].queries), 1)
944944

945945
def test_related_manual_iter(self):
946946
results = self.rsqs.all()
@@ -949,7 +949,7 @@ def test_related_manual_iter(self):
949949
self.assertEqual(len(connections['elasticsearch'].queries), 0)
950950
results = sorted([int(result.pk) for result in results._manual_iter()])
951951
self.assertEqual(results, list(range(1, 24)))
952-
self.assertEqual(len(connections['elasticsearch'].queries), 4)
952+
self.assertEqual(len(connections['elasticsearch'].queries), 3)
953953

954954
def test_related_fill_cache(self):
955955
reset_search_queries()
@@ -971,7 +971,7 @@ def test_related_cache_is_full(self):
971971
results = self.rsqs.all()
972972
fire_the_iterator_and_fill_cache = [result for result in results]
973973
self.assertEqual(results._cache_is_full(), True)
974-
self.assertEqual(len(connections['elasticsearch'].queries), 5)
974+
self.assertEqual(len(connections['elasticsearch'].queries), 3)
975975

976976
def test_quotes_regression(self):
977977
sqs = self.sqs.auto_query(u"44°48'40''N 20°28'32''E")

0 commit comments

Comments
 (0)