Merge branch 'master' of github.com:toastdriven/django-haystack

jezdez · jezdez · commit cd233d6c3d6e · 2013-05-10T16:01:44.000-07:00
diff --git a/docs/searchquery_api.rst b/docs/searchquery_api.rst
@@ -250,6 +250,12 @@ to the rest of the ``SearchQuerySet``.
 Allows backends with support for "More Like This" to return results
 similar to the provided instance.
 
+``add_stats_query``
+~~~~~~~~~~~~~~~~~~~
+.. method:: SearchQuery.add_stats_query(self, stats_field, stats_facets)
+
+Adds stats and stats_facets queries for the Solr backend.
+
 ``add_highlight``
 ~~~~~~~~~~~~~~~~~
 
diff --git a/docs/searchqueryset_api.rst b/docs/searchqueryset_api.rst
@@ -360,9 +360,40 @@ Spatial: Adds a distance-based search to the query.
 
 See the :ref:`ref-spatial` docs for more information.
 
+``stats``
+~~~~~~~~~
+
+.. method:: SearchQuerySet.stats(self, field):
+
+Adds stats to a query for the provided field. This is supported on
+Solr only. You provide the field (from one of the ``SearchIndex``
+classes) you would like stats on.
+
+In the search results you get back, stats will be populated in the
+``SearchResult`` object. You can access them via the ``stats_results`` method.
+
+Example::
+
+    # Get stats on the author field.
+    SearchQuerySet().filter(content='foo').stats('author')
+
+``stats_facet``
+~~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.stats_facet(self, field, facet_fields=None):
+
+Adds stats for the given field, faceted by the field(s) given in
+``facet_fields``. This is supported on Solr only.
+
+Example::
+
+    # Get stats on the author field, and stats on the author field
+    # faceted by bookstore.
+    SearchQuerySet().filter(content='foo').stats_facet('author', 'bookstore')
+
+
 ``distance``
 ~~~~~~~~~~~~
-
 .. method:: SearchQuerySet.distance(self, field, point):
 
 Spatial: Denotes results must have distance measurements from the
@@ -632,6 +663,52 @@ Example::
     #     'queries': {}
     # }
 
+``stats_results``
+~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.stats_results(self):
+ 
+Returns the stats results found by the query.
+
+This will cause the query to execute and should generally be used when
+presenting the data (template-level).
+
+You receive back a dictionary keyed by the name of each field you requested
+stats for. Each value is a dictionary of the statistics computed for that
+field (such as ``min``, ``max``, ``sum``, ``count``, ``mean`` and ``stddev``).
+
+.. note::
+
+    The resulting dictionary may change before 1.0 release. It's fairly
+    backend-specific at the time of writing. Standardizing is waiting on
+    implementing other backends that support faceting and ensuring that the
+    results presented will meet their needs as well.
+
+Example::
+
+    # Get stats on the price field.
+    sqs = SearchQuerySet().filter(content='foo').stats('price')
+
+    sqs.stats_results()
+
+    # Gives the following response
+    # {
+    #     'price': {
+    #         'min': 0.0,
+    #         'max': 2199.0,
+    #         'sum': 5251.2699999999995,
+    #         'count': 15,
+    #         'missing': 11,
+    #         'sumOfSquares': 6038619.160300001,
+    #         'mean': 350.08466666666664,
+    #         'stddev': 547.737557906113
+    #     }
+    # }
+
+
 ``spelling_suggestion``
 ~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/haystack/backends/__init__.py b/haystack/backends/__init__.py
@@ -7,7 +7,7 @@
 from django.utils import tree
 from django.utils.encoding import force_unicode
 from haystack.constants import VALID_FILTERS, FILTER_SEPARATOR, DEFAULT_ALIAS
-from haystack.exceptions import MoreLikeThisError, FacetingError
+from haystack.exceptions import MoreLikeThisError, FacetingError, StatsError
 from haystack.models import SearchResult
 from haystack.utils.loading import UnifiedIndex
 
@@ -312,9 +312,10 @@ def __init__(self, using=DEFAULT_ALIAS):
         self._results = None
         self._hit_count = None
         self._facet_counts = None
+        self._stats = None
         self._spelling_suggestion = None
         self.result_class = SearchResult
-
+        self.stats = {}
         from haystack import connections
         self._using = using
         self.backend = connections[self._using].get_backend()
@@ -497,6 +498,17 @@ def get_facet_counts(self):
 
         return self._facet_counts
 
+    def get_stats(self):
+        """
+        Returns the stats received from the backend.
+
+        If no stats have been stored yet (``self._stats`` is ``None``), this
+        will execute the query via ``self.run()`` first.
+        """
+        if self._stats is None:
+            self.run()
+        return self._stats
+
     def get_spelling_suggestion(self, preferred_query=None):
         """
         Returns the spelling suggestion received from the backend.
@@ -693,6 +705,10 @@ def more_like_this(self, model_instance):
         self._more_like_this = True
         self._mlt_instance = model_instance
 
+    def add_stats_query(self,stats_field,stats_facets):
+        """Maps ``stats_field`` to its ``stats_facets`` for the Solr backend."""
+        self.stats[stats_field] = stats_facets
+
     def add_highlight(self):
         """Adds highlighting to the search results."""
         self.highlight = True
@@ -826,6 +842,7 @@ def _clone(self, klass=None, using=None):
         clone.models = self.models.copy()
         clone.boost = self.boost.copy()
         clone.highlight = self.highlight
+        clone.stats = self.stats.copy()
         clone.facets = self.facets.copy()
         clone.date_facets = self.date_facets.copy()
         clone.query_facets = self.query_facets[:]
@@ -838,6 +855,7 @@ def _clone(self, klass=None, using=None):
         clone.distance_point = self.distance_point.copy()
         clone._raw_query = self._raw_query
         clone._raw_query_params = self._raw_query_params
+
         return clone
 
 
diff --git a/haystack/backends/elasticsearch_backend.py b/haystack/backends/elasticsearch_backend.py
@@ -222,9 +222,6 @@ def clear(self, models=[], commit=True):
                 # a ``query`` root object. :/
                 query = {'query_string': {'query': " OR ".join(models_to_delete)}}
                 self.conn.delete_by_query(self.index_name, 'modelresult', query)
-
-            if commit:
-                self.conn.refresh(index=self.index_name)
         except (requests.RequestException, pyelasticsearch.ElasticHttpError), e:
             if not self.silently_fail:
                 raise
diff --git a/haystack/backends/solr_backend.py b/haystack/backends/solr_backend.py
@@ -136,7 +136,7 @@ def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_of
                             narrow_queries=None, spelling_query=None,
                             within=None, dwithin=None, distance_point=None,
                             models=None, limit_to_registered_models=None,
-                            result_class=None):
+                            result_class=None, stats=None):
         kwargs = {'fl': '* score'}
 
         if fields:
@@ -230,6 +230,15 @@ def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_of
         if narrow_queries is not None:
             kwargs['fq'] = list(narrow_queries)
 
+        if stats:
+            kwargs['stats'] = "true"
+
+            for k in stats.keys():
+                kwargs['stats.field'] = k
+
+                for facet in stats[k]:
+                    kwargs['f.%s.stats.facet' % k] = facet
+
         if within is not None:
             from haystack.utils.geo import generate_bounding_box
 
@@ -324,11 +333,15 @@ def _process_results(self, raw_results, highlight=False, result_class=None, dist
         results = []
         hits = raw_results.hits
         facets = {}
+        stats = {}
         spelling_suggestion = None
 
         if result_class is None:
             result_class = SearchResult
 
+        if hasattr(raw_results,'stats'):
+            stats = raw_results.stats.get('stats_fields',{})
+
         if hasattr(raw_results, 'facets'):
             facets = {
                 'fields': raw_results.facets.get('facet_fields', {}),
@@ -391,6 +404,7 @@ def _process_results(self, raw_results, highlight=False, result_class=None, dist
         return {
             'results': results,
             'hits': hits,
+            'stats': stats,
             'facets': facets,
             'spelling_suggestion': spelling_suggestion,
         }
@@ -612,7 +626,7 @@ def build_params(self, spelling_query=None, **kwargs):
         search_kwargs = {
             'start_offset': self.start_offset,
             'result_class': self.result_class
-        }        
+        }
         order_by_list = None
 
         if self.order_by:
@@ -663,16 +677,21 @@ def build_params(self, spelling_query=None, **kwargs):
         if spelling_query:
             search_kwargs['spelling_query'] = spelling_query
 
+        if self.stats:
+            search_kwargs['stats'] = self.stats
+
         return search_kwargs
-        
+
     def run(self, spelling_query=None, **kwargs):
         """Builds and executes the query. Returns a list of search results."""
         final_query = self.build_query()
         search_kwargs = self.build_params(spelling_query, **kwargs)
+
         results = self.backend.search(final_query, **search_kwargs)
         self._results = results.get('results', [])
         self._hit_count = results.get('hits', 0)
         self._facet_counts = self.post_process_facets(results)
+        self._stats = results.get('stats',{})
         self._spelling_suggestion = results.get('spelling_suggestion', None)
 
     def run_mlt(self, **kwargs):
diff --git a/haystack/exceptions.py b/haystack/exceptions.py
@@ -29,3 +29,7 @@ class FacetingError(HaystackError):
 class SpatialError(HaystackError):
     """Raised when incorrect arguments have been provided for spatial."""
     pass
+
+class StatsError(HaystackError):
+    """Raised when incorrect arguments have been provided for stats."""
+    pass
diff --git a/haystack/indexes.py b/haystack/indexes.py
@@ -277,7 +277,7 @@ def remove_object(self, instance, using=None, **kwargs):
         backend = self._get_backend(using)
 
         if backend is not None:
-            backend.remove(instance)
+            backend.remove(instance, **kwargs)
 
     def clear(self, using=None):
         """
diff --git a/haystack/query.py b/haystack/query.py
@@ -78,7 +78,7 @@ def __setstate__(self, data_dict):
 
     def __repr__(self):
         data = list(self[:REPR_OUTPUT_SIZE])
-
+        
         if len(self) > REPR_OUTPUT_SIZE:
             data[-1] = "...(remaining elements truncated)..."
 
@@ -375,7 +375,23 @@ def dwithin(self, field, point, distance):
         clone = self._clone()
         clone.query.add_dwithin(field, point, distance)
         return clone
-
+    
+    def stats(self, field):
+        """Adds stats to a query for the provided field (no stats facets)."""
+        return self.stats_facet(field, facet_fields=None)
+    
+    def stats_facet(self, field, facet_fields=None):
+        """Adds stats for ``field``, faceted by ``facet_fields``: a field name,
+        a list of names, or a list of lists (flattened). ``None`` adds none."""
+        clone = self._clone()
+        stats_facets = []
+        try:
+            stats_facets.append(sum(facet_fields,[]))
+        except TypeError:
+            if facet_fields: stats_facets.append(facet_fields)
+        clone.query.add_stats_query(field,stats_facets)
+        return clone
+       
     def distance(self, field, point):
         """
         Spatial: Denotes results must have distance measurements from the
@@ -491,6 +507,16 @@ def facet_counts(self):
             clone = self._clone()
             return clone.query.get_facet_counts()
 
+    def stats_results(self):
+        """
+        Returns the query's stats results, using a clone if it has not run yet.
+        """
+        if self.query.has_run():
+            return self.query.get_stats()
+        else:
+            clone = self._clone()
+            return clone.query.get_stats()
+            
     def spelling_suggestion(self, preferred_query=None):
         """
         Returns the spelling suggestion found by the query.
diff --git a/tests/core/tests/indexes.py b/tests/core/tests/indexes.py
@@ -401,6 +401,19 @@ def test_remove_object(self):
 
         self.mi.remove_object(mock)
         self.assertEqual([(res.content_type(), res.pk) for res in self.sb.search('*')['results']], [(u'core.mockmodel', u'1'), (u'core.mockmodel', u'2'), (u'core.mockmodel', u'3')])
+
+        # Put it back so we can test passing kwargs.
+        mock = MockModel()
+        mock.pk = 20
+        mock.author = 'daniel%s' % mock.id
+        mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0)
+
+        self.mi.update_object(mock)
+        self.assertEqual(self.sb.search('*')['hits'], 4)
+
+        self.mi.remove_object(mock, commit=False)
+        self.assertEqual([(res.content_type(), res.pk) for res in self.sb.search('*')['results']], [(u'core.mockmodel', u'1'), (u'core.mockmodel', u'2'), (u'core.mockmodel', u'3'), (u'core.mockmodel', u'20')])
+
         self.sb.clear()
 
     def test_clear(self):
@@ -629,4 +642,4 @@ def test_float_integer_fields(self):
         self.assertTrue('average_delay' in self.yabmsi.fields)
         self.assertTrue(isinstance(self.yabmsi.fields['average_delay'], indexes.FloatField))
         self.assertEqual(self.yabmsi.fields['average_delay'].null, False)
-        self.assertEqual(self.yabmsi.fields['average_delay'].index_fieldname, 'average_delay')        
+        self.assertEqual(self.yabmsi.fields['average_delay'].index_fieldname, 'average_delay')
diff --git a/tests/core/tests/mocks.py b/tests/core/tests/mocks.py
@@ -46,7 +46,8 @@ def update(self, index, iterable, commit=True):
 
     def remove(self, obj, commit=True):
         global MOCK_INDEX_DATA
-        del(MOCK_INDEX_DATA[get_identifier(obj)])
+        if commit == True:
+            del(MOCK_INDEX_DATA[get_identifier(obj)])
 
     def clear(self, models=[], commit=True):
         global MOCK_INDEX_DATA
diff --git a/tests/core/tests/query.py b/tests/core/tests/query.py
diff --git a/tests/solr_tests/tests/solr_backend.py b/tests/solr_tests/tests/solr_backend.py