From 48f5642e045f5cf4ea893d534b5ef60bdff0c42c Mon Sep 17 00:00:00 2001
From: smilerz <smilerz@gmail.com>
Date: Sat, 29 May 2021 20:14:35 -0500
Subject: [PATCH] foundation for user configurable search parameters

---
 cookbook/admin.py                             |  10 +-
 cookbook/helper/recipe_search.py              | 151 ++++++++++--------
 cookbook/management/commands/rebuildindex.py  |   9 +-
 .../migrations/0122_build_full_text_index.py  |  13 +-
 cookbook/signals.py                           |   5 +-
 5 files changed, 109 insertions(+), 79 deletions(-)

diff --git a/cookbook/admin.py b/cookbook/admin.py
index a5188ab6..174451f1 100644
--- a/cookbook/admin.py
+++ b/cookbook/admin.py
@@ -4,6 +4,7 @@ from django.contrib.postgres.search import SearchVector
 from django.contrib.auth.admin import UserAdmin
 from django.contrib.auth.models import User, Group
 from django_scopes import scopes_disabled
+from django.utils import translation
 
 from .models import (Comment, CookLog, Food, Ingredient, InviteLink, Keyword,
                      MealPlan, MealType, NutritionInformation, Recipe,
@@ -13,6 +14,8 @@ from .models import (Comment, CookLog, Food, Ingredient, InviteLink, Keyword,
                      ViewLog, Supermarket, SupermarketCategory, SupermarketCategoryRelation,
                      ImportLog, TelegramBot, BookmarkletImport, UserFile)
 
+from cookbook.managers import DICTIONARY
+
 
 class CustomUserAdmin(UserAdmin):
     def has_add_permission(self, request, obj=None):
@@ -96,12 +99,13 @@ admin.site.register(Step, StepAdmin)
 
 @admin.action(description='Rebuild index for selected recipes')
 def rebuild_index(modeladmin, request, queryset):
+    language = DICTIONARY.get(translation.get_language(), 'simple')
     with scopes_disabled():
         Recipe.objects.all().update(
-            name_search_vector=SearchVector('name__unaccent', weight='A'),
-            desc_search_vector=SearchVector('description__unaccent', weight='B')
+            name_search_vector=SearchVector('name__unaccent', weight='A', config=language),
+            desc_search_vector=SearchVector('description__unaccent', weight='B', config=language)
         )
-        Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B'))
+        Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B', config=language))
 
 
 class RecipeAdmin(admin.ModelAdmin):
diff --git a/cookbook/helper/recipe_search.py b/cookbook/helper/recipe_search.py
index 9f45b123..929ed2ff 100644
--- a/cookbook/helper/recipe_search.py
+++ b/cookbook/helper/recipe_search.py
@@ -1,32 +1,25 @@
 from datetime import datetime, timedelta
 
 from recipes import settings
-from django.contrib.postgres.aggregates import StringAgg
 from django.contrib.postgres.search import (
-    SearchQuery, SearchRank, SearchVector,
+    SearchQuery, SearchRank, TrigramSimilarity
 )
-from django.db.models import Q, Case, When, Value, Count, Sum
+from django.db.models import Q, Subquery, Case, When, Value
 from django.utils import translation
 
-from cookbook.models import ViewLog
-
-
-DICTIONARY = {
-    # TODO find custom dictionaries - maybe from here https://www.postgresql.org/message-id/CAF4Au4x6X_wSXFwsQYE8q5o0aQZANrvYjZJ8uOnsiHDnOVPPEg%40mail.gmail.com
-    # 'hy': 'Armenian',
-    # 'ca': 'Catalan',
-    # 'cs': 'Czech',
-    'nl': 'dutch',
-    'en': 'english',
-    'fr': 'french',
-    'de': 'german',
-    'it': 'italian',
-    # 'lv': 'Latvian',
-    'es': 'spanish',
-}
+from cookbook.managers import DICTIONARY
+from cookbook.models import Food, Keyword, ViewLog
 
 
 def search_recipes(request, queryset, params):
+    fields = {
+        'name': 'name',
+        'description': 'description',
+        'instructions': 'steps__instruction',
+        'foods': 'steps__ingredients__food__name',
+        'keywords': 'keywords__name'
+    }
+
     search_string = params.get('query', '')
     search_keywords = params.getlist('keywords', [])
     search_foods = params.getlist('foods', [])
@@ -53,54 +46,78 @@ def search_recipes(request, queryset, params):
             created_at__gte=(datetime.now() - timedelta(days=7)), then=Value(100)),
             default=Value(0), )).order_by('-new_recipe', 'name')
 
-    rank_results = False
-    if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql'] and search_string != '':
-        rank_results = True
-        # queryset = queryset.annotate(similarity=TrigramSimilarity('name', search_string), )
-        # .filter(Q(similarity__gt=0.1) | Q(name__unaccent__icontains=search_string)).order_by('-similarity')
-        language = DICTIONARY.get(translation.get_language(), 'simple')
-        search_query = SearchQuery(
-            search_string,
-            search_type="websearch",
-            config=language,
-        )
-        # TODO create user options to add/remove query elements from search so that they can fine tune their own experience
-        # trigrams, icontains, unaccent and startswith all impact results and performance significantly
-        search_vectors = (
-            # SearchVector('search_vector') <-- this can be searched like a field
-            SearchVector(StringAgg('steps__ingredients__food__name__unaccent', delimiter=' '), weight='B')
-            + SearchVector(StringAgg('keywords__name__unaccent', delimiter=' '), weight='B')
-        )
-        # trigrams don't seem to add anything and severely limit accuracy of results.
-        # TODO add trigrams as an on/off feature
-        # trigram = (
-        #     TrigramSimilarity('name__unaccent', search_string)
-        #     + TrigramSimilarity('description__unaccent', search_string)
-        #     # adding trigrams to ingredients and keywords causes duplicate results that can't be made unique
-        #     + TrigramSimilarity('steps__ingredients__food__name__unaccent', search_string)
-        #     + TrigramSimilarity('keywords__name__unaccent', search_string)
-        # )
-        search_rank = (
-            SearchRank('name_search_vector', search_query)
-            + SearchRank('desc_search_vector', search_query)
-            + SearchRank('steps__search_vector', search_query)
-            + SearchRank(search_vectors, search_query)
-        )
-        queryset = (
-            queryset.annotate(
-                vector=search_vectors
+    search_type = None
+    search_sort = None
+    if len(search_string) > 0:
+        # TODO move all of these to settings somewhere - probably user settings
+
+        unaccent_include = ['name', 'description', 'instructions', 'keywords', 'foods']  # can also contain: description, instructions, keywords, foods
+        # TODO when setting up settings length of arrays below must be >=1
+
+        icontains_include = []  # can contain: name, description, instructions, keywords, foods
+        istartswith_include = ['name']  # can also contain: description, instructions, keywords, foods
+        trigram_include = ['name', 'description', 'instructions']  # only these choices - keywords and foods are really, really, really slow maybe add to subquery?
+        fulltext_include = ['name', 'description', 'instructions', 'foods', 'keywords']
+
+        # END OF SETTINGS SECTION
+        for f in unaccent_include:
+            fields[f] += '__unaccent'
+
+        filters = []
+        for f in icontains_include:
+            filters += [Q(**{"%s__icontains" % fields[f]: search_string})]
+
+        for f in istartswith_include:
+            filters += [Q(**{"%s__istartswith" % fields[f]: search_string})]
+
+        if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']:
+            language = DICTIONARY.get(translation.get_language(), 'simple')
+            # django full text search https://docs.djangoproject.com/en/3.2/ref/contrib/postgres/search/#searchquery
+            search_type = 'websearch'  # other postgress options are phrase or plain or raw (websearch and trigrams are mutually exclusive)
+            search_trigram = False
+            search_query = SearchQuery(
+                search_string,
+                search_type=search_type,
+                config=language,
             )
-            .filter(
-                # vector=search_query
-                Q(name_search_vector=search_query)
-                | Q(desc_search_vector=search_query)
-                | Q(steps__search_vector=search_query)
-                | Q(vector=search_query)
-                | Q(name__istartswith=search_string)
-            ).annotate(rank=search_rank)
-        )
-    else:
-        queryset = queryset.filter(name__icontains=search_string)
+
+            # sum the trigram similarity of every configured field into one score (annotation name keeps its original spelling)
+            if search_trigram and len(trigram_include) >= 1:
+                trigram = None
+                for f in trigram_include:
+                    if trigram is None:
+                        trigram = TrigramSimilarity(fields[f], search_string)
+                    else:
+                        trigram += TrigramSimilarity(fields[f], search_string)
+                queryset = queryset.annotate(simularity=trigram)  # annotate() returns a new queryset, so keep the result
+                filters += [Q(simularity__gt=0.5)]
+
+            if 'name' in fulltext_include:
+                filters += [Q(name_search_vector=search_query)]
+            if 'description' in fulltext_include:
+                filters += [Q(desc_search_vector=search_query)]
+            if 'instructions' in fulltext_include:
+                filters += [Q(steps__search_vector=search_query)]
+            if 'keywords' in fulltext_include:
+                filters += [Q(keywords__in=Subquery(Keyword.objects.filter(name__search=search_query).values_list('id', flat=True)))]
+            if 'foods' in fulltext_include:
+                filters += [Q(steps__ingredients__food__in=Subquery(Food.objects.filter(name__search=search_query).values_list('id', flat=True)))]
+            query_filter = None
+            for f in filters:
+                if query_filter:
+                    query_filter |= f
+                else:
+                    query_filter = f
+
+            search_sort = 'rank'  # TODO this is kind of a dumb method to sort.  create settings to choose rank vs most often made, date created or rating
+            search_rank = (
+                SearchRank('name_search_vector', search_query, cover_density=True)
+                + SearchRank('desc_search_vector', search_query, cover_density=True)
+                + SearchRank('steps__search_vector', search_query, cover_density=True)
+            )
+            queryset = queryset.filter(query_filter).annotate(rank=search_rank)
+        else:
+            queryset = queryset.filter(name__icontains=search_string)  # non-Postgres fallback: query_filter is only built in the Postgres branch above
 
     if len(search_keywords) > 0:
         if search_keywords_or == 'true':
@@ -130,7 +147,7 @@ def search_recipes(request, queryset, params):
 
     if search_random == 'true':
         queryset = queryset.order_by("?")
-    elif rank_results:
+    elif search_sort == 'rank':
         queryset = queryset.order_by('-rank')
 
     return queryset
diff --git a/cookbook/management/commands/rebuildindex.py b/cookbook/management/commands/rebuildindex.py
index d9e20430..9da12c57 100644
--- a/cookbook/management/commands/rebuildindex.py
+++ b/cookbook/management/commands/rebuildindex.py
@@ -2,8 +2,10 @@ from django.conf import settings
 from django.contrib.postgres.search import SearchVector
 from django.core.management.base import BaseCommand
 from django_scopes import scopes_disabled
+from django.utils import translation
 from django.utils.translation import gettext_lazy as _
 
+from cookbook.managers import DICTIONARY
 from cookbook.models import Recipe, Step
 
 
@@ -16,12 +18,13 @@ class Command(BaseCommand):
             self.stdout.write(self.style.WARNING(_('Only Postgress databases use full text search, no index to rebuild')))
 
         try:
+            language = DICTIONARY.get(translation.get_language(), 'simple')
             with scopes_disabled():
                 Recipe.objects.all().update(
-                    name_search_vector=SearchVector('name__unaccent', weight='A'),
-                    desc_search_vector=SearchVector('description__unaccent', weight='B')
+                    name_search_vector=SearchVector('name__unaccent', weight='A', config=language),
+                    desc_search_vector=SearchVector('description__unaccent', weight='B', config=language)
                 )
-                Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B'))
+                Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B', config=language))
 
                 self.stdout.write(self.style.SUCCESS(_('Recipe index rebuild complete.')))
         except:
diff --git a/cookbook/migrations/0122_build_full_text_index.py b/cookbook/migrations/0122_build_full_text_index.py
index b1628586..18c96747 100644
--- a/cookbook/migrations/0122_build_full_text_index.py
+++ b/cookbook/migrations/0122_build_full_text_index.py
@@ -4,18 +4,23 @@ from django.contrib.postgres.indexes import GinIndex
 from django.contrib.postgres.search import SearchVectorField, SearchVector
 from django.db import migrations
 from django_scopes import scopes_disabled
+from django.utils import translation
+from cookbook.managers import DICTIONARY
 from cookbook.models import Recipe, Step
 
 
+
+
 def set_default_search_vector(apps, schema_editor):
     if settings.DATABASES['default']['ENGINE'] not in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']:
         return
+    language = DICTIONARY.get(translation.get_language(), 'simple')
     with scopes_disabled():
-        # TODO add language
+        # TODO this approach doesn't work terribly well if multiple languages are in use
         Recipe.objects.all().update(
-            name_search_vector=SearchVector('name__unaccent', weight='A'),
-            desc_search_vector=SearchVector('description__unaccent', weight='B')
-            )
+            name_search_vector=SearchVector('name__unaccent', weight='A', config=language),
+            desc_search_vector=SearchVector('description__unaccent', weight='B', config=language)
+        )
         Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B'))
 
 
diff --git a/cookbook/signals.py b/cookbook/signals.py
index 0395fc71..dc820c11 100644
--- a/cookbook/signals.py
+++ b/cookbook/signals.py
@@ -19,7 +19,7 @@ def update_recipe_search_vector(sender, instance=None, created=False, **kwargs):
 
     language = DICTIONARY.get(translation.get_language(), 'simple')
     instance.name_search_vector = SearchVector('name__unaccent', weight='A', config=language)
-    instance.desc_search_vector =  SearchVector('description__unaccent', weight='C', config=language)
+    instance.desc_search_vector = SearchVector('description__unaccent', weight='C', config=language)
 
     try:
         instance._dirty = True
@@ -37,7 +37,8 @@ def update_step_search_vector(sender, instance=None, created=False, **kwargs):
     if hasattr(instance, '_dirty'):
         return
 
-    instance.search_vector = SearchVector('instruction__unaccent', weight='B')
+    language = DICTIONARY.get(translation.get_language(), 'simple')
+    instance.search_vector = SearchVector('instruction__unaccent', weight='B', config=language)
 
     try:
         instance._dirty = True