foundation for user-configurable search parameters

2021-05-29 20:14:35 -05:00 · 2021-05-29 20:14:35 -05:00 · 48f5642e04
commit 48f5642e04
parent 51cda4c2ff
5 changed files with 109 additions and 79 deletions
--- a/cookbook/admin.py
+++ b/cookbook/admin.py
@ -4,6 +4,7 @@ from django.contrib.postgres.search import SearchVector
 from django.contrib.auth.admin import UserAdmin
 from django.contrib.auth.models import User, Group
 from django_scopes import scopes_disabled
 from django.utils import translation
 from .models import (Comment, CookLog, Food, Ingredient, InviteLink, Keyword,
                     MealPlan, MealType, NutritionInformation, Recipe,
@ -13,6 +14,8 @@ from .models import (Comment, CookLog, Food, Ingredient, InviteLink, Keyword,
                     ViewLog, Supermarket, SupermarketCategory, SupermarketCategoryRelation,
                     ImportLog, TelegramBot, BookmarkletImport, UserFile)
 from cookbook.managers import DICTIONARY
 class CustomUserAdmin(UserAdmin):
    def has_add_permission(self, request, obj=None):
@ -96,12 +99,13 @@ admin.site.register(Step, StepAdmin)
@admin.action(description='Rebuild index for selected recipes')
 def rebuild_index(modeladmin, request, queryset):
    language = DICTIONARY.get(translation.get_language(), 'simple')
    with scopes_disabled():
        Recipe.objects.all().update(
-            name_search_vector=SearchVector('name__unaccent', weight='A'),
+            name_search_vector=SearchVector('name__unaccent', weight='A', config=language),
-            desc_search_vector=SearchVector('description__unaccent', weight='B')
+            desc_search_vector=SearchVector('description__unaccent', weight='B', config=language)
        )
-        Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B'))
+        Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B', config=language))
 class RecipeAdmin(admin.ModelAdmin):
--- a/cookbook/helper/recipe_search.py
+++ b/cookbook/helper/recipe_search.py
@ -1,32 +1,25 @@
 from datetime import datetime, timedelta
 from recipes import settings
 from django.contrib.postgres.aggregates import StringAgg
 from django.contrib.postgres.search import (
-    SearchQuery, SearchRank, SearchVector,
+    SearchQuery, SearchRank, TrigramSimilarity
 )
-from django.db.models import Q, Case, When, Value, Count, Sum
+from django.db.models import Q, Subquery, Case, When, Value
 from django.utils import translation
-from cookbook.models import ViewLog
+from cookbook.managers import DICTIONARY
-
+from cookbook.models import Food, Keyword, ViewLog
 DICTIONARY = {
    # TODO find custom dictionaries - maybe from here https://www.postgresql.org/message-id/CAF4Au4x6X_wSXFwsQYE8q5o0aQZANrvYjZJ8uOnsiHDnOVPPEg%40mail.gmail.com
    # 'hy': 'Armenian',
    # 'ca': 'Catalan',
    # 'cs': 'Czech',
    'nl': 'dutch',
    'en': 'english',
    'fr': 'french',
    'de': 'german',
    'it': 'italian',
    # 'lv': 'Latvian',
    'es': 'spanish',
 }
 def search_recipes(request, queryset, params):
    fields = {
        'name': 'name',
        'description': 'description',
        'instructions': 'steps__instruction',
        'foods': 'steps__ingredients__food__name',
        'keywords': 'keywords__name'
    }
    search_string = params.get('query', '')
    search_keywords = params.getlist('keywords', [])
    search_foods = params.getlist('foods', [])
@ -53,54 +46,78 @@ def search_recipes(request, queryset, params):
            created_at__gte=(datetime.now() - timedelta(days=7)), then=Value(100)),
            default=Value(0), )).order_by('-new_recipe', 'name')
-    rank_results = False
+    search_type = None
-    if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql'] and search_string != '':
+    search_sort = None
-        rank_results = True
+    if len(search_string) > 0:
-        # queryset = queryset.annotate(similarity=TrigramSimilarity('name', search_string), )
+        # TODO move all of these to settings somewhere - probably user settings
-        # .filter(Q(similarity__gt=0.1) | Q(name__unaccent__icontains=search_string)).order_by('-similarity')
+
-        language = DICTIONARY.get(translation.get_language(), 'simple')
+        unaccent_include = ['name', 'description', 'instructions', 'keywords', 'foods']  # can also contain: description, instructions, keywords, foods
-        search_query = SearchQuery(
+        # TODO when setting up settings length of arrays below must be >=1
-            search_string,
+
-            search_type="websearch",
+        icontains_include = []  # can contain: name, description, instructions, keywords, foods
-            config=language,
+        istartswith_include = ['name']  # can also contain: description, instructions, keywords, foods
-        )
+        trigram_include = ['name', 'description', 'instructions']  # only these choices - keywords and foods are really, really, really slow maybe add to subquery?
-        # TODO create user options to add/remove query elements from search so that they can fine tune their own experience
+        fulltext_include = ['name', 'description', 'instructions', 'foods', 'keywords']
-        # trigrams, icontains, unaccent and startswith all impact results and performance significantly
+
-        search_vectors = (
+        # END OF SETTINGS SECTION
-            # SearchVector('search_vector') <-- this can be searched like a field
+        for f in unaccent_include:
-            SearchVector(StringAgg('steps__ingredients__food__name__unaccent', delimiter=' '), weight='B')
+            fields[f] += '__unaccent'
-            + SearchVector(StringAgg('keywords__name__unaccent', delimiter=' '), weight='B')
+
-        )
+        filters = []
-        # trigrams don't seem to add anything and severely limit accuracy of results.
+        for f in icontains_include:
-        # TODO add trigrams as an on/off feature
+            filters += [Q(**{"%s__icontains" % fields[f]: search_string})]
-        # trigram = (
+
-        #     TrigramSimilarity('name__unaccent', search_string)
+        for f in istartswith_include:
-        #     + TrigramSimilarity('description__unaccent', search_string)
+            filters += [Q(**{"%s__istartswith" % fields[f]: search_string})]
-        #     # adding trigrams to ingredients and keywords causes duplicate results that can't be made unique
+
-        #     + TrigramSimilarity('steps__ingredients__food__name__unaccent', search_string)
+        if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']:
-        #     + TrigramSimilarity('keywords__name__unaccent', search_string)
+            language = DICTIONARY.get(translation.get_language(), 'simple')
-        # )
+            # django full text search https://docs.djangoproject.com/en/3.2/ref/contrib/postgres/search/#searchquery
-        search_rank = (
+            search_type = 'websearch'  # other postgress options are phrase or plain or raw (websearch and trigrams are mutually exclusive)
-            SearchRank('name_search_vector', search_query)
+            search_trigram = False
-            + SearchRank('desc_search_vector', search_query)
+            search_query = SearchQuery(
-            + SearchRank('steps__search_vector', search_query)
+                search_string,
-            + SearchRank(search_vectors, search_query)
+                search_type=search_type,
-        )
+                config=language,
        queryset = (
            queryset.annotate(
                vector=search_vectors
            )
-            .filter(
+
-                # vector=search_query
+            # iterate through fields to use in trigrams generating a single trigram
-                Q(name_search_vector=search_query)
+            if search_trigram & len(trigram_include) > 1:
-                | Q(desc_search_vector=search_query)
+                trigram = None
-                | Q(steps__search_vector=search_query)
+                for f in trigram_include:
-                | Q(vector=search_query)
+                    if trigram:
-                | Q(name__istartswith=search_string)
+                        trigram += TrigramSimilarity(fields[f], search_string)
-            ).annotate(rank=search_rank)
+                    else:
-        )
+                        trigram = TrigramSimilarity(fields[f], search_string)
-    else:
+                queryset.annotate(simularity=trigram)
-        queryset = queryset.filter(name__icontains=search_string)
+                filters += [Q(simularity__gt=0.5)]
            if 'name' in fulltext_include:
                filters += [Q(name_search_vector=search_query)]
            if 'description' in fulltext_include:
                filters += [Q(desc_search_vector=search_query)]
            if 'instructions' in fulltext_include:
                filters += [Q(steps__search_vector=search_query)]
            if 'keywords' in fulltext_include:
                filters += [Q(keywords__in=Subquery(Keyword.objects.filter(name__search=search_query).values_list('id', flat=True)))]
            if 'foods' in fulltext_include:
                filters += [Q(steps__ingredients__food__in=Subquery(Food.objects.filter(name__search=search_query).values_list('id', flat=True)))]
            query_filter = None
            for f in filters:
                if query_filter:
                    query_filter |= f
                else:
                    query_filter = f
            # TODO this is kind of a dumb method to sort.  create settings to choose rank vs most often made, date created or rating
            search_rank = (
                SearchRank('name_search_vector', search_query, cover_density=True)
                + SearchRank('desc_search_vector', search_query, cover_density=True)
                + SearchRank('steps__search_vector', search_query, cover_density=True)
            )
            queryset = queryset.filter(query_filter).annotate(rank=search_rank)
        else:
            queryset = queryset.filter(query_filter)
    if len(search_keywords) > 0:
        if search_keywords_or == 'true':
@ -130,7 +147,7 @@ def search_recipes(request, queryset, params):
    if search_random == 'true':
        queryset = queryset.order_by("?")
-    elif rank_results:
+    elif search_sort == 'rank':
        queryset = queryset.order_by('-rank')
    return queryset
--- a/cookbook/management/commands/rebuildindex.py
+++ b/cookbook/management/commands/rebuildindex.py
@ -2,8 +2,10 @@ from django.conf import settings
 from django.contrib.postgres.search import SearchVector
 from django.core.management.base import BaseCommand
 from django_scopes import scopes_disabled
 from django.utils import translation
 from django.utils.translation import gettext_lazy as _
 from cookbook.managers import DICTIONARY
 from cookbook.models import Recipe, Step
@ -16,12 +18,13 @@ class Command(BaseCommand):
            self.stdout.write(self.style.WARNING(_('Only Postgress databases use full text search, no index to rebuild')))
        try:
            language = DICTIONARY.get(translation.get_language(), 'simple')
            with scopes_disabled():
                Recipe.objects.all().update(
-                    name_search_vector=SearchVector('name__unaccent', weight='A'),
+                    name_search_vector=SearchVector('name__unaccent', weight='A', config=language),
-                    desc_search_vector=SearchVector('description__unaccent', weight='B')
+                    desc_search_vector=SearchVector('description__unaccent', weight='B', config=language)
                )
-                Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B'))
+                Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B', config=language))
                self.stdout.write(self.style.SUCCESS(_('Recipe index rebuild complete.')))
        except:
--- a/cookbook/migrations/0122_build_full_text_index.py
+++ b/cookbook/migrations/0122_build_full_text_index.py
@ -4,18 +4,23 @@ from django.contrib.postgres.indexes import GinIndex
 from django.contrib.postgres.search import SearchVectorField, SearchVector
 from django.db import migrations
 from django_scopes import scopes_disabled
 from django.utils import translation
 from cookbook.managers import DICTIONARY
 from cookbook.models import Recipe, Step
 def set_default_search_vector(apps, schema_editor):
    if settings.DATABASES['default']['ENGINE'] not in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']:
        return
    language = DICTIONARY.get(translation.get_language(), 'simple')
    with scopes_disabled():
-        # TODO add language
+        # TODO this approach doesn't work terribly well if multiple languages are in use
        Recipe.objects.all().update(
-            name_search_vector=SearchVector('name__unaccent', weight='A'),
+            name_search_vector=SearchVector('name__unaccent', weight='A', config=language),
-            desc_search_vector=SearchVector('description__unaccent', weight='B')
+            desc_search_vector=SearchVector('description__unaccent', weight='B', config=language)
-            )
+        )
        Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B'))
--- a/cookbook/signals.py
+++ b/cookbook/signals.py
@ -19,7 +19,7 @@ def update_recipe_search_vector(sender, instance=None, created=False, **kwargs):
    language = DICTIONARY.get(translation.get_language(), 'simple')
    instance.name_search_vector = SearchVector('name__unaccent', weight='A', config=language)
-    instance.desc_search_vector =  SearchVector('description__unaccent', weight='C', config=language)
+    instance.desc_search_vector = SearchVector('description__unaccent', weight='C', config=language)
    try:
        instance._dirty = True
@ -37,7 +37,8 @@ def update_step_search_vector(sender, instance=None, created=False, **kwargs):
    if hasattr(instance, '_dirty'):
        return
-    instance.search_vector = SearchVector('instruction__unaccent', weight='B')
+    language = DICTIONARY.get(translation.get_language(), 'simple')
    instance.search_vector = SearchVector('instruction__unaccent', weight='B', config=language)
    try:
        instance._dirty = True