From 48f5642e045f5cf4ea893d534b5ef60bdff0c42c Mon Sep 17 00:00:00 2001 From: smilerz Date: Sat, 29 May 2021 20:14:35 -0500 Subject: [PATCH] foundation for user configurable search parameters --- cookbook/admin.py | 10 +- cookbook/helper/recipe_search.py | 151 ++++++++++-------- cookbook/management/commands/rebuildindex.py | 9 +- .../migrations/0122_build_full_text_index.py | 13 +- cookbook/signals.py | 5 +- 5 files changed, 109 insertions(+), 79 deletions(-) diff --git a/cookbook/admin.py b/cookbook/admin.py index a5188ab6..174451f1 100644 --- a/cookbook/admin.py +++ b/cookbook/admin.py @@ -4,6 +4,7 @@ from django.contrib.postgres.search import SearchVector from django.contrib.auth.admin import UserAdmin from django.contrib.auth.models import User, Group from django_scopes import scopes_disabled +from django.utils import translation from .models import (Comment, CookLog, Food, Ingredient, InviteLink, Keyword, MealPlan, MealType, NutritionInformation, Recipe, @@ -13,6 +14,8 @@ from .models import (Comment, CookLog, Food, Ingredient, InviteLink, Keyword, ViewLog, Supermarket, SupermarketCategory, SupermarketCategoryRelation, ImportLog, TelegramBot, BookmarkletImport, UserFile) +from cookbook.managers import DICTIONARY + class CustomUserAdmin(UserAdmin): def has_add_permission(self, request, obj=None): @@ -96,12 +99,13 @@ admin.site.register(Step, StepAdmin) @admin.action(description='Rebuild index for selected recipes') def rebuild_index(modeladmin, request, queryset): + language = DICTIONARY.get(translation.get_language(), 'simple') with scopes_disabled(): Recipe.objects.all().update( - name_search_vector=SearchVector('name__unaccent', weight='A'), - desc_search_vector=SearchVector('description__unaccent', weight='B') + name_search_vector=SearchVector('name__unaccent', weight='A', config=language), + desc_search_vector=SearchVector('description__unaccent', weight='B', config=language) ) - Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', 
weight='B')) + Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B', config=language)) class RecipeAdmin(admin.ModelAdmin): diff --git a/cookbook/helper/recipe_search.py b/cookbook/helper/recipe_search.py index 9f45b123..929ed2ff 100644 --- a/cookbook/helper/recipe_search.py +++ b/cookbook/helper/recipe_search.py @@ -1,32 +1,25 @@ from datetime import datetime, timedelta from recipes import settings -from django.contrib.postgres.aggregates import StringAgg from django.contrib.postgres.search import ( - SearchQuery, SearchRank, SearchVector, + SearchQuery, SearchRank, TrigramSimilarity ) -from django.db.models import Q, Case, When, Value, Count, Sum +from django.db.models import Q, Subquery, Case, When, Value from django.utils import translation -from cookbook.models import ViewLog - - -DICTIONARY = { - # TODO find custom dictionaries - maybe from here https://www.postgresql.org/message-id/CAF4Au4x6X_wSXFwsQYE8q5o0aQZANrvYjZJ8uOnsiHDnOVPPEg%40mail.gmail.com - # 'hy': 'Armenian', - # 'ca': 'Catalan', - # 'cs': 'Czech', - 'nl': 'dutch', - 'en': 'english', - 'fr': 'french', - 'de': 'german', - 'it': 'italian', - # 'lv': 'Latvian', - 'es': 'spanish', -} +from cookbook.managers import DICTIONARY +from cookbook.models import Food, Keyword, ViewLog def search_recipes(request, queryset, params): + fields = { + 'name': 'name', + 'description': 'description', + 'instructions': 'steps__instruction', + 'foods': 'steps__ingredients__food__name', + 'keywords': 'keywords__name' + } + search_string = params.get('query', '') search_keywords = params.getlist('keywords', []) search_foods = params.getlist('foods', []) @@ -53,54 +46,78 @@ def search_recipes(request, queryset, params): created_at__gte=(datetime.now() - timedelta(days=7)), then=Value(100)), default=Value(0), )).order_by('-new_recipe', 'name') - rank_results = False - if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql'] 
and search_string != '': - rank_results = True - # queryset = queryset.annotate(similarity=TrigramSimilarity('name', search_string), ) - # .filter(Q(similarity__gt=0.1) | Q(name__unaccent__icontains=search_string)).order_by('-similarity') - language = DICTIONARY.get(translation.get_language(), 'simple') - search_query = SearchQuery( - search_string, - search_type="websearch", - config=language, - ) - # TODO create user options to add/remove query elements from search so that they can fine tune their own experience - # trigrams, icontains, unaccent and startswith all impact results and performance significantly - search_vectors = ( - # SearchVector('search_vector') <-- this can be searched like a field - SearchVector(StringAgg('steps__ingredients__food__name__unaccent', delimiter=' '), weight='B') - + SearchVector(StringAgg('keywords__name__unaccent', delimiter=' '), weight='B') - ) - # trigrams don't seem to add anything and severely limit accuracy of results. - # TODO add trigrams as an on/off feature - # trigram = ( - # TrigramSimilarity('name__unaccent', search_string) - # + TrigramSimilarity('description__unaccent', search_string) - # # adding trigrams to ingredients and keywords causes duplicate results that can't be made unique - # + TrigramSimilarity('steps__ingredients__food__name__unaccent', search_string) - # + TrigramSimilarity('keywords__name__unaccent', search_string) - # ) - search_rank = ( - SearchRank('name_search_vector', search_query) - + SearchRank('desc_search_vector', search_query) - + SearchRank('steps__search_vector', search_query) - + SearchRank(search_vectors, search_query) - ) - queryset = ( - queryset.annotate( - vector=search_vectors + search_type = None + search_sort = None + if len(search_string) > 0: + # TODO move all of these to settings somewhere - probably user settings + + unaccent_include = ['name', 'description', 'instructions', 'keywords', 'foods'] # can also contain: description, instructions, keywords, foods + # TODO when 
setting up settings length of arrays below must be >=1 + + icontains_include = [] # can contain: name, description, instructions, keywords, foods + istartswith_include = ['name'] # can also contain: description, instructions, keywords, foods + trigram_include = ['name', 'description', 'instructions'] # only these choices - keywords and foods are really, really, really slow maybe add to subquery? + fulltext_include = ['name', 'description', 'instructions', 'foods', 'keywords'] + + # END OF SETTINGS SECTION + for f in unaccent_include: + fields[f] += '__unaccent' + + filters = [] + for f in icontains_include: + filters += [Q(**{"%s__icontains" % fields[f]: search_string})] + + for f in istartswith_include: + filters += [Q(**{"%s__istartswith" % fields[f]: search_string})] + + if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']: + language = DICTIONARY.get(translation.get_language(), 'simple') + # django full text search https://docs.djangoproject.com/en/3.2/ref/contrib/postgres/search/#searchquery + search_type = 'websearch' # other postgress options are phrase or plain or raw (websearch and trigrams are mutually exclusive) + search_trigram = False + search_query = SearchQuery( + search_string, + search_type=search_type, + config=language, ) - .filter( - # vector=search_query - Q(name_search_vector=search_query) - | Q(desc_search_vector=search_query) - | Q(steps__search_vector=search_query) - | Q(vector=search_query) - | Q(name__istartswith=search_string) - ).annotate(rank=search_rank) - ) - else: - queryset = queryset.filter(name__icontains=search_string) + + # iterate through fields to use in trigrams generating a single trigram + if search_trigram & len(trigram_include) > 1: + trigram = None + for f in trigram_include: + if trigram: + trigram += TrigramSimilarity(fields[f], search_string) + else: + trigram = TrigramSimilarity(fields[f], search_string) + queryset.annotate(simularity=trigram) + 
filters += [Q(simularity__gt=0.5)] + + if 'name' in fulltext_include: + filters += [Q(name_search_vector=search_query)] + if 'description' in fulltext_include: + filters += [Q(desc_search_vector=search_query)] + if 'instructions' in fulltext_include: + filters += [Q(steps__search_vector=search_query)] + if 'keywords' in fulltext_include: + filters += [Q(keywords__in=Subquery(Keyword.objects.filter(name__search=search_query).values_list('id', flat=True)))] + if 'foods' in fulltext_include: + filters += [Q(steps__ingredients__food__in=Subquery(Food.objects.filter(name__search=search_query).values_list('id', flat=True)))] + query_filter = None + for f in filters: + if query_filter: + query_filter |= f + else: + query_filter = f + + # TODO this is kind of a dumb method to sort. create settings to choose rank vs most often made, date created or rating + search_rank = ( + SearchRank('name_search_vector', search_query, cover_density=True) + + SearchRank('desc_search_vector', search_query, cover_density=True) + + SearchRank('steps__search_vector', search_query, cover_density=True) + ) + queryset = queryset.filter(query_filter).annotate(rank=search_rank) + else: + queryset = queryset.filter(query_filter) if len(search_keywords) > 0: if search_keywords_or == 'true': @@ -130,7 +147,7 @@ def search_recipes(request, queryset, params): if search_random == 'true': queryset = queryset.order_by("?") - elif rank_results: + elif search_sort == 'rank': queryset = queryset.order_by('-rank') return queryset diff --git a/cookbook/management/commands/rebuildindex.py b/cookbook/management/commands/rebuildindex.py index d9e20430..9da12c57 100644 --- a/cookbook/management/commands/rebuildindex.py +++ b/cookbook/management/commands/rebuildindex.py @@ -2,8 +2,10 @@ from django.conf import settings from django.contrib.postgres.search import SearchVector from django.core.management.base import BaseCommand from django_scopes import scopes_disabled +from django.utils import translation from 
django.utils.translation import gettext_lazy as _ +from cookbook.managers import DICTIONARY from cookbook.models import Recipe, Step @@ -16,12 +18,13 @@ class Command(BaseCommand): self.stdout.write(self.style.WARNING(_('Only Postgress databases use full text search, no index to rebuild'))) try: + language = DICTIONARY.get(translation.get_language(), 'simple') with scopes_disabled(): Recipe.objects.all().update( - name_search_vector=SearchVector('name__unaccent', weight='A'), - desc_search_vector=SearchVector('description__unaccent', weight='B') + name_search_vector=SearchVector('name__unaccent', weight='A', config=language), + desc_search_vector=SearchVector('description__unaccent', weight='B', config=language) ) - Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B')) + Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B', config=language)) self.stdout.write(self.style.SUCCESS(_('Recipe index rebuild complete.'))) except: diff --git a/cookbook/migrations/0122_build_full_text_index.py b/cookbook/migrations/0122_build_full_text_index.py index b1628586..18c96747 100644 --- a/cookbook/migrations/0122_build_full_text_index.py +++ b/cookbook/migrations/0122_build_full_text_index.py @@ -4,18 +4,23 @@ from django.contrib.postgres.indexes import GinIndex from django.contrib.postgres.search import SearchVectorField, SearchVector from django.db import migrations from django_scopes import scopes_disabled +from django.utils import translation +from cookbook.managers import DICTIONARY from cookbook.models import Recipe, Step + + def set_default_search_vector(apps, schema_editor): if settings.DATABASES['default']['ENGINE'] not in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']: return + language = DICTIONARY.get(translation.get_language(), 'simple') with scopes_disabled(): - # TODO add language + # TODO this approach doesn't work terribly well if multiple languages are in use 
Recipe.objects.all().update( - name_search_vector=SearchVector('name__unaccent', weight='A'), - desc_search_vector=SearchVector('description__unaccent', weight='B') - ) + name_search_vector=SearchVector('name__unaccent', weight='A', config=language), + desc_search_vector=SearchVector('description__unaccent', weight='B', config=language) + ) Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B')) diff --git a/cookbook/signals.py b/cookbook/signals.py index 0395fc71..dc820c11 100644 --- a/cookbook/signals.py +++ b/cookbook/signals.py @@ -19,7 +19,7 @@ def update_recipe_search_vector(sender, instance=None, created=False, **kwargs): language = DICTIONARY.get(translation.get_language(), 'simple') instance.name_search_vector = SearchVector('name__unaccent', weight='A', config=language) - instance.desc_search_vector = SearchVector('description__unaccent', weight='C', config=language) + instance.desc_search_vector = SearchVector('description__unaccent', weight='C', config=language) try: instance._dirty = True @@ -37,7 +37,8 @@ def update_step_search_vector(sender, instance=None, created=False, **kwargs): if hasattr(instance, '_dirty'): return - instance.search_vector = SearchVector('instruction__unaccent', weight='B') + language = DICTIONARY.get(translation.get_language(), 'simple') + instance.search_vector = SearchVector('instruction__unaccent', weight='B', config=language) try: instance._dirty = True