diff --git a/cookbook/admin.py b/cookbook/admin.py index 864c7793..01ac8bb2 100644 --- a/cookbook/admin.py +++ b/cookbook/admin.py @@ -88,10 +88,11 @@ admin.site.register(Step, StepAdmin) @admin.action(description='Rebuild index for selected recipes') def rebuild_index(modeladmin, request, queryset): with scopes_disabled(): - search_vector = ( - SearchVector('name__unaccent', weight='A') - + SearchVector('description__unaccent', weight='B')) - queryset.update(search_vector=search_vector) + Recipe.objects.all().update( + name_search_vector=SearchVector('name__unaccent', weight='A'), + desc_search_vector=SearchVector('description__unaccent', weight='B') + ) + Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B')) class RecipeAdmin(admin.ModelAdmin): diff --git a/cookbook/helper/recipe_search.py b/cookbook/helper/recipe_search.py index 54443745..7938d3f1 100644 --- a/cookbook/helper/recipe_search.py +++ b/cookbook/helper/recipe_search.py @@ -1,13 +1,11 @@ from datetime import datetime, timedelta -from functools import reduce from recipes import settings from django.contrib.postgres.aggregates import StringAgg from django.contrib.postgres.search import ( - SearchQuery, SearchRank, SearchVector, TrigramSimilarity, + SearchQuery, SearchRank, SearchVector, ) -from django.db.models import Q, Case, When, Value -from django.forms import IntegerField +from django.db.models import Q, Case, When, Value, Count, Sum from django.utils import translation from cookbook.models import ViewLog @@ -54,7 +52,9 @@ def search_recipes(request, queryset, params): created_at__gte=(datetime.now() - timedelta(days=7)), then=Value(100)), default=Value(0), )).order_by('-new_recipe', 'name') + rank_results = False if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql'] and search_string != '': + rank_results = True # queryset = queryset.annotate(similarity=TrigramSimilarity('name', search_string), ) # .filter(Q(similarity__gt=0.1) | Q(name__unaccent__icontains=search_string)).order_by('-similarity') language = DICTIONARY.get(translation.get_language(), 'simple') @@ -66,34 +66,41 @@ def search_recipes(request, queryset, params): # TODO create user options to add/remove query elements from search so that they can fine tune their own experience # trigrams, icontains, unaccent and startswith all impact results and performance significantly search_vectors = ( - SearchVector('search_vector') - # searching instruction is extremely slow - # TODO add search vector field, GIN index and save signal to update the vector on step save - # + SearchVector('steps__instruction', weight='D', config=language) - + SearchVector(StringAgg('steps__ingredients__food__name__unaccent', delimiter=' '), weight='B', config=language) - + SearchVector(StringAgg('keywords__name__unaccent', delimiter=' '), weight='B', config=language)) - trigram = ( - TrigramSimilarity('name__unaccent', search_string) - + TrigramSimilarity('description__unaccent', search_string) - # adding trigrams to ingredients and keywords causes duplicate results that can't be made unique - # + TrigramSimilarity('steps__ingredients__food__name__unaccent', search_string) - # + TrigramSimilarity('keywords__name__unaccent', search_string) + # SearchVector('search_vector') <-- this can be searched like a field + SearchVector(StringAgg('steps__ingredients__food__name__unaccent', delimiter=' '), weight='B') + + SearchVector(StringAgg('keywords__name__unaccent', delimiter=' '), weight='B') + ) + # trigrams don't seem to add anything and severely limit accuracy of results. + # TODO add trigrams as an on/off feature + # trigram = ( + # TrigramSimilarity('name__unaccent', search_string) + # + TrigramSimilarity('description__unaccent', search_string) + # # adding trigrams to ingredients and keywords causes duplicate results that can't be made unique + # + TrigramSimilarity('steps__ingredients__food__name__unaccent', search_string) + # + TrigramSimilarity('keywords__name__unaccent', search_string) + # ) + search_rank = ( + SearchRank('name_search_vector', search_query) + + SearchRank('desc_search_vector', search_query) + + SearchRank('steps__search_vector', search_query) + + SearchRank(search_vectors, search_query) ) - search_rank = SearchRank(search_vectors, search_query) queryset = ( queryset.annotate( vector=search_vectors, - rank=search_rank + trigram, - trigram=trigram + rank=search_rank ) .filter( # vector=search_query - Q(vector=search_query) + Q(name_search_vector=search_query) + | Q(desc_search_vector=search_query) + | Q(steps__search_vector=search_query) + | Q(vector=search_query) # adding trigrams to ingredients causes duplicate results that can't be made unique # | Q(trigram__gt=0.2) | Q(name__istartswith=search_string) ) - .order_by('-rank')) + ) else: queryset = queryset.filter(name__icontains=search_string) @@ -118,12 +125,14 @@ def search_recipes(request, queryset, params): for k in search_books: queryset = queryset.filter(recipebookentry__book__id=k) - queryset = queryset.distinct() - if search_internal == 'true': queryset = queryset.filter(internal=True) + queryset = queryset.distinct() + if search_random == 'true': queryset = queryset.order_by("?") + elif rank_results: + queryset = queryset.order_by('-rank') return queryset diff --git a/cookbook/management/commands/rebuildindex.py b/cookbook/management/commands/rebuildindex.py index e3f33665..d9e20430 100644 --- a/cookbook/management/commands/rebuildindex.py +++ b/cookbook/management/commands/rebuildindex.py @@ -4,7 +4,7 @@ from django.core.management.base import BaseCommand from django_scopes import scopes_disabled from django.utils.translation import gettext_lazy as _ -from cookbook.models import Recipe +from cookbook.models import Recipe, Step # can be executed at the command line with 'python manage.py rebuildindex' @@ -17,10 +17,11 @@ class Command(BaseCommand): try: with scopes_disabled(): - search_vector = ( - SearchVector('name__unaccent', weight='A') - + SearchVector('description__unaccent', weight='B')) - Recipe.objects.all().update(search_vector=search_vector) + Recipe.objects.all().update( + name_search_vector=SearchVector('name__unaccent', weight='A'), + desc_search_vector=SearchVector('description__unaccent', weight='B') + ) + Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B')) self.stdout.write(self.style.SUCCESS(_('Recipe index rebuild complete.'))) except: diff --git a/cookbook/migrations/0122_build_full_text_index.py b/cookbook/migrations/0122_build_full_text_index.py index f1d84d0d..b1628586 100644 --- a/cookbook/migrations/0122_build_full_text_index.py +++ b/cookbook/migrations/0122_build_full_text_index.py @@ -4,17 +4,19 @@ from django.contrib.postgres.indexes import GinIndex from django.contrib.postgres.search import SearchVectorField, SearchVector from django.db import migrations from django_scopes import scopes_disabled -from cookbook.models import Recipe +from cookbook.models import Recipe, Step def set_default_search_vector(apps, schema_editor): if settings.DATABASES['default']['ENGINE'] not in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']: return with scopes_disabled(): - search_vector = ( - SearchVector('name__unaccent', weight='A') - + SearchVector('description__unaccent', weight='B')) - Recipe.objects.all().update(search_vector=search_vector) + # TODO add language + Recipe.objects.all().update( + name_search_vector=SearchVector('name__unaccent', weight='A'), + desc_search_vector=SearchVector('description__unaccent', weight='B') + ) + Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B')) class Migration(migrations.Migration): @@ -24,12 +26,26 @@ class Migration(migrations.Migration): operations = [ migrations.AddField( model_name='recipe', - name='search_vector', + name='desc_search_vector', + field=SearchVectorField(null=True), + ), + migrations.AddField( + model_name='recipe', + name='name_search_vector', field=SearchVectorField(null=True), ), migrations.AddIndex( model_name='recipe', - index=GinIndex(fields=['search_vector'], name='cookbook_re_search__404e46_gin'), + index=GinIndex(fields=['name_search_vector', 'desc_search_vector'], name='cookbook_re_name_se_bdf3ca_gin'), + ), + migrations.AddField( + model_name='step', + name='search_vector', + field=SearchVectorField(null=True), + ), + migrations.AddIndex( + model_name='step', + index=GinIndex(fields=['search_vector'], name='cookbook_st_search__2ef7fa_gin'), ), migrations.RunPython( set_default_search_vector diff --git a/cookbook/models.py b/cookbook/models.py index fd17f3b9..f7a81a3f 100644 --- a/cookbook/models.py +++ b/cookbook/models.py @@ -344,6 +344,7 @@ class Step(ExportModelOperationsMixin('step'), models.Model, PermissionModelMixi time = models.IntegerField(default=0, blank=True) order = models.IntegerField(default=0) show_as_header = models.BooleanField(default=True) + search_vector = SearchVectorField(null=True) objects = ScopedManager(space='recipe__space') @@ -360,6 +361,7 @@ class Step(ExportModelOperationsMixin('step'), models.Model, PermissionModelMixi class Meta: ordering = ['order', 'pk'] + indexes = (GinIndex(fields=["search_vector"]),) class NutritionInformation(models.Model, PermissionModelMixin): @@ -411,7 +413,8 @@ class Recipe(models.Model, PermissionModelMixin): created_at = models.DateTimeField(auto_now_add=True) updated_at = models.DateTimeField(auto_now=True) - search_vector = SearchVectorField(null=True) + name_search_vector = SearchVectorField(null=True) + desc_search_vector = SearchVectorField(null=True) space = models.ForeignKey(Space, on_delete=models.CASCADE) # load custom manager for full text search if postgress is available @@ -424,7 +427,7 @@ class Recipe(models.Model, PermissionModelMixin): return self.name class Meta(): - indexes = (GinIndex(fields=["search_vector"]),) + indexes = (GinIndex(fields=["name_search_vector", "desc_search_vector"]),) class Comment(ExportModelOperationsMixin('comment'), models.Model, PermissionModelMixin): diff --git a/cookbook/serializer.py b/cookbook/serializer.py index 5016bef8..043e8251 100644 --- a/cookbook/serializer.py +++ b/cookbook/serializer.py @@ -177,7 +177,7 @@ class UnitSerializer(UniqueFieldsMixin, serializers.ModelSerializer): def create(self, validated_data): obj, created = Unit.objects.get_or_create(name=validated_data['name'].strip(), space=self.context['request'].space) return obj - + def update(self, instance, validated_data): validated_data['name'] = validated_data['name'].strip() return super(UnitSerializer, self).update(instance, validated_data) diff --git a/cookbook/signals.py b/cookbook/signals.py index 45f1863c..0395fc71 100644 --- a/cookbook/signals.py +++ b/cookbook/signals.py @@ -3,10 +3,11 @@ from django.db.models.signals import post_save from django.dispatch import receiver from django.utils import translation -from cookbook.models import Recipe +from cookbook.models import Recipe, Step from cookbook.managers import DICTIONARY +# TODO there is probably a way to generalize this @receiver(post_save, sender=Recipe) def update_recipe_search_vector(sender, instance=None, created=False, **kwargs): if not instance: @@ -17,10 +18,26 @@ def update_recipe_search_vector(sender, instance=None, created=False, **kwargs): return language = DICTIONARY.get(translation.get_language(), 'simple') - instance.search_vector = ( - SearchVector('name__unaccent', weight='A', config=language) - + SearchVector('description__unaccent', weight='C', config=language) - ) + instance.name_search_vector = SearchVector('name__unaccent', weight='A', config=language) + instance.desc_search_vector = SearchVector('description__unaccent', weight='C', config=language) + + try: + instance._dirty = True + instance.save() + finally: + del instance._dirty + + +@receiver(post_save, sender=Step) +def update_step_search_vector(sender, instance=None, created=False, **kwargs): + if not instance: + return + + # needed to ensure search vector update doesn't trigger recursion + if hasattr(instance, '_dirty'): + return + + instance.search_vector = SearchVector('instruction__unaccent', weight='B') try: instance._dirty = True