split search vectors for recipe name and description

This commit is contained in:
smilerz 2021-05-28 11:37:07 -05:00
parent e170ae983b
commit 1df6ca685a
7 changed files with 94 additions and 47 deletions

View File

@ -88,10 +88,11 @@ admin.site.register(Step, StepAdmin)
@admin.action(description='Rebuild index for selected recipes') @admin.action(description='Rebuild index for selected recipes')
def rebuild_index(modeladmin, request, queryset): def rebuild_index(modeladmin, request, queryset):
with scopes_disabled(): with scopes_disabled():
search_vector = ( Recipe.objects.all().update(
SearchVector('name__unaccent', weight='A') name_search_vector=SearchVector('name__unaccent', weight='A'),
+ SearchVector('description__unaccent', weight='B')) desc_search_vector=SearchVector('description__unaccent', weight='B')
queryset.update(search_vector=search_vector) )
Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B'))
class RecipeAdmin(admin.ModelAdmin): class RecipeAdmin(admin.ModelAdmin):

View File

@ -1,13 +1,11 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
from functools import reduce
from recipes import settings from recipes import settings
from django.contrib.postgres.aggregates import StringAgg from django.contrib.postgres.aggregates import StringAgg
from django.contrib.postgres.search import ( from django.contrib.postgres.search import (
SearchQuery, SearchRank, SearchVector, TrigramSimilarity, SearchQuery, SearchRank, SearchVector,
) )
from django.db.models import Q, Case, When, Value from django.db.models import Q, Case, When, Value, Count, Sum
from django.forms import IntegerField
from django.utils import translation from django.utils import translation
from cookbook.models import ViewLog from cookbook.models import ViewLog
@ -54,7 +52,9 @@ def search_recipes(request, queryset, params):
created_at__gte=(datetime.now() - timedelta(days=7)), then=Value(100)), created_at__gte=(datetime.now() - timedelta(days=7)), then=Value(100)),
default=Value(0), )).order_by('-new_recipe', 'name') default=Value(0), )).order_by('-new_recipe', 'name')
rank_results = False
if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql'] and search_string != '': if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql'] and search_string != '':
rank_results = True
# queryset = queryset.annotate(similarity=TrigramSimilarity('name', search_string), ) # queryset = queryset.annotate(similarity=TrigramSimilarity('name', search_string), )
# .filter(Q(similarity__gt=0.1) | Q(name__unaccent__icontains=search_string)).order_by('-similarity') # .filter(Q(similarity__gt=0.1) | Q(name__unaccent__icontains=search_string)).order_by('-similarity')
language = DICTIONARY.get(translation.get_language(), 'simple') language = DICTIONARY.get(translation.get_language(), 'simple')
@ -66,34 +66,41 @@ def search_recipes(request, queryset, params):
# TODO create user options to add/remove query elements from search so that they can fine tune their own experience # TODO create user options to add/remove query elements from search so that they can fine tune their own experience
# trigrams, icontains, unaccent and startswith all impact results and performance significantly # trigrams, icontains, unaccent and startswith all impact results and performance significantly
search_vectors = ( search_vectors = (
SearchVector('search_vector') # SearchVector('search_vector') <-- this can be searched like a field
# searching instruction is extremely slow SearchVector(StringAgg('steps__ingredients__food__name__unaccent', delimiter=' '), weight='B')
# TODO add search vector field, GIN index and save signal to update the vector on step save + SearchVector(StringAgg('keywords__name__unaccent', delimiter=' '), weight='B')
# + SearchVector('steps__instruction', weight='D', config=language) )
+ SearchVector(StringAgg('steps__ingredients__food__name__unaccent', delimiter=' '), weight='B', config=language) # trigrams don't seem to add anything and severely limit accuracy of results.
+ SearchVector(StringAgg('keywords__name__unaccent', delimiter=' '), weight='B', config=language)) # TODO add trigrams as an on/off feature
trigram = ( # trigram = (
TrigramSimilarity('name__unaccent', search_string) # TrigramSimilarity('name__unaccent', search_string)
+ TrigramSimilarity('description__unaccent', search_string) # + TrigramSimilarity('description__unaccent', search_string)
# adding trigrams to ingredients and keywords causes duplicate results that can't be made unique # # adding trigrams to ingredients and keywords causes duplicate results that can't be made unique
# + TrigramSimilarity('steps__ingredients__food__name__unaccent', search_string) # + TrigramSimilarity('steps__ingredients__food__name__unaccent', search_string)
# + TrigramSimilarity('keywords__name__unaccent', search_string) # + TrigramSimilarity('keywords__name__unaccent', search_string)
# )
search_rank = (
SearchRank('name_search_vector', search_query)
+ SearchRank('desc_search_vector', search_query)
+ SearchRank('steps__search_vector', search_query)
+ SearchRank(search_vectors, search_query)
) )
search_rank = SearchRank(search_vectors, search_query)
queryset = ( queryset = (
queryset.annotate( queryset.annotate(
vector=search_vectors, vector=search_vectors,
rank=search_rank + trigram, rank=search_rank
trigram=trigram
) )
.filter( .filter(
# vector=search_query # vector=search_query
Q(vector=search_query) Q(name_search_vector=search_query)
| Q(desc_search_vector=search_query)
| Q(steps__search_vector=search_query)
| Q(vector=search_query)
# adding trigrams to ingredients causes duplicate results that can't be made unique # adding trigrams to ingredients causes duplicate results that can't be made unique
# | Q(trigram__gt=0.2) # | Q(trigram__gt=0.2)
| Q(name__istartswith=search_string) | Q(name__istartswith=search_string)
) )
.order_by('-rank')) )
else: else:
queryset = queryset.filter(name__icontains=search_string) queryset = queryset.filter(name__icontains=search_string)
@ -118,12 +125,14 @@ def search_recipes(request, queryset, params):
for k in search_books: for k in search_books:
queryset = queryset.filter(recipebookentry__book__id=k) queryset = queryset.filter(recipebookentry__book__id=k)
queryset = queryset.distinct()
if search_internal == 'true': if search_internal == 'true':
queryset = queryset.filter(internal=True) queryset = queryset.filter(internal=True)
queryset = queryset.distinct()
if search_random == 'true': if search_random == 'true':
queryset = queryset.order_by("?") queryset = queryset.order_by("?")
elif rank_results:
queryset = queryset.order_by('-rank')
return queryset return queryset

View File

@ -4,7 +4,7 @@ from django.core.management.base import BaseCommand
from django_scopes import scopes_disabled from django_scopes import scopes_disabled
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from cookbook.models import Recipe from cookbook.models import Recipe, Step
# can be executed at the command line with 'python manage.py rebuildindex' # can be executed at the command line with 'python manage.py rebuildindex'
@ -17,10 +17,11 @@ class Command(BaseCommand):
try: try:
with scopes_disabled(): with scopes_disabled():
search_vector = ( Recipe.objects.all().update(
SearchVector('name__unaccent', weight='A') name_search_vector=SearchVector('name__unaccent', weight='A'),
+ SearchVector('description__unaccent', weight='B')) desc_search_vector=SearchVector('description__unaccent', weight='B')
Recipe.objects.all().update(search_vector=search_vector) )
Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B'))
self.stdout.write(self.style.SUCCESS(_('Recipe index rebuild complete.'))) self.stdout.write(self.style.SUCCESS(_('Recipe index rebuild complete.')))
except: except:

View File

@ -4,17 +4,19 @@ from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVectorField, SearchVector from django.contrib.postgres.search import SearchVectorField, SearchVector
from django.db import migrations from django.db import migrations
from django_scopes import scopes_disabled from django_scopes import scopes_disabled
from cookbook.models import Recipe from cookbook.models import Recipe, Step
def set_default_search_vector(apps, schema_editor): def set_default_search_vector(apps, schema_editor):
if settings.DATABASES['default']['ENGINE'] not in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']: if settings.DATABASES['default']['ENGINE'] not in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']:
return return
with scopes_disabled(): with scopes_disabled():
search_vector = ( # TODO add language
SearchVector('name__unaccent', weight='A') Recipe.objects.all().update(
+ SearchVector('description__unaccent', weight='B')) name_search_vector=SearchVector('name__unaccent', weight='A'),
Recipe.objects.all().update(search_vector=search_vector) desc_search_vector=SearchVector('description__unaccent', weight='B')
)
Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B'))
class Migration(migrations.Migration): class Migration(migrations.Migration):
@ -24,12 +26,26 @@ class Migration(migrations.Migration):
operations = [ operations = [
migrations.AddField( migrations.AddField(
model_name='recipe', model_name='recipe',
name='search_vector', name='desc_search_vector',
field=SearchVectorField(null=True),
),
migrations.AddField(
model_name='recipe',
name='name_search_vector',
field=SearchVectorField(null=True), field=SearchVectorField(null=True),
), ),
migrations.AddIndex( migrations.AddIndex(
model_name='recipe', model_name='recipe',
index=GinIndex(fields=['search_vector'], name='cookbook_re_search__404e46_gin'), index=GinIndex(fields=['name_search_vector', 'desc_search_vector'], name='cookbook_re_name_se_bdf3ca_gin'),
),
migrations.AddField(
model_name='step',
name='search_vector',
field=SearchVectorField(null=True),
),
migrations.AddIndex(
model_name='step',
index=GinIndex(fields=['search_vector'], name='cookbook_st_search__2ef7fa_gin'),
), ),
migrations.RunPython( migrations.RunPython(
set_default_search_vector set_default_search_vector

View File

@ -344,6 +344,7 @@ class Step(ExportModelOperationsMixin('step'), models.Model, PermissionModelMixi
time = models.IntegerField(default=0, blank=True) time = models.IntegerField(default=0, blank=True)
order = models.IntegerField(default=0) order = models.IntegerField(default=0)
show_as_header = models.BooleanField(default=True) show_as_header = models.BooleanField(default=True)
search_vector = SearchVectorField(null=True)
objects = ScopedManager(space='recipe__space') objects = ScopedManager(space='recipe__space')
@ -360,6 +361,7 @@ class Step(ExportModelOperationsMixin('step'), models.Model, PermissionModelMixi
class Meta: class Meta:
ordering = ['order', 'pk'] ordering = ['order', 'pk']
indexes = (GinIndex(fields=["search_vector"]),)
class NutritionInformation(models.Model, PermissionModelMixin): class NutritionInformation(models.Model, PermissionModelMixin):
@ -411,7 +413,8 @@ class Recipe(models.Model, PermissionModelMixin):
created_at = models.DateTimeField(auto_now_add=True) created_at = models.DateTimeField(auto_now_add=True)
updated_at = models.DateTimeField(auto_now=True) updated_at = models.DateTimeField(auto_now=True)
search_vector = SearchVectorField(null=True) name_search_vector = SearchVectorField(null=True)
desc_search_vector = SearchVectorField(null=True)
space = models.ForeignKey(Space, on_delete=models.CASCADE) space = models.ForeignKey(Space, on_delete=models.CASCADE)
# load custom manager for full text search if postgres is available # load custom manager for full text search if postgres is available
@ -424,7 +427,7 @@ class Recipe(models.Model, PermissionModelMixin):
return self.name return self.name
class Meta(): class Meta():
indexes = (GinIndex(fields=["search_vector"]),) indexes = (GinIndex(fields=["name_search_vector", "desc_search_vector"]),)
class Comment(ExportModelOperationsMixin('comment'), models.Model, PermissionModelMixin): class Comment(ExportModelOperationsMixin('comment'), models.Model, PermissionModelMixin):

View File

@ -177,7 +177,7 @@ class UnitSerializer(UniqueFieldsMixin, serializers.ModelSerializer):
def create(self, validated_data): def create(self, validated_data):
obj, created = Unit.objects.get_or_create(name=validated_data['name'].strip(), space=self.context['request'].space) obj, created = Unit.objects.get_or_create(name=validated_data['name'].strip(), space=self.context['request'].space)
return obj return obj
def update(self, instance, validated_data): def update(self, instance, validated_data):
validated_data['name'] = validated_data['name'].strip() validated_data['name'] = validated_data['name'].strip()
return super(UnitSerializer, self).update(instance, validated_data) return super(UnitSerializer, self).update(instance, validated_data)

View File

@ -3,10 +3,11 @@ from django.db.models.signals import post_save
from django.dispatch import receiver from django.dispatch import receiver
from django.utils import translation from django.utils import translation
from cookbook.models import Recipe from cookbook.models import Recipe, Step
from cookbook.managers import DICTIONARY from cookbook.managers import DICTIONARY
# TODO there is probably a way to generalize this
@receiver(post_save, sender=Recipe) @receiver(post_save, sender=Recipe)
def update_recipe_search_vector(sender, instance=None, created=False, **kwargs): def update_recipe_search_vector(sender, instance=None, created=False, **kwargs):
if not instance: if not instance:
@ -17,10 +18,26 @@ def update_recipe_search_vector(sender, instance=None, created=False, **kwargs):
return return
language = DICTIONARY.get(translation.get_language(), 'simple') language = DICTIONARY.get(translation.get_language(), 'simple')
instance.search_vector = ( instance.name_search_vector = SearchVector('name__unaccent', weight='A', config=language)
SearchVector('name__unaccent', weight='A', config=language) instance.desc_search_vector = SearchVector('description__unaccent', weight='C', config=language)
+ SearchVector('description__unaccent', weight='C', config=language)
) try:
instance._dirty = True
instance.save()
finally:
del instance._dirty
@receiver(post_save, sender=Step)
def update_step_search_vector(sender, instance=None, created=False, **kwargs):
if not instance:
return
# needed to ensure search vector update doesn't trigger recursion
if hasattr(instance, '_dirty'):
return
instance.search_vector = SearchVector('instruction__unaccent', weight='B')
try: try:
instance._dirty = True instance._dirty = True