foundation for user-configurable search parameters

This commit is contained in:
smilerz 2021-05-29 20:14:35 -05:00
parent 51cda4c2ff
commit 48f5642e04
5 changed files with 109 additions and 79 deletions

View File

@ -4,6 +4,7 @@ from django.contrib.postgres.search import SearchVector
from django.contrib.auth.admin import UserAdmin from django.contrib.auth.admin import UserAdmin
from django.contrib.auth.models import User, Group from django.contrib.auth.models import User, Group
from django_scopes import scopes_disabled from django_scopes import scopes_disabled
from django.utils import translation
from .models import (Comment, CookLog, Food, Ingredient, InviteLink, Keyword, from .models import (Comment, CookLog, Food, Ingredient, InviteLink, Keyword,
MealPlan, MealType, NutritionInformation, Recipe, MealPlan, MealType, NutritionInformation, Recipe,
@ -13,6 +14,8 @@ from .models import (Comment, CookLog, Food, Ingredient, InviteLink, Keyword,
ViewLog, Supermarket, SupermarketCategory, SupermarketCategoryRelation, ViewLog, Supermarket, SupermarketCategory, SupermarketCategoryRelation,
ImportLog, TelegramBot, BookmarkletImport, UserFile) ImportLog, TelegramBot, BookmarkletImport, UserFile)
from cookbook.managers import DICTIONARY
class CustomUserAdmin(UserAdmin): class CustomUserAdmin(UserAdmin):
def has_add_permission(self, request, obj=None): def has_add_permission(self, request, obj=None):
@ -96,12 +99,13 @@ admin.site.register(Step, StepAdmin)
@admin.action(description='Rebuild index for selected recipes') @admin.action(description='Rebuild index for selected recipes')
def rebuild_index(modeladmin, request, queryset): def rebuild_index(modeladmin, request, queryset):
language = DICTIONARY.get(translation.get_language(), 'simple')
with scopes_disabled(): with scopes_disabled():
Recipe.objects.all().update( Recipe.objects.all().update(
name_search_vector=SearchVector('name__unaccent', weight='A'), name_search_vector=SearchVector('name__unaccent', weight='A', config=language),
desc_search_vector=SearchVector('description__unaccent', weight='B') desc_search_vector=SearchVector('description__unaccent', weight='B', config=language)
) )
Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B')) Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B', config=language))
class RecipeAdmin(admin.ModelAdmin): class RecipeAdmin(admin.ModelAdmin):

View File

@ -1,32 +1,25 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
from recipes import settings from recipes import settings
from django.contrib.postgres.aggregates import StringAgg
from django.contrib.postgres.search import ( from django.contrib.postgres.search import (
SearchQuery, SearchRank, SearchVector, SearchQuery, SearchRank, TrigramSimilarity
) )
from django.db.models import Q, Case, When, Value, Count, Sum from django.db.models import Q, Subquery, Case, When, Value
from django.utils import translation from django.utils import translation
from cookbook.models import ViewLog from cookbook.managers import DICTIONARY
from cookbook.models import Food, Keyword, ViewLog
DICTIONARY = {
# TODO find custom dictionaries - maybe from here https://www.postgresql.org/message-id/CAF4Au4x6X_wSXFwsQYE8q5o0aQZANrvYjZJ8uOnsiHDnOVPPEg%40mail.gmail.com
# 'hy': 'Armenian',
# 'ca': 'Catalan',
# 'cs': 'Czech',
'nl': 'dutch',
'en': 'english',
'fr': 'french',
'de': 'german',
'it': 'italian',
# 'lv': 'Latvian',
'es': 'spanish',
}
def search_recipes(request, queryset, params): def search_recipes(request, queryset, params):
fields = {
'name': 'name',
'description': 'description',
'instructions': 'steps__instruction',
'foods': 'steps__ingredients__food__name',
'keywords': 'keywords__name'
}
search_string = params.get('query', '') search_string = params.get('query', '')
search_keywords = params.getlist('keywords', []) search_keywords = params.getlist('keywords', [])
search_foods = params.getlist('foods', []) search_foods = params.getlist('foods', [])
@ -53,54 +46,78 @@ def search_recipes(request, queryset, params):
created_at__gte=(datetime.now() - timedelta(days=7)), then=Value(100)), created_at__gte=(datetime.now() - timedelta(days=7)), then=Value(100)),
default=Value(0), )).order_by('-new_recipe', 'name') default=Value(0), )).order_by('-new_recipe', 'name')
rank_results = False search_type = None
if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql'] and search_string != '': search_sort = None
rank_results = True if len(search_string) > 0:
# queryset = queryset.annotate(similarity=TrigramSimilarity('name', search_string), ) # TODO move all of these to settings somewhere - probably user settings
# .filter(Q(similarity__gt=0.1) | Q(name__unaccent__icontains=search_string)).order_by('-similarity')
language = DICTIONARY.get(translation.get_language(), 'simple') unaccent_include = ['name', 'description', 'instructions', 'keywords', 'foods'] # can also contain: description, instructions, keywords, foods
search_query = SearchQuery( # TODO when setting up settings length of arrays below must be >=1
search_string,
search_type="websearch", icontains_include = [] # can contain: name, description, instructions, keywords, foods
config=language, istartswith_include = ['name'] # can also contain: description, instructions, keywords, foods
) trigram_include = ['name', 'description', 'instructions'] # only these choices - keywords and foods are really, really, really slow maybe add to subquery?
# TODO create user options to add/remove query elements from search so that they can fine tune their own experience fulltext_include = ['name', 'description', 'instructions', 'foods', 'keywords']
# trigrams, icontains, unaccent and startswith all impact results and performance significantly
search_vectors = ( # END OF SETTINGS SECTION
# SearchVector('search_vector') <-- this can be searched like a field for f in unaccent_include:
SearchVector(StringAgg('steps__ingredients__food__name__unaccent', delimiter=' '), weight='B') fields[f] += '__unaccent'
+ SearchVector(StringAgg('keywords__name__unaccent', delimiter=' '), weight='B')
) filters = []
# trigrams don't seem to add anything and severely limit accuracy of results. for f in icontains_include:
# TODO add trigrams as an on/off feature filters += [Q(**{"%s__icontains" % fields[f]: search_string})]
# trigram = (
# TrigramSimilarity('name__unaccent', search_string) for f in istartswith_include:
# + TrigramSimilarity('description__unaccent', search_string) filters += [Q(**{"%s__istartswith" % fields[f]: search_string})]
# # adding trigrams to ingredients and keywords causes duplicate results that can't be made unique
# + TrigramSimilarity('steps__ingredients__food__name__unaccent', search_string) if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']:
# + TrigramSimilarity('keywords__name__unaccent', search_string) language = DICTIONARY.get(translation.get_language(), 'simple')
# ) # django full text search https://docs.djangoproject.com/en/3.2/ref/contrib/postgres/search/#searchquery
search_rank = ( search_type = 'websearch' # other postgress options are phrase or plain or raw (websearch and trigrams are mutually exclusive)
SearchRank('name_search_vector', search_query) search_trigram = False
+ SearchRank('desc_search_vector', search_query) search_query = SearchQuery(
+ SearchRank('steps__search_vector', search_query) search_string,
+ SearchRank(search_vectors, search_query) search_type=search_type,
) config=language,
queryset = (
queryset.annotate(
vector=search_vectors
) )
.filter(
# vector=search_query # iterate through fields to use in trigrams generating a single trigram
Q(name_search_vector=search_query) if search_trigram & len(trigram_include) > 1:
| Q(desc_search_vector=search_query) trigram = None
| Q(steps__search_vector=search_query) for f in trigram_include:
| Q(vector=search_query) if trigram:
| Q(name__istartswith=search_string) trigram += TrigramSimilarity(fields[f], search_string)
).annotate(rank=search_rank) else:
) trigram = TrigramSimilarity(fields[f], search_string)
else: queryset.annotate(simularity=trigram)
queryset = queryset.filter(name__icontains=search_string) filters += [Q(simularity__gt=0.5)]
if 'name' in fulltext_include:
filters += [Q(name_search_vector=search_query)]
if 'description' in fulltext_include:
filters += [Q(desc_search_vector=search_query)]
if 'instructions' in fulltext_include:
filters += [Q(steps__search_vector=search_query)]
if 'keywords' in fulltext_include:
filters += [Q(keywords__in=Subquery(Keyword.objects.filter(name__search=search_query).values_list('id', flat=True)))]
if 'foods' in fulltext_include:
filters += [Q(steps__ingredients__food__in=Subquery(Food.objects.filter(name__search=search_query).values_list('id', flat=True)))]
query_filter = None
for f in filters:
if query_filter:
query_filter |= f
else:
query_filter = f
# TODO this is kind of a dumb method to sort. create settings to choose rank vs most often made, date created or rating
search_rank = (
SearchRank('name_search_vector', search_query, cover_density=True)
+ SearchRank('desc_search_vector', search_query, cover_density=True)
+ SearchRank('steps__search_vector', search_query, cover_density=True)
)
queryset = queryset.filter(query_filter).annotate(rank=search_rank)
else:
queryset = queryset.filter(query_filter)
if len(search_keywords) > 0: if len(search_keywords) > 0:
if search_keywords_or == 'true': if search_keywords_or == 'true':
@ -130,7 +147,7 @@ def search_recipes(request, queryset, params):
if search_random == 'true': if search_random == 'true':
queryset = queryset.order_by("?") queryset = queryset.order_by("?")
elif rank_results: elif search_sort == 'rank':
queryset = queryset.order_by('-rank') queryset = queryset.order_by('-rank')
return queryset return queryset

View File

@ -2,8 +2,10 @@ from django.conf import settings
from django.contrib.postgres.search import SearchVector from django.contrib.postgres.search import SearchVector
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django_scopes import scopes_disabled from django_scopes import scopes_disabled
from django.utils import translation
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from cookbook.managers import DICTIONARY
from cookbook.models import Recipe, Step from cookbook.models import Recipe, Step
@ -16,12 +18,13 @@ class Command(BaseCommand):
self.stdout.write(self.style.WARNING(_('Only Postgress databases use full text search, no index to rebuild'))) self.stdout.write(self.style.WARNING(_('Only Postgress databases use full text search, no index to rebuild')))
try: try:
language = DICTIONARY.get(translation.get_language(), 'simple')
with scopes_disabled(): with scopes_disabled():
Recipe.objects.all().update( Recipe.objects.all().update(
name_search_vector=SearchVector('name__unaccent', weight='A'), name_search_vector=SearchVector('name__unaccent', weight='A', config=language),
desc_search_vector=SearchVector('description__unaccent', weight='B') desc_search_vector=SearchVector('description__unaccent', weight='B', config=language)
) )
Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B')) Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B', config=language))
self.stdout.write(self.style.SUCCESS(_('Recipe index rebuild complete.'))) self.stdout.write(self.style.SUCCESS(_('Recipe index rebuild complete.')))
except: except:

View File

@ -4,18 +4,23 @@ from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVectorField, SearchVector from django.contrib.postgres.search import SearchVectorField, SearchVector
from django.db import migrations from django.db import migrations
from django_scopes import scopes_disabled from django_scopes import scopes_disabled
from django.utils import translation
from cookbook.managers import DICTIONARY
from cookbook.models import Recipe, Step from cookbook.models import Recipe, Step
def set_default_search_vector(apps, schema_editor): def set_default_search_vector(apps, schema_editor):
if settings.DATABASES['default']['ENGINE'] not in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']: if settings.DATABASES['default']['ENGINE'] not in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']:
return return
language = DICTIONARY.get(translation.get_language(), 'simple')
with scopes_disabled(): with scopes_disabled():
# TODO add language # TODO this approach doesn't work terribly well if multiple languages are in use
Recipe.objects.all().update( Recipe.objects.all().update(
name_search_vector=SearchVector('name__unaccent', weight='A'), name_search_vector=SearchVector('name__unaccent', weight='A', config=language),
desc_search_vector=SearchVector('description__unaccent', weight='B') desc_search_vector=SearchVector('description__unaccent', weight='B', config=language)
) )
Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B')) Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B'))

View File

@ -19,7 +19,7 @@ def update_recipe_search_vector(sender, instance=None, created=False, **kwargs):
language = DICTIONARY.get(translation.get_language(), 'simple') language = DICTIONARY.get(translation.get_language(), 'simple')
instance.name_search_vector = SearchVector('name__unaccent', weight='A', config=language) instance.name_search_vector = SearchVector('name__unaccent', weight='A', config=language)
instance.desc_search_vector = SearchVector('description__unaccent', weight='C', config=language) instance.desc_search_vector = SearchVector('description__unaccent', weight='C', config=language)
try: try:
instance._dirty = True instance._dirty = True
@ -37,7 +37,8 @@ def update_step_search_vector(sender, instance=None, created=False, **kwargs):
if hasattr(instance, '_dirty'): if hasattr(instance, '_dirty'):
return return
instance.search_vector = SearchVector('instruction__unaccent', weight='B') language = DICTIONARY.get(translation.get_language(), 'simple')
instance.search_vector = SearchVector('instruction__unaccent', weight='B', config=language)
try: try:
instance._dirty = True instance._dirty = True