foundation for user configurable search parameters

This commit is contained in:
smilerz 2021-05-29 20:14:35 -05:00
parent 51cda4c2ff
commit 48f5642e04
5 changed files with 109 additions and 79 deletions

View File

@ -4,6 +4,7 @@ from django.contrib.postgres.search import SearchVector
from django.contrib.auth.admin import UserAdmin from django.contrib.auth.admin import UserAdmin
from django.contrib.auth.models import User, Group from django.contrib.auth.models import User, Group
from django_scopes import scopes_disabled from django_scopes import scopes_disabled
from django.utils import translation
from .models import (Comment, CookLog, Food, Ingredient, InviteLink, Keyword, from .models import (Comment, CookLog, Food, Ingredient, InviteLink, Keyword,
MealPlan, MealType, NutritionInformation, Recipe, MealPlan, MealType, NutritionInformation, Recipe,
@ -13,6 +14,8 @@ from .models import (Comment, CookLog, Food, Ingredient, InviteLink, Keyword,
ViewLog, Supermarket, SupermarketCategory, SupermarketCategoryRelation, ViewLog, Supermarket, SupermarketCategory, SupermarketCategoryRelation,
ImportLog, TelegramBot, BookmarkletImport, UserFile) ImportLog, TelegramBot, BookmarkletImport, UserFile)
from cookbook.managers import DICTIONARY
class CustomUserAdmin(UserAdmin): class CustomUserAdmin(UserAdmin):
def has_add_permission(self, request, obj=None): def has_add_permission(self, request, obj=None):
@ -96,12 +99,13 @@ admin.site.register(Step, StepAdmin)
@admin.action(description='Rebuild index for selected recipes') @admin.action(description='Rebuild index for selected recipes')
def rebuild_index(modeladmin, request, queryset): def rebuild_index(modeladmin, request, queryset):
language = DICTIONARY.get(translation.get_language(), 'simple')
with scopes_disabled(): with scopes_disabled():
Recipe.objects.all().update( Recipe.objects.all().update(
name_search_vector=SearchVector('name__unaccent', weight='A'), name_search_vector=SearchVector('name__unaccent', weight='A', config=language),
desc_search_vector=SearchVector('description__unaccent', weight='B') desc_search_vector=SearchVector('description__unaccent', weight='B', config=language)
) )
Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B')) Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B', config=language))
class RecipeAdmin(admin.ModelAdmin): class RecipeAdmin(admin.ModelAdmin):

View File

@ -1,32 +1,25 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
from recipes import settings from recipes import settings
from django.contrib.postgres.aggregates import StringAgg
from django.contrib.postgres.search import ( from django.contrib.postgres.search import (
SearchQuery, SearchRank, SearchVector, SearchQuery, SearchRank, TrigramSimilarity
) )
from django.db.models import Q, Case, When, Value, Count, Sum from django.db.models import Q, Subquery, Case, When, Value
from django.utils import translation from django.utils import translation
from cookbook.models import ViewLog from cookbook.managers import DICTIONARY
from cookbook.models import Food, Keyword, ViewLog
DICTIONARY = {
# TODO find custom dictionaries - maybe from here https://www.postgresql.org/message-id/CAF4Au4x6X_wSXFwsQYE8q5o0aQZANrvYjZJ8uOnsiHDnOVPPEg%40mail.gmail.com
# 'hy': 'Armenian',
# 'ca': 'Catalan',
# 'cs': 'Czech',
'nl': 'dutch',
'en': 'english',
'fr': 'french',
'de': 'german',
'it': 'italian',
# 'lv': 'Latvian',
'es': 'spanish',
}
def search_recipes(request, queryset, params): def search_recipes(request, queryset, params):
fields = {
'name': 'name',
'description': 'description',
'instructions': 'steps__instruction',
'foods': 'steps__ingredients__food__name',
'keywords': 'keywords__name'
}
search_string = params.get('query', '') search_string = params.get('query', '')
search_keywords = params.getlist('keywords', []) search_keywords = params.getlist('keywords', [])
search_foods = params.getlist('foods', []) search_foods = params.getlist('foods', [])
@ -53,54 +46,78 @@ def search_recipes(request, queryset, params):
created_at__gte=(datetime.now() - timedelta(days=7)), then=Value(100)), created_at__gte=(datetime.now() - timedelta(days=7)), then=Value(100)),
default=Value(0), )).order_by('-new_recipe', 'name') default=Value(0), )).order_by('-new_recipe', 'name')
rank_results = False search_type = None
if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql'] and search_string != '': search_sort = None
rank_results = True if len(search_string) > 0:
# queryset = queryset.annotate(similarity=TrigramSimilarity('name', search_string), ) # TODO move all of these to settings somewhere - probably user settings
# .filter(Q(similarity__gt=0.1) | Q(name__unaccent__icontains=search_string)).order_by('-similarity')
unaccent_include = ['name', 'description', 'instructions', 'keywords', 'foods'] # can also contain: description, instructions, keywords, foods
# TODO when setting up settings length of arrays below must be >=1
icontains_include = [] # can contain: name, description, instructions, keywords, foods
istartswith_include = ['name'] # can also contain: description, instructions, keywords, foods
trigram_include = ['name', 'description', 'instructions'] # only these choices - keywords and foods are really, really, really slow maybe add to subquery?
fulltext_include = ['name', 'description', 'instructions', 'foods', 'keywords']
# END OF SETTINGS SECTION
for f in unaccent_include:
fields[f] += '__unaccent'
filters = []
for f in icontains_include:
filters += [Q(**{"%s__icontains" % fields[f]: search_string})]
for f in istartswith_include:
filters += [Q(**{"%s__istartswith" % fields[f]: search_string})]
if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']:
language = DICTIONARY.get(translation.get_language(), 'simple') language = DICTIONARY.get(translation.get_language(), 'simple')
# django full text search https://docs.djangoproject.com/en/3.2/ref/contrib/postgres/search/#searchquery
search_type = 'websearch' # other postgress options are phrase or plain or raw (websearch and trigrams are mutually exclusive)
search_trigram = False
search_query = SearchQuery( search_query = SearchQuery(
search_string, search_string,
search_type="websearch", search_type=search_type,
config=language, config=language,
) )
# TODO create user options to add/remove query elements from search so that they can fine tune their own experience
# trigrams, icontains, unaccent and startswith all impact results and performance significantly # iterate through fields to use in trigrams generating a single trigram
search_vectors = ( if search_trigram & len(trigram_include) > 1:
# SearchVector('search_vector') <-- this can be searched like a field trigram = None
SearchVector(StringAgg('steps__ingredients__food__name__unaccent', delimiter=' '), weight='B') for f in trigram_include:
+ SearchVector(StringAgg('keywords__name__unaccent', delimiter=' '), weight='B') if trigram:
) trigram += TrigramSimilarity(fields[f], search_string)
# trigrams don't seem to add anything and severely limit accuracy of results.
# TODO add trigrams as an on/off feature
# trigram = (
# TrigramSimilarity('name__unaccent', search_string)
# + TrigramSimilarity('description__unaccent', search_string)
# # adding trigrams to ingredients and keywords causes duplicate results that can't be made unique
# + TrigramSimilarity('steps__ingredients__food__name__unaccent', search_string)
# + TrigramSimilarity('keywords__name__unaccent', search_string)
# )
search_rank = (
SearchRank('name_search_vector', search_query)
+ SearchRank('desc_search_vector', search_query)
+ SearchRank('steps__search_vector', search_query)
+ SearchRank(search_vectors, search_query)
)
queryset = (
queryset.annotate(
vector=search_vectors
)
.filter(
# vector=search_query
Q(name_search_vector=search_query)
| Q(desc_search_vector=search_query)
| Q(steps__search_vector=search_query)
| Q(vector=search_query)
| Q(name__istartswith=search_string)
).annotate(rank=search_rank)
)
else: else:
queryset = queryset.filter(name__icontains=search_string) trigram = TrigramSimilarity(fields[f], search_string)
queryset.annotate(simularity=trigram)
filters += [Q(simularity__gt=0.5)]
if 'name' in fulltext_include:
filters += [Q(name_search_vector=search_query)]
if 'description' in fulltext_include:
filters += [Q(desc_search_vector=search_query)]
if 'instructions' in fulltext_include:
filters += [Q(steps__search_vector=search_query)]
if 'keywords' in fulltext_include:
filters += [Q(keywords__in=Subquery(Keyword.objects.filter(name__search=search_query).values_list('id', flat=True)))]
if 'foods' in fulltext_include:
filters += [Q(steps__ingredients__food__in=Subquery(Food.objects.filter(name__search=search_query).values_list('id', flat=True)))]
query_filter = None
for f in filters:
if query_filter:
query_filter |= f
else:
query_filter = f
# TODO this is kind of a dumb method to sort. create settings to choose rank vs most often made, date created or rating
search_rank = (
SearchRank('name_search_vector', search_query, cover_density=True)
+ SearchRank('desc_search_vector', search_query, cover_density=True)
+ SearchRank('steps__search_vector', search_query, cover_density=True)
)
queryset = queryset.filter(query_filter).annotate(rank=search_rank)
else:
queryset = queryset.filter(query_filter)
if len(search_keywords) > 0: if len(search_keywords) > 0:
if search_keywords_or == 'true': if search_keywords_or == 'true':
@ -130,7 +147,7 @@ def search_recipes(request, queryset, params):
if search_random == 'true': if search_random == 'true':
queryset = queryset.order_by("?") queryset = queryset.order_by("?")
elif rank_results: elif search_sort == 'rank':
queryset = queryset.order_by('-rank') queryset = queryset.order_by('-rank')
return queryset return queryset

View File

@ -2,8 +2,10 @@ from django.conf import settings
from django.contrib.postgres.search import SearchVector from django.contrib.postgres.search import SearchVector
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django_scopes import scopes_disabled from django_scopes import scopes_disabled
from django.utils import translation
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from cookbook.managers import DICTIONARY
from cookbook.models import Recipe, Step from cookbook.models import Recipe, Step
@ -16,12 +18,13 @@ class Command(BaseCommand):
self.stdout.write(self.style.WARNING(_('Only Postgress databases use full text search, no index to rebuild'))) self.stdout.write(self.style.WARNING(_('Only Postgress databases use full text search, no index to rebuild')))
try: try:
language = DICTIONARY.get(translation.get_language(), 'simple')
with scopes_disabled(): with scopes_disabled():
Recipe.objects.all().update( Recipe.objects.all().update(
name_search_vector=SearchVector('name__unaccent', weight='A'), name_search_vector=SearchVector('name__unaccent', weight='A', config=language),
desc_search_vector=SearchVector('description__unaccent', weight='B') desc_search_vector=SearchVector('description__unaccent', weight='B', config=language)
) )
Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B')) Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B', config=language))
self.stdout.write(self.style.SUCCESS(_('Recipe index rebuild complete.'))) self.stdout.write(self.style.SUCCESS(_('Recipe index rebuild complete.')))
except: except:

View File

@ -4,17 +4,22 @@ from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVectorField, SearchVector from django.contrib.postgres.search import SearchVectorField, SearchVector
from django.db import migrations from django.db import migrations
from django_scopes import scopes_disabled from django_scopes import scopes_disabled
from django.utils import translation
from cookbook.managers import DICTIONARY
from cookbook.models import Recipe, Step from cookbook.models import Recipe, Step
def set_default_search_vector(apps, schema_editor): def set_default_search_vector(apps, schema_editor):
if settings.DATABASES['default']['ENGINE'] not in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']: if settings.DATABASES['default']['ENGINE'] not in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']:
return return
language = DICTIONARY.get(translation.get_language(), 'simple')
with scopes_disabled(): with scopes_disabled():
# TODO add language # TODO this approach doesn't work terribly well if multiple languages are in use
Recipe.objects.all().update( Recipe.objects.all().update(
name_search_vector=SearchVector('name__unaccent', weight='A'), name_search_vector=SearchVector('name__unaccent', weight='A', config=language),
desc_search_vector=SearchVector('description__unaccent', weight='B') desc_search_vector=SearchVector('description__unaccent', weight='B', config=language)
) )
Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B')) Step.objects.all().update(search_vector=SearchVector('instruction__unaccent', weight='B'))

View File

@ -37,7 +37,8 @@ def update_step_search_vector(sender, instance=None, created=False, **kwargs):
if hasattr(instance, '_dirty'): if hasattr(instance, '_dirty'):
return return
instance.search_vector = SearchVector('instruction__unaccent', weight='B') language = DICTIONARY.get(translation.get_language(), 'simple')
instance.search_vector = SearchVector('instruction__unaccent', weight='B', config=language)
try: try:
instance._dirty = True instance._dirty = True