diff --git a/cookbook/forms.py b/cookbook/forms.py index 2cbefd9e..b1fb72f2 100644 --- a/cookbook/forms.py +++ b/cookbook/forms.py @@ -487,7 +487,7 @@ class SearchPreferenceForm(forms.ModelForm): fields = ('search', 'unaccent', 'icontains', 'istartswith', 'trigram', 'fulltext') help_texts = { - 'search': _('Select type method of search. Click here for full desciption of choices.'), + 'search': _('Select type method of search. Click here for full desciption of choices.'), 'unaccent': _('Fields to search ignoring accents. Selecting this option can improve or degrade search quality depending on language'), 'icontains': _("Fields to search for partial matches. (e.g. searching for 'Pie' will return 'pie' and 'piece' and 'soapie')"), 'istartswith': _("Fields to search for beginning of word matches. (e.g. searching for 'sa' will return 'salad' and 'sandwich')"), @@ -512,3 +512,5 @@ class SearchPreferenceForm(forms.ModelForm): 'trigram': MultiSelectWidget, 'fulltext': MultiSelectWidget, } + + diff --git a/cookbook/helper/recipe_search.py b/cookbook/helper/recipe_search.py index af4403d6..21db1384 100644 --- a/cookbook/helper/recipe_search.py +++ b/cookbook/helper/recipe_search.py @@ -12,14 +12,7 @@ from cookbook.models import Food, Keyword, ViewLog def search_recipes(request, queryset, params): - fields = { - 'name': 'name', - 'description': 'description', - 'instructions': 'steps__instruction', - 'foods': 'steps__ingredients__food__name', - 'keywords': 'keywords__name' - } - + search_prefs = request.user.searchpreference search_string = params.get('query', '') search_keywords = params.getlist('keywords', []) search_foods = params.getlist('foods', []) @@ -45,35 +38,38 @@ def search_recipes(request, queryset, params): created_at__gte=(datetime.now() - timedelta(days=7)), then=Value(100)), default=Value(0), )).order_by('-new_recipe', 'name') - search_type = None + search_type = search_prefs.search or 'plain' search_sort = None if len(search_string) > 0: - # TODO move all of 
these to settings somewhere - probably user settings + unaccent_include = search_prefs.unaccent.values_list('field', flat=True) - unaccent_include = ['name', 'description', 'instructions', 'keywords', 'foods'] # can also contain: description, instructions, keywords, foods - # TODO when setting up settings length of arrays below must be >=1 + icontains_include = [x + '__unaccent' if x in unaccent_include else x for x in search_prefs.icontains.values_list('field', flat=True)] + istartswith_include = [x + '__unaccent' if x in unaccent_include else x for x in search_prefs.istartswith.values_list('field', flat=True)] + trigram_include = [x + '__unaccent' if x in unaccent_include else x for x in search_prefs.trigram.values_list('field', flat=True)] + fulltext_include = search_prefs.fulltext.values_list('field', flat=True) # fulltext doesn't use field name directly - icontains_include = [] # can contain: name, description, instructions, keywords, foods - istartswith_include = ['name'] # can also contain: description, instructions, keywords, foods - trigram_include = ['name', 'description', 'instructions'] # only these choices - keywords and foods are really, really, really slow maybe add to subquery? 
- fulltext_include = ['name', 'description', 'instructions', 'foods', 'keywords'] + # if no filters are configured use name__icontains as default + if len(icontains_include) + len(istartswith_include) + len(trigram_include) + len(fulltext_include) == 0: + filters = [Q(**{"name__icontains": search_string})] + else: + filters = [] - # END OF SETTINGS SECTION - for f in unaccent_include: - fields[f] += '__unaccent' - - filters = [] + # dynamically build array of filters that will be applied for f in icontains_include: - filters += [Q(**{"%s__icontains" % fields[f]: search_string})] + filters += [Q(**{"%s__icontains" % f: search_string})] for f in istartswith_include: - filters += [Q(**{"%s__istartswith" % fields[f]: search_string})] + filters += [Q(**{"%s__istartswith" % f: search_string})] if settings.DATABASES['default']['ENGINE'] in ['django.db.backends.postgresql_psycopg2', 'django.db.backends.postgresql']: language = DICTIONARY.get(translation.get_language(), 'simple') # django full text search https://docs.djangoproject.com/en/3.2/ref/contrib/postgres/search/#searchquery - search_type = 'websearch' # other postgress options are phrase or plain or raw (websearch and trigrams are mutually exclusive) - search_trigram = False + # TODO can options install this extension to further enhance search query language https://github.com/caub/pg-tsquery + # trigram breaks full text search 'websearch' and 'raw' capabilities and will be ignored if those methods are chosen + if search_type in ['websearch', 'raw']: + search_trigram = False + else: + search_trigram = True search_query = SearchQuery( search_string, search_type=search_type, @@ -85,10 +81,11 @@ def search_recipes(request, queryset, params): trigram = None for f in trigram_include: if trigram: - trigram += TrigramSimilarity(fields[f], search_string) + trigram += TrigramSimilarity(f, search_string) else: - trigram = TrigramSimilarity(fields[f], search_string) + trigram = TrigramSimilarity(f, search_string) 
queryset.annotate(simularity=trigram) + # TODO allow user to play with trigram scores filters += [Q(simularity__gt=0.5)] if 'name' in fulltext_include: diff --git a/cookbook/migrations/0124_build_full_text_index.py b/cookbook/migrations/0124_build_full_text_index.py index 5a6776c1..658538fd 100644 --- a/cookbook/migrations/0124_build_full_text_index.py +++ b/cookbook/migrations/0124_build_full_text_index.py @@ -95,7 +95,7 @@ class Migration(migrations.Migration): name='SearchPreference', fields=[ ('user', annoying.fields.AutoOneToOneField(on_delete=deletion.CASCADE, primary_key=True, serialize=False, to='auth.user')), - ('search', models.CharField(choices=[('PLAIN', 'Plain'), ('PHRASE', 'Phrase'), ('WEBSEARCH', 'Web'), ('RAW', 'Raw')], default='SIMPLE', max_length=32)), + ('search', models.CharField(choices=[('plain', 'Plain'), ('phrase', 'Phrase'), ('websearch', 'Web'), ('raw', 'Raw')], default='plain', max_length=32)), ('fulltext', models.ManyToManyField(blank=True, related_name='fulltext_fields', to='cookbook.SearchFields')), ('icontains', models.ManyToManyField(blank=True, default=nameSearchField, related_name='icontains_fields', to='cookbook.SearchFields')), ('istartswith', models.ManyToManyField(blank=True, related_name='istartswith_fields', to='cookbook.SearchFields')), diff --git a/cookbook/migrations/0125_create_searchfields.py.stop b/cookbook/migrations/0125_create_searchfields.py similarity index 100% rename from cookbook/migrations/0125_create_searchfields.py.stop rename to cookbook/migrations/0125_create_searchfields.py diff --git a/cookbook/models.py b/cookbook/models.py index 64c09a35..a09b8e38 100644 --- a/cookbook/models.py +++ b/cookbook/models.py @@ -749,13 +749,12 @@ def nameSearchField(): class SearchPreference(models.Model, PermissionModelMixin): # Search Style (validation parsleyjs.org) # phrase or plain or raw (websearch and trigrams are mutually exclusive) - SIMPLE = 'SIMPLE' - PLAIN = 'PLAIN' - PHRASE = 'PHRASE' - WEB = 'WEBSEARCH' - RAW = 
'RAW' + SIMPLE = 'plain' + PHRASE = 'phrase' + WEB = 'websearch' + RAW = 'raw' SEARCH_STYLE = ( - (PLAIN, _('Plain')), + (SIMPLE, _('Simple')), (PHRASE, _('Phrase')), (WEB, _('Web')), (RAW, _('Raw')) diff --git a/cookbook/templates/search_info.html b/cookbook/templates/search_info.html new file mode 100644 index 00000000..6f267a58 --- /dev/null +++ b/cookbook/templates/search_info.html @@ -0,0 +1,110 @@ +{% extends "base.html" %} +{% load static %} +{% load i18n %} + +{% block title %}{% trans "Search Settings" %}{% endblock %} + +{% block content %} + +

{% trans 'Search Settings' %}

+ {% blocktrans %} + Creating the best search experience is complicated and weighs heavily on your personal configuration. + Changing any of the search settings can have significant impact on the speed and quality of the results. + Search Methods, Trigrams and Full Text Search configurations are only available if you are using Postgres for your database. + {% endblocktrans %} + +
+
+ +

{% trans 'Search Methods' %}

+ +
+
+

{% blocktrans %} + Full text searches attempt to normalize the words provided to match common variants. For example: 'forked', 'forking', 'forks' will all normalize to 'fork'. + There are several methods available, described below, that will control how the search behavior should react when multiple words are searched. + Full technical details on how these operate can be viewed on Postgresql's website. + {% endblocktrans %}

+

{% trans 'Simple' %}

+

{% blocktrans %} + Simple searches ignore punctuation and common words such as 'the', 'a', 'and', and will treat separate words as required. + Searching for 'apple or flour' will return any recipe that includes both 'apple' and 'flour' anywhere in the fields that have been selected for a full text search. + {% endblocktrans %}

+

{% trans 'Phrase' %}

+

{% blocktrans %} + Phrase searches ignore punctuation, but will search for all of the words in the exact order provided. + Searching for 'apple or flour' will only return a recipe that includes the exact phrase 'apple or flour' in any of the fields that have been selected for a full text search. + {% endblocktrans %}

+

{% trans 'Web' %}

+

{% blocktrans %} + Web searches simulate functionality found on many web search sites supporting special syntax. + Placing quotes around several words will convert those words into a phrase. + 'or' is recognized as searching for the word (or phrase) immediately before 'or' OR the word (or phrase) directly after. + '-' is recognized as searching for recipes that do not include the word (or phrase) that comes immediately after. + For example searching for 'apple pie' or cherry -butter will return any recipe that includes the phrase 'apple pie' or the word 'cherry' + in any field included in the full text search but exclude any recipe that has the word 'butter' in any field included. + {% endblocktrans %}

+

{% trans 'Raw' %}

+

{% blocktrans %} + Raw search is similar to Web, except that it will take punctuation operators such as '|', '&' and '()'. + {% endblocktrans %}

+
+ +
+ +
+

fuzzy search

+
+
+ {% blocktrans %} + Another approach to searching that also requires Postgresql is fuzzy search or trigram similarity. A trigram is a group of three consecutive characters. + For example searching for 'apple' will create trigrams such as 'app', 'ppl', 'ple' and will create a score of how closely words match the generated trigrams. + One benefit of searching trigrams is that a search for 'sandwich' will find misspelled words such as 'sandwhich' that would be missed by other methods. + {% endblocktrans %} +
+ +
+ +
+

{% trans 'Search Fields' %}

+ +
+
+ {% blocktrans %} + Unaccent is a special case in that it enables searching a field 'unaccented' for each search style attempting to ignore accented values. + For example when you enable unaccent for 'Name' any search (starts with, contains, trigram) will attempt the search ignoring accented characters. + + For the other options, you can enable search on any or all fields and they will be combined together with an assumed 'OR'. + For example enabling 'Name' for Starts With, 'Name' and 'Description' for Partial Match and 'Ingredients' and 'Keywords' for Full Search + and searching for 'apple' will generate a search that will return recipes that have: + - A recipe name that starts with 'apple' + - OR a recipe name that contains 'apple' + - OR a recipe description that contains 'apple' + - OR a recipe that will have a full text search match ('apple' or 'apples') in ingredients + - OR a recipe that will have a full text search match in Keywords + + Combining too many fields in too many types of search can have a negative impact on performance, create duplicate results or return unexpected results. + For example, enabling fuzzy search or partial matches will interfere with web search methods. + Searching for 'apple -pie' with fuzzy search and full text search will return the recipe Apple Pie. Though it is not included in the full text results, it does match the trigram results. + {% endblocktrans %} +
+ +
+ +
+

{% trans 'Search Index' %}

+ +
+
+ {% blocktrans %} + Trigram search and Full Text Search both rely on database indexes to perform effectively. + You can rebuild the indexes on all fields in the Admin page for Recipes by selecting all recipes and running 'rebuild index for selected recipes'. + You can also rebuild indexes at the command line by executing the management command 'python manage.py rebuildindex'. + {% endblocktrans %} +
+ +
+
+
+
+{% endblock %} diff --git a/cookbook/templates/settings.html b/cookbook/templates/settings.html index d5d66083..42df367b 100644 --- a/cookbook/templates/settings.html +++ b/cookbook/templates/settings.html @@ -19,27 +19,35 @@
-
+

{% trans 'Name Settings' %}

{% csrf_token %} @@ -71,7 +79,7 @@
-
+
@@ -122,7 +130,7 @@
-
+
@@ -154,7 +162,7 @@
-