Make automation parameters case-insensitive on search

2023-05-02 16:50:13 -05:00 · 2023-05-02 16:50:13 -05:00 · 9b50ea4c22
commit 9b50ea4c22
parent cde8dd8b53
2 changed files with 57 additions and 41 deletions
--- a/cookbook/helper/ingredient_parser.py
+++ b/cookbook/helper/ingredient_parser.py
@ -4,6 +4,7 @@ import unicodedata

 from django.core.cache import caches
 from django.db.models import Q
+from django.db.models.functions import Lower

 from cookbook.models import Automation, Food, Ingredient, Unit

@ -32,7 +33,7 @@ class IngredientParser:
                caches['default'].touch(FOOD_CACHE_KEY, 30)
            else:
                for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.FOOD_ALIAS).only('param_1', 'param_2').order_by('order').all():
-                    self.food_aliases[a.param_1] = a.param_2
+                    self.food_aliases[a.param_1.lower()] = a.param_2
                caches['default'].set(FOOD_CACHE_KEY, self.food_aliases, 30)

            UNIT_CACHE_KEY = f'automation_unit_alias_{self.request.space.pk}'
@ -41,7 +42,7 @@ class IngredientParser:
                caches['default'].touch(UNIT_CACHE_KEY, 30)
            else:
                for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.UNIT_ALIAS).only('param_1', 'param_2').order_by('order').all():
-                    self.unit_aliases[a.param_1] = a.param_2
+                    self.unit_aliases[a.param_1.lower()] = a.param_2
                caches['default'].set(UNIT_CACHE_KEY, self.unit_aliases, 30)

            NEVER_UNIT_CACHE_KEY = f'automation_never_unit_{self.request.space.pk}'
@ -50,7 +51,7 @@ class IngredientParser:
                caches['default'].touch(NEVER_UNIT_CACHE_KEY, 30)
            else:
                for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.NEVER_UNIT).only('param_1', 'param_2').order_by('order').all():
-                    self.never_unit[a.param_1] = a.param_2
+                    self.never_unit[a.param_1.lower()] = a.param_2
                caches['default'].set(NEVER_UNIT_CACHE_KEY, self.never_unit, 30)

            TRANSPOSE_WORDS_CACHE_KEY = f'automation_transpose_words_{self.request.space.pk}'
@ -60,7 +61,7 @@ class IngredientParser:
            else:
                i = 0
                for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.TRANSPOSE_WORDS).only('param_1', 'param_2').order_by('order').all():
-                    self.transpose_words[i] = [a.param_1, a.param_2]
+                    self.transpose_words[i] = [a.param_1.lower(), a.param_2.lower()]
                    i += 1
                caches['default'].set(TRANSPOSE_WORDS_CACHE_KEY, self.transpose_words, 30)
        else:
@ -80,11 +81,11 @@ class IngredientParser:
        else:
            if self.food_aliases:
                try:
-                    return self.food_aliases[food]
+                    return self.food_aliases[food.lower()]
                except KeyError:
                    return food
            else:
-                if automation := Automation.objects.filter(space=self.request.space, type=Automation.FOOD_ALIAS, param_1=food, disabled=False).order_by('order').first():
+                if automation := Automation.objects.filter(space=self.request.space, type=Automation.FOOD_ALIAS, param_1__iexact=food, disabled=False).order_by('order').first():
                    return automation.param_2
        return food

@ -99,11 +100,11 @@ class IngredientParser:
        else:
            if self.transpose_words:
                try:
-                    return self.unit_aliases[unit]
+                    return self.unit_aliases[unit.lower()]
                except KeyError:
                    return unit
            else:
-                if automation := Automation.objects.filter(space=self.request.space, type=Automation.UNIT_ALIAS, param_1=unit, disabled=False).order_by('order').first():
+                if automation := Automation.objects.filter(space=self.request.space, type=Automation.UNIT_ALIAS, param_1__iexact=unit, disabled=False).order_by('order').first():
                    return automation.param_2
        return unit

@ -249,14 +250,14 @@ class IngredientParser:
        never_unit = False
        if self.never_unit:
            try:
-                new_unit = self.never_unit[tokens[1]]
+                new_unit = self.never_unit[tokens[1].lower()]
                never_unit = True
            except KeyError:
                return tokens

        else:
-            if automation := Automation.objects.filter(space=self.request.space, type=Automation.NEVER_UNIT, param_1__in=[
-                                                       tokens[1], alt_unit], disabled=False).order_by('order').first():
+            if automation := Automation.objects.annotate(param_1_lower=Lower('param_1')).filter(space=self.request.space, type=Automation.NEVER_UNIT, param_1_lower__in=[
+                    tokens[1].lower(), alt_unit.lower()], disabled=False).order_by('order').first():
                new_unit = automation.param_2
                never_unit = True

@ -277,18 +278,19 @@ class IngredientParser:
            return ingredient

        else:
-            tokens = ingredient.replace(',', ' ').split()
+            tokens = [x.lower() for x in ingredient.replace(',', ' ').split()]
            if self.transpose_words:
                filtered_rules = {}
                for key, value in self.transpose_words.items():
                    if value[0] in tokens and value[1] in tokens:
                        filtered_rules[key] = value
                for k, v in filtered_rules.items():
-                    ingredient = re.sub(rf"\b({v[0]})\W*({v[1]})\b", r"\2 \1", ingredient)
+                    ingredient = re.sub(rf"\b({v[0]})\W*({v[1]})\b", r"\2 \1", ingredient, flags=re.IGNORECASE)
            else:
-                for rule in Automation.objects.filter(space=self.request.space, type=Automation.TRANSPOSE_WORDS, disabled=False).filter(
-                        Q(Q(param_1__in=tokens) | Q(param_2__in=tokens))).order_by('order'):
-                    ingredient = re.sub(rf"\b({v[0]})\W*({v[1]})\b", r"\2 \1", ingredient)
+                for rule in Automation.objects.filter(space=self.request.space, type=Automation.TRANSPOSE_WORDS, disabled=False) \
+                        .annotate(param_1_lower=Lower('param_1'), param_2_lower=Lower('param_2')) \
+                        .filter(Q(Q(param_1_lower__in=tokens) | Q(param_2_lower__in=tokens))).order_by('order'):
+                    ingredient = re.sub(rf"\b({v[0]})\W*({v[1]})\b", r"\2 \1", ingredient, flags=re.IGNORECASE)
        return ingredient

    def parse(self, ingredient):
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@ -15,7 +15,6 @@ from recipe_scrapers._utils import get_host_name, get_minutes
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.models import Automation, Keyword, PropertyType

-
 # from unicodedata import decomposition


@ -51,7 +50,8 @@ def get_from_scraper(scrape, request):
    recipe_json['internal'] = True

    try:
-        servings = scrape.schema.data.get('recipeYield') or 1  # dont use scrape.yields() as this will always return "x servings" or "x items", should be improved in scrapers directly
+        # don't use scrape.yields() as it always returns "x servings" or "x items"; this should be improved in the scrapers directly
+        servings = scrape.schema.data.get('recipeYield') or 1
    except Exception:
        servings = 1

@ -156,7 +156,14 @@ def get_from_scraper(scrape, request):
    parsed_description = parse_description(description)
    # TODO notify user about limit if reached
    # limits exist to limit the attack surface for dos style attacks
-    automations = Automation.objects.filter(type=Automation.DESCRIPTION_REPLACE, space=request.space, disabled=False).only('param_1', 'param_2', 'param_3').all().order_by('order')[:512]
+    automations = Automation.objects.filter(
+        type=Automation.DESCRIPTION_REPLACE,
+        space=request.space,
+        disabled=False).only(
+        'param_1',
+        'param_2',
+        'param_3').all().order_by('order')[
+            :512]
    for a in automations:
        if re.match(a.param_1, (recipe_json['source_url'])[:512]):
            parsed_description = re.sub(a.param_2, a.param_3, parsed_description, count=1)
@ -206,7 +213,14 @@ def get_from_scraper(scrape, request):
        pass

    if 'source_url' in recipe_json and recipe_json['source_url']:
-        automations = Automation.objects.filter(type=Automation.INSTRUCTION_REPLACE, space=request.space, disabled=False).only('param_1', 'param_2', 'param_3').order_by('order').all()[:512]
+        automations = Automation.objects.filter(
+            type=Automation.INSTRUCTION_REPLACE,
+            space=request.space,
+            disabled=False).only(
+            'param_1',
+            'param_2',
+            'param_3').order_by('order').all()[
+            :512]
        for a in automations:
            if re.match(a.param_1, (recipe_json['source_url'])[:512]):
                for s in recipe_json['steps']:
@ -272,7 +286,7 @@ def get_from_youtube_scraper(url, request):


 def parse_name(name):
-    if type(name) == list:
+    if isinstance(name, list):
        try:
            name = name[0]
        except Exception:
@ -316,16 +330,16 @@ def parse_instructions(instructions):
    """
    instruction_list = []

-    if type(instructions) == list:
+    if isinstance(instructions, list):
        for i in instructions:
-            if type(i) == str:
+            if isinstance(i, str):
                instruction_list.append(clean_instruction_string(i))
            else:
                if 'text' in i:
                    instruction_list.append(clean_instruction_string(i['text']))
                elif 'itemListElement' in i:
                    for ile in i['itemListElement']:
-                        if type(ile) == str:
+                        if isinstance(ile, str):
                            instruction_list.append(clean_instruction_string(ile))
                        elif 'text' in ile:
                            instruction_list.append(clean_instruction_string(ile['text']))
@ -341,13 +355,13 @@ def parse_image(image):
    # check if list of images is returned, take first if so
    if not image:
        return None
-    if type(image) == list:
+    if isinstance(image, list):
        for pic in image:
-            if (type(pic) == str) and (pic[:4] == 'http'):
+            if (isinstance(pic, str)) and (pic[:4] == 'http'):
                image = pic
            elif 'url' in pic:
                image = pic['url']
-    elif type(image) == dict:
+    elif isinstance(image, dict):
        if 'url' in image:
            image = image['url']

@ -358,12 +372,12 @@ def parse_image(image):


 def parse_servings(servings):
-    if type(servings) == str:
+    if isinstance(servings, str):
        try:
            servings = int(re.search(r'\d+', servings).group())
        except AttributeError:
            servings = 1
-    elif type(servings) == list:
+    elif isinstance(servings, list):
        try:
            servings = int(re.findall(r'\b\d+\b', servings[0])[0])
        except KeyError:
@ -372,12 +386,12 @@ def parse_servings(servings):


 def parse_servings_text(servings):
-    if type(servings) == str:
+    if isinstance(servings, str):
        try:
-            servings = re.sub("\d+", '', servings).strip()
+            servings = re.sub("\\d+", '', servings).strip()
        except Exception:
            servings = ''
-    if type(servings) == list:
+    if isinstance(servings, list):
        try:
            servings = parse_servings_text(servings[1])
        except Exception:
@ -394,7 +408,7 @@ def parse_time(recipe_time):
                recipe_time = round(iso_parse_duration(recipe_time).seconds / 60)
            except ISO8601Error:
                try:
-                    if (type(recipe_time) == list and len(recipe_time) > 0):
+                    if (isinstance(recipe_time, list) and len(recipe_time) > 0):
                        recipe_time = recipe_time[0]
                    recipe_time = round(parse_duration(recipe_time).seconds / 60)
                except AttributeError:
@ -413,7 +427,7 @@ def parse_keywords(keyword_json, space):
        caches['default'].touch(KEYWORD_CACHE_KEY, 30)
    else:
        for a in Automation.objects.filter(space=space, disabled=False, type=Automation.KEYWORD_ALIAS).only('param_1', 'param_2').order_by('order').all():
-            keyword_aliases[a.param_1] = a.param_2
+            keyword_aliases[a.param_1.lower()] = a.param_2
        caches['default'].set(KEYWORD_CACHE_KEY, keyword_aliases, 30)

    # keywords as list
@ -424,7 +438,7 @@ def parse_keywords(keyword_json, space):
        if len(kw) != 0:
            if keyword_aliases:
                try:
-                    kw = keyword_aliases[kw]
+                    kw = keyword_aliases[kw.lower()]
                except KeyError:
                    pass
            if k := Keyword.objects.filter(name=kw, space=space).first():
@ -438,15 +452,15 @@ def parse_keywords(keyword_json, space):
 def listify_keywords(keyword_list):
    # keywords as string
    try:
-        if type(keyword_list[0]) == dict:
+        if isinstance(keyword_list[0], dict):
            return keyword_list
    except (KeyError, IndexError):
        pass
-    if type(keyword_list) == str:
+    if isinstance(keyword_list, str):
        keyword_list = keyword_list.split(',')

    # keywords as string in list
-    if (type(keyword_list) == list and len(keyword_list) == 1 and ',' in keyword_list[0]):
+    if (isinstance(keyword_list, list) and len(keyword_list) == 1 and ',' in keyword_list[0]):
        keyword_list = keyword_list[0].split(',')
    return [x.strip() for x in keyword_list]

@ -500,13 +514,13 @@ def get_images_from_soup(soup, url):


 def clean_dict(input_dict, key):
-    if type(input_dict) == dict:
+    if isinstance(input_dict, dict):
        for x in list(input_dict):
            if x == key:
                del input_dict[x]
-            elif type(input_dict[x]) == dict:
+            elif isinstance(input_dict[x], dict):
                input_dict[x] = clean_dict(input_dict[x], key)
-            elif type(input_dict[x]) == list:
+            elif isinstance(input_dict[x], list):
                temp_list = []
                for e in input_dict[x]:
                    temp_list.append(clean_dict(e, key))