From 9b50ea4c22c28aabda3f8a583c6c2d4961b1fe3c Mon Sep 17 00:00:00 2001
From: smilerz
Date: Tue, 2 May 2023 16:50:13 -0500
Subject: [PATCH] make automation parameters case insensitive on search

---
 cookbook/helper/ingredient_parser.py | 34 ++++++++-------
 cookbook/helper/recipe_url_import.py | 64 +++++++++++++++++-----------
 2 files changed, 57 insertions(+), 41 deletions(-)

diff --git a/cookbook/helper/ingredient_parser.py b/cookbook/helper/ingredient_parser.py
index 21864c43..b5bdbf67 100644
--- a/cookbook/helper/ingredient_parser.py
+++ b/cookbook/helper/ingredient_parser.py
@@ -4,6 +4,7 @@ import unicodedata
 
 from django.core.cache import caches
 from django.db.models import Q
+from django.db.models.functions import Lower
 
 from cookbook.models import Automation, Food, Ingredient, Unit
 
@@ -32,7 +33,7 @@ class IngredientParser:
                 caches['default'].touch(FOOD_CACHE_KEY, 30)
             else:
                 for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.FOOD_ALIAS).only('param_1', 'param_2').order_by('order').all():
-                    self.food_aliases[a.param_1] = a.param_2
+                    self.food_aliases[a.param_1.lower()] = a.param_2
                 caches['default'].set(FOOD_CACHE_KEY, self.food_aliases, 30)
 
             UNIT_CACHE_KEY = f'automation_unit_alias_{self.request.space.pk}'
@@ -41,7 +42,7 @@ class IngredientParser:
                 caches['default'].touch(UNIT_CACHE_KEY, 30)
             else:
                 for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.UNIT_ALIAS).only('param_1', 'param_2').order_by('order').all():
-                    self.unit_aliases[a.param_1] = a.param_2
+                    self.unit_aliases[a.param_1.lower()] = a.param_2
                 caches['default'].set(UNIT_CACHE_KEY, self.unit_aliases, 30)
 
             NEVER_UNIT_CACHE_KEY = f'automation_never_unit_{self.request.space.pk}'
@@ -50,7 +51,7 @@ class IngredientParser:
                 caches['default'].touch(NEVER_UNIT_CACHE_KEY, 30)
             else:
                 for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.NEVER_UNIT).only('param_1', 'param_2').order_by('order').all():
-                    self.never_unit[a.param_1] = a.param_2
+                    self.never_unit[a.param_1.lower()] = a.param_2
                 caches['default'].set(NEVER_UNIT_CACHE_KEY, self.never_unit, 30)
 
             TRANSPOSE_WORDS_CACHE_KEY = f'automation_transpose_words_{self.request.space.pk}'
@@ -60,7 +61,7 @@ class IngredientParser:
             else:
                 i = 0
                 for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.TRANSPOSE_WORDS).only('param_1', 'param_2').order_by('order').all():
-                    self.transpose_words[i] = [a.param_1, a.param_2]
+                    self.transpose_words[i] = [a.param_1.lower(), a.param_2.lower()]
                     i += 1
                 caches['default'].set(TRANSPOSE_WORDS_CACHE_KEY, self.transpose_words, 30)
         else:
@@ -80,11 +81,11 @@ class IngredientParser:
         else:
             if self.food_aliases:
                 try:
-                    return self.food_aliases[food]
+                    return self.food_aliases[food.lower()]
                 except KeyError:
                     return food
             else:
-                if automation := Automation.objects.filter(space=self.request.space, type=Automation.FOOD_ALIAS, param_1=food, disabled=False).order_by('order').first():
+                if automation := Automation.objects.filter(space=self.request.space, type=Automation.FOOD_ALIAS, param_1__iexact=food, disabled=False).order_by('order').first():
                     return automation.param_2
         return food
 
@@ -99,11 +100,11 @@ class IngredientParser:
         else:
             if self.transpose_words:
                 try:
-                    return self.unit_aliases[unit]
+                    return self.unit_aliases[unit.lower()]
                 except KeyError:
                     return unit
             else:
-                if automation := Automation.objects.filter(space=self.request.space, type=Automation.UNIT_ALIAS, param_1=unit, disabled=False).order_by('order').first():
+                if automation := Automation.objects.filter(space=self.request.space, type=Automation.UNIT_ALIAS, param_1__iexact=unit, disabled=False).order_by('order').first():
                     return automation.param_2
         return unit
 
@@ -249,14 +250,14 @@ class IngredientParser:
         never_unit = False
         if self.never_unit:
             try:
-                new_unit = self.never_unit[tokens[1]]
+                new_unit = self.never_unit[tokens[1].lower()]
                 never_unit = True
             except KeyError:
                 return tokens
         else:
-            if automation := Automation.objects.filter(space=self.request.space, type=Automation.NEVER_UNIT, param_1__in=[
-                    tokens[1], alt_unit], disabled=False).order_by('order').first():
+            if automation := Automation.objects.annotate(param_1_lower=Lower('param_1')).filter(space=self.request.space, type=Automation.NEVER_UNIT, param_1_lower__in=[
+                    tokens[1].lower(), alt_unit.lower()], disabled=False).order_by('order').first():
                 new_unit = automation.param_2
                 never_unit = True
 
@@ -277,18 +278,19 @@ class IngredientParser:
             return ingredient
 
         else:
-            tokens = ingredient.replace(',', ' ').split()
+            tokens = [x.lower() for x in ingredient.replace(',', ' ').split()]
             if self.transpose_words:
                 filtered_rules = {}
                 for key, value in self.transpose_words.items():
                     if value[0] in tokens and value[1] in tokens:
                         filtered_rules[key] = value
                 for k, v in filtered_rules.items():
-                    ingredient = re.sub(rf"\b({v[0]})\W*({v[1]})\b", r"\2 \1", ingredient)
+                    ingredient = re.sub(rf"\b({v[0]})\W*({v[1]})\b", r"\2 \1", ingredient, flags=re.IGNORECASE)
             else:
-                for rule in Automation.objects.filter(space=self.request.space, type=Automation.TRANSPOSE_WORDS, disabled=False).filter(
-                        Q(Q(param_1__in=tokens) | Q(param_2__in=tokens))).order_by('order'):
-                    ingredient = re.sub(rf"\b({v[0]})\W*({v[1]})\b", r"\2 \1", ingredient)
+                for rule in Automation.objects.filter(space=self.request.space, type=Automation.TRANSPOSE_WORDS, disabled=False) \
+                    .annotate(param_1_lower=Lower('param_1'), param_2_lower=Lower('param_2')) \
+                    .filter(Q(Q(param_1_lower__in=tokens) | Q(param_2_lower__in=tokens))).order_by('order'):
+                    ingredient = re.sub(rf"\b({v[0]})\W*({v[1]})\b", r"\2 \1", ingredient, flags=re.IGNORECASE)
         return ingredient
 
     def parse(self, ingredient):
diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py
index f3e624d4..a7365573 100644
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@@ -15,7 +15,6 @@ from recipe_scrapers._utils import get_host_name, get_minutes
 
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.models import Automation, Keyword, PropertyType
-
 # from unicodedata import decomposition
 
 
@@ -51,7 +50,8 @@ def get_from_scraper(scrape, request):
     recipe_json['internal'] = True
 
     try:
-        servings = scrape.schema.data.get('recipeYield') or 1  # dont use scrape.yields() as this will always return "x servings" or "x items", should be improved in scrapers directly
+        # dont use scrape.yields() as this will always return "x servings" or "x items", should be improved in scrapers directly
+        servings = scrape.schema.data.get('recipeYield') or 1
     except Exception:
         servings = 1
 
@@ -156,7 +156,14 @@ def get_from_scraper(scrape, request):
     parsed_description = parse_description(description)
     # TODO notify user about limit if reached
     # limits exist to limit the attack surface for dos style attacks
-    automations = Automation.objects.filter(type=Automation.DESCRIPTION_REPLACE, space=request.space, disabled=False).only('param_1', 'param_2', 'param_3').all().order_by('order')[:512]
+    automations = Automation.objects.filter(
+        type=Automation.DESCRIPTION_REPLACE,
+        space=request.space,
+        disabled=False).only(
+        'param_1',
+        'param_2',
+        'param_3').all().order_by('order')[
+        :512]
     for a in automations:
         if re.match(a.param_1, (recipe_json['source_url'])[:512]):
             parsed_description = re.sub(a.param_2, a.param_3, parsed_description, count=1)
@@ -206,7 +213,14 @@ def get_from_scraper(scrape, request):
             pass
 
     if 'source_url' in recipe_json and recipe_json['source_url']:
-        automations = Automation.objects.filter(type=Automation.INSTRUCTION_REPLACE, space=request.space, disabled=False).only('param_1', 'param_2', 'param_3').order_by('order').all()[:512]
+        automations = Automation.objects.filter(
+            type=Automation.INSTRUCTION_REPLACE,
+            space=request.space,
+            disabled=False).only(
+            'param_1',
+            'param_2',
+            'param_3').order_by('order').all()[
+            :512]
         for a in automations:
             if re.match(a.param_1, (recipe_json['source_url'])[:512]):
                 for s in recipe_json['steps']:
@@ -272,7 +286,7 @@ def get_from_youtube_scraper(url, request):
 
 
 def parse_name(name):
-    if type(name) == list:
+    if isinstance(name, list):
         try:
             name = name[0]
         except Exception:
@@ -316,16 +330,16 @@ def parse_instructions(instructions):
     """
     instruction_list = []
 
-    if type(instructions) == list:
+    if isinstance(instructions, list):
         for i in instructions:
-            if type(i) == str:
+            if isinstance(i, str):
                 instruction_list.append(clean_instruction_string(i))
             else:
                 if 'text' in i:
                     instruction_list.append(clean_instruction_string(i['text']))
                 elif 'itemListElement' in i:
                     for ile in i['itemListElement']:
-                        if type(ile) == str:
+                        if isinstance(ile, str):
                             instruction_list.append(clean_instruction_string(ile))
                         elif 'text' in ile:
                             instruction_list.append(clean_instruction_string(ile['text']))
@@ -341,13 +355,13 @@ def parse_image(image):
     # check if list of images is returned, take first if so
     if not image:
         return None
-    if type(image) == list:
+    if isinstance(image, list):
        for pic in image:
-            if (type(pic) == str) and (pic[:4] == 'http'):
+            if (isinstance(pic, str)) and (pic[:4] == 'http'):
                 image = pic
             elif 'url' in pic:
                 image = pic['url']
-    elif type(image) == dict:
+    elif isinstance(image, dict):
         if 'url' in image:
             image = image['url']
 
@@ -358,12 +372,12 @@ def parse_servings(servings):
-    if type(servings) == str:
+    if isinstance(servings, str):
         try:
             servings = int(re.search(r'\d+', servings).group())
         except AttributeError:
             servings = 1
-    elif type(servings) == list:
+    elif isinstance(servings, list):
         try:
             servings = int(re.findall(r'\b\d+\b', servings[0])[0])
         except KeyError:
             servings = 1
 
@@ -372,12 +386,12 @@ def parse_servings_text(servings):
-    if type(servings) == str:
+    if isinstance(servings, str):
         try:
-            servings = re.sub("\d+", '', servings).strip()
+            servings = re.sub("\\d+", '', servings).strip()
         except Exception:
             servings = ''
-    if type(servings) == list:
+    if isinstance(servings, list):
         try:
             servings = parse_servings_text(servings[1])
         except Exception:
             servings = ''
 
@@ -394,7 +408,7 @@ def parse_time(recipe_time):
             recipe_time = round(iso_parse_duration(recipe_time).seconds / 60)
         except ISO8601Error:
             try:
-                if (type(recipe_time) == list and len(recipe_time) > 0):
+                if (isinstance(recipe_time, list) and len(recipe_time) > 0):
                     recipe_time = recipe_time[0]
                 recipe_time = round(parse_duration(recipe_time).seconds / 60)
             except AttributeError:
@@ -413,7 +427,7 @@ def parse_keywords(keyword_json, space):
         caches['default'].touch(KEYWORD_CACHE_KEY, 30)
     else:
         for a in Automation.objects.filter(space=space, disabled=False, type=Automation.KEYWORD_ALIAS).only('param_1', 'param_2').order_by('order').all():
-            keyword_aliases[a.param_1] = a.param_2
+            keyword_aliases[a.param_1.lower()] = a.param_2
         caches['default'].set(KEYWORD_CACHE_KEY, keyword_aliases, 30)
 
     # keywords as list
@@ -424,7 +438,7 @@ def parse_keywords(keyword_json, space):
         if len(kw) != 0:
             if keyword_aliases:
                 try:
-                    kw = keyword_aliases[kw]
+                    kw = keyword_aliases[kw.lower()]
                 except KeyError:
                     pass
             if k := Keyword.objects.filter(name=kw, space=space).first():
@@ -438,15 +452,15 @@ def listify_keywords(keyword_list):
     # keywords as string
     try:
-        if type(keyword_list[0]) == dict:
+        if isinstance(keyword_list[0], dict):
             return keyword_list
     except (KeyError, IndexError):
         pass
-    if type(keyword_list) == str:
+    if isinstance(keyword_list, str):
         keyword_list = keyword_list.split(',')
 
     # keywords as string in list
-    if (type(keyword_list) == list and len(keyword_list) == 1 and ',' in keyword_list[0]):
+    if (isinstance(keyword_list, list) and len(keyword_list) == 1 and ',' in keyword_list[0]):
         keyword_list = keyword_list[0].split(',')
     return [x.strip() for x in keyword_list]
 
@@ -500,13 +514,13 @@ def get_images_from_soup(soup, url):
 
 
 def clean_dict(input_dict, key):
-    if type(input_dict) == dict:
+    if isinstance(input_dict, dict):
         for x in list(input_dict):
             if x == key:
                 del input_dict[x]
-            elif type(input_dict[x]) == dict:
+            elif isinstance(input_dict[x], dict):
                 input_dict[x] = clean_dict(input_dict[x], key)
-            elif type(input_dict[x]) == list:
+            elif isinstance(input_dict[x], list):
                 temp_list = []
                 for e in input_dict[x]:
                     temp_list.append(clean_dict(e, key))
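
The same convention runs through every hunk above, so a standalone sketch may help when reviewing: alias rules are cached in a dict keyed by param_1.lower(), lookups lower-case their probe, and the uncached paths switch to param_1__iexact or an annotated Lower('param_1') so both paths compare case-insensitively. The snippet below illustrates only the cached-dict half and uses made-up names (Rule, build_alias_cache, resolve_alias are illustrative, not from the codebase); the param_1/param_2 fields and the __iexact/Lower fallback are the ones the patch actually touches.

# Illustrative sketch, not part of the patch: lower-cased alias cache lookup.
from collections import namedtuple

Rule = namedtuple('Rule', ['param_1', 'param_2'])  # stand-in for an Automation row


def build_alias_cache(rules):
    # mirror the patch: cache keys are stored lower-cased
    return {r.param_1.lower(): r.param_2 for r in rules}


def resolve_alias(name, alias_cache):
    # mirror the patch: the probe is lower-cased before the dict lookup
    return alias_cache.get(name.lower(), name)


cache = build_alias_cache([Rule('Chilli Powder', 'chili powder')])
assert resolve_alias('CHILLI powder', cache) == 'chili powder'
assert resolve_alias('cumin', cache) == 'cumin'  # unmatched names pass through unchanged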