make automation parameters case insensitive on search

This commit is contained in:
smilerz 2023-05-02 16:50:13 -05:00
parent cde8dd8b53
commit 9b50ea4c22
No known key found for this signature in database
GPG Key ID: 39444C7606D47126
2 changed files with 57 additions and 41 deletions

View File

@ -4,6 +4,7 @@ import unicodedata
from django.core.cache import caches
from django.db.models import Q
from django.db.models.functions import Lower
from cookbook.models import Automation, Food, Ingredient, Unit
@ -32,7 +33,7 @@ class IngredientParser:
caches['default'].touch(FOOD_CACHE_KEY, 30)
else:
for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.FOOD_ALIAS).only('param_1', 'param_2').order_by('order').all():
self.food_aliases[a.param_1] = a.param_2
self.food_aliases[a.param_1.lower()] = a.param_2
caches['default'].set(FOOD_CACHE_KEY, self.food_aliases, 30)
UNIT_CACHE_KEY = f'automation_unit_alias_{self.request.space.pk}'
@ -41,7 +42,7 @@ class IngredientParser:
caches['default'].touch(UNIT_CACHE_KEY, 30)
else:
for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.UNIT_ALIAS).only('param_1', 'param_2').order_by('order').all():
self.unit_aliases[a.param_1] = a.param_2
self.unit_aliases[a.param_1.lower()] = a.param_2
caches['default'].set(UNIT_CACHE_KEY, self.unit_aliases, 30)
NEVER_UNIT_CACHE_KEY = f'automation_never_unit_{self.request.space.pk}'
@ -50,7 +51,7 @@ class IngredientParser:
caches['default'].touch(NEVER_UNIT_CACHE_KEY, 30)
else:
for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.NEVER_UNIT).only('param_1', 'param_2').order_by('order').all():
self.never_unit[a.param_1] = a.param_2
self.never_unit[a.param_1.lower()] = a.param_2
caches['default'].set(NEVER_UNIT_CACHE_KEY, self.never_unit, 30)
TRANSPOSE_WORDS_CACHE_KEY = f'automation_transpose_words_{self.request.space.pk}'
@ -60,7 +61,7 @@ class IngredientParser:
else:
i = 0
for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.TRANSPOSE_WORDS).only('param_1', 'param_2').order_by('order').all():
self.transpose_words[i] = [a.param_1, a.param_2]
self.transpose_words[i] = [a.param_1.lower(), a.param_2.lower()]
i += 1
caches['default'].set(TRANSPOSE_WORDS_CACHE_KEY, self.transpose_words, 30)
else:
@ -80,11 +81,11 @@ class IngredientParser:
else:
if self.food_aliases:
try:
return self.food_aliases[food]
return self.food_aliases[food.lower()]
except KeyError:
return food
else:
if automation := Automation.objects.filter(space=self.request.space, type=Automation.FOOD_ALIAS, param_1=food, disabled=False).order_by('order').first():
if automation := Automation.objects.filter(space=self.request.space, type=Automation.FOOD_ALIAS, param_1__iexact=food, disabled=False).order_by('order').first():
return automation.param_2
return food
@ -99,11 +100,11 @@ class IngredientParser:
else:
if self.transpose_words:
try:
return self.unit_aliases[unit]
return self.unit_aliases[unit.lower()]
except KeyError:
return unit
else:
if automation := Automation.objects.filter(space=self.request.space, type=Automation.UNIT_ALIAS, param_1=unit, disabled=False).order_by('order').first():
if automation := Automation.objects.filter(space=self.request.space, type=Automation.UNIT_ALIAS, param_1__iexact=unit, disabled=False).order_by('order').first():
return automation.param_2
return unit
@ -249,14 +250,14 @@ class IngredientParser:
never_unit = False
if self.never_unit:
try:
new_unit = self.never_unit[tokens[1]]
new_unit = self.never_unit[tokens[1].lower()]
never_unit = True
except KeyError:
return tokens
else:
if automation := Automation.objects.filter(space=self.request.space, type=Automation.NEVER_UNIT, param_1__in=[
tokens[1], alt_unit], disabled=False).order_by('order').first():
if automation := Automation.objects.annotate(param_1_lower=Lower('param_1')).filter(space=self.request.space, type=Automation.NEVER_UNIT, param_1_lower__in=[
tokens[1].lower(), alt_unit.lower()], disabled=False).order_by('order').first():
new_unit = automation.param_2
never_unit = True
@ -277,18 +278,19 @@ class IngredientParser:
return ingredient
else:
tokens = ingredient.replace(',', ' ').split()
tokens = [x.lower() for x in ingredient.replace(',', ' ').split()]
if self.transpose_words:
filtered_rules = {}
for key, value in self.transpose_words.items():
if value[0] in tokens and value[1] in tokens:
filtered_rules[key] = value
for k, v in filtered_rules.items():
ingredient = re.sub(rf"\b({v[0]})\W*({v[1]})\b", r"\2 \1", ingredient)
ingredient = re.sub(rf"\b({v[0]})\W*({v[1]})\b", r"\2 \1", ingredient, flags=re.IGNORECASE)
else:
for rule in Automation.objects.filter(space=self.request.space, type=Automation.TRANSPOSE_WORDS, disabled=False).filter(
Q(Q(param_1__in=tokens) | Q(param_2__in=tokens))).order_by('order'):
ingredient = re.sub(rf"\b({v[0]})\W*({v[1]})\b", r"\2 \1", ingredient)
for rule in Automation.objects.filter(space=self.request.space, type=Automation.TRANSPOSE_WORDS, disabled=False) \
.annotate(param_1_lower=Lower('param_1'), param_2_lower=Lower('param_2')) \
.filter(Q(Q(param_1_lower__in=tokens) | Q(param_2_lower__in=tokens))).order_by('order'):
ingredient = re.sub(rf"\b({v[0]})\W*({v[1]})\b", r"\2 \1", ingredient, flags=re.IGNORECASE)
return ingredient
def parse(self, ingredient):

View File

@ -15,7 +15,6 @@ from recipe_scrapers._utils import get_host_name, get_minutes
from cookbook.helper.ingredient_parser import IngredientParser
from cookbook.models import Automation, Keyword, PropertyType
# from unicodedata import decomposition
@ -51,7 +50,8 @@ def get_from_scraper(scrape, request):
recipe_json['internal'] = True
try:
servings = scrape.schema.data.get('recipeYield') or 1 # dont use scrape.yields() as this will always return "x servings" or "x items", should be improved in scrapers directly
# dont use scrape.yields() as this will always return "x servings" or "x items", should be improved in scrapers directly
servings = scrape.schema.data.get('recipeYield') or 1
except Exception:
servings = 1
@ -156,7 +156,14 @@ def get_from_scraper(scrape, request):
parsed_description = parse_description(description)
# TODO notify user about limit if reached
# limits exist to limit the attack surface for dos style attacks
automations = Automation.objects.filter(type=Automation.DESCRIPTION_REPLACE, space=request.space, disabled=False).only('param_1', 'param_2', 'param_3').all().order_by('order')[:512]
automations = Automation.objects.filter(
type=Automation.DESCRIPTION_REPLACE,
space=request.space,
disabled=False).only(
'param_1',
'param_2',
'param_3').all().order_by('order')[
:512]
for a in automations:
if re.match(a.param_1, (recipe_json['source_url'])[:512]):
parsed_description = re.sub(a.param_2, a.param_3, parsed_description, count=1)
@ -206,7 +213,14 @@ def get_from_scraper(scrape, request):
pass
if 'source_url' in recipe_json and recipe_json['source_url']:
automations = Automation.objects.filter(type=Automation.INSTRUCTION_REPLACE, space=request.space, disabled=False).only('param_1', 'param_2', 'param_3').order_by('order').all()[:512]
automations = Automation.objects.filter(
type=Automation.INSTRUCTION_REPLACE,
space=request.space,
disabled=False).only(
'param_1',
'param_2',
'param_3').order_by('order').all()[
:512]
for a in automations:
if re.match(a.param_1, (recipe_json['source_url'])[:512]):
for s in recipe_json['steps']:
@ -272,7 +286,7 @@ def get_from_youtube_scraper(url, request):
def parse_name(name):
if type(name) == list:
if isinstance(name, list):
try:
name = name[0]
except Exception:
@ -316,16 +330,16 @@ def parse_instructions(instructions):
"""
instruction_list = []
if type(instructions) == list:
if isinstance(instructions, list):
for i in instructions:
if type(i) == str:
if isinstance(i, str):
instruction_list.append(clean_instruction_string(i))
else:
if 'text' in i:
instruction_list.append(clean_instruction_string(i['text']))
elif 'itemListElement' in i:
for ile in i['itemListElement']:
if type(ile) == str:
if isinstance(ile, str):
instruction_list.append(clean_instruction_string(ile))
elif 'text' in ile:
instruction_list.append(clean_instruction_string(ile['text']))
@ -341,13 +355,13 @@ def parse_image(image):
# check if list of images is returned, take first if so
if not image:
return None
if type(image) == list:
if isinstance(image, list):
for pic in image:
if (type(pic) == str) and (pic[:4] == 'http'):
if (isinstance(pic, str)) and (pic[:4] == 'http'):
image = pic
elif 'url' in pic:
image = pic['url']
elif type(image) == dict:
elif isinstance(image, dict):
if 'url' in image:
image = image['url']
@ -358,12 +372,12 @@ def parse_image(image):
def parse_servings(servings):
if type(servings) == str:
if isinstance(servings, str):
try:
servings = int(re.search(r'\d+', servings).group())
except AttributeError:
servings = 1
elif type(servings) == list:
elif isinstance(servings, list):
try:
servings = int(re.findall(r'\b\d+\b', servings[0])[0])
except KeyError:
@ -372,12 +386,12 @@ def parse_servings(servings):
def parse_servings_text(servings):
if type(servings) == str:
if isinstance(servings, str):
try:
servings = re.sub("\d+", '', servings).strip()
servings = re.sub("\\d+", '', servings).strip()
except Exception:
servings = ''
if type(servings) == list:
if isinstance(servings, list):
try:
servings = parse_servings_text(servings[1])
except Exception:
@ -394,7 +408,7 @@ def parse_time(recipe_time):
recipe_time = round(iso_parse_duration(recipe_time).seconds / 60)
except ISO8601Error:
try:
if (type(recipe_time) == list and len(recipe_time) > 0):
if (isinstance(recipe_time, list) and len(recipe_time) > 0):
recipe_time = recipe_time[0]
recipe_time = round(parse_duration(recipe_time).seconds / 60)
except AttributeError:
@ -413,7 +427,7 @@ def parse_keywords(keyword_json, space):
caches['default'].touch(KEYWORD_CACHE_KEY, 30)
else:
for a in Automation.objects.filter(space=space, disabled=False, type=Automation.KEYWORD_ALIAS).only('param_1', 'param_2').order_by('order').all():
keyword_aliases[a.param_1] = a.param_2
keyword_aliases[a.param_1.lower()] = a.param_2
caches['default'].set(KEYWORD_CACHE_KEY, keyword_aliases, 30)
# keywords as list
@ -424,7 +438,7 @@ def parse_keywords(keyword_json, space):
if len(kw) != 0:
if keyword_aliases:
try:
kw = keyword_aliases[kw]
kw = keyword_aliases[kw.lower()]
except KeyError:
pass
if k := Keyword.objects.filter(name=kw, space=space).first():
@ -438,15 +452,15 @@ def parse_keywords(keyword_json, space):
def listify_keywords(keyword_list):
# keywords as string
try:
if type(keyword_list[0]) == dict:
if isinstance(keyword_list[0], dict):
return keyword_list
except (KeyError, IndexError):
pass
if type(keyword_list) == str:
if isinstance(keyword_list, str):
keyword_list = keyword_list.split(',')
# keywords as string in list
if (type(keyword_list) == list and len(keyword_list) == 1 and ',' in keyword_list[0]):
if (isinstance(keyword_list, list) and len(keyword_list) == 1 and ',' in keyword_list[0]):
keyword_list = keyword_list[0].split(',')
return [x.strip() for x in keyword_list]
@ -500,13 +514,13 @@ def get_images_from_soup(soup, url):
def clean_dict(input_dict, key):
if type(input_dict) == dict:
if isinstance(input_dict, dict):
for x in list(input_dict):
if x == key:
del input_dict[x]
elif type(input_dict[x]) == dict:
elif isinstance(input_dict[x], dict):
input_dict[x] = clean_dict(input_dict[x], key)
elif type(input_dict[x]) == list:
elif isinstance(input_dict[x], list):
temp_list = []
for e in input_dict[x]:
temp_list.append(clean_dict(e, key))