ingredient parser produces expected results again

This commit is contained in:
vabene1111
2022-04-23 13:03:15 +02:00
parent 161ae9879a
commit 012a1a7915
4 changed files with 62 additions and 66 deletions

View File

@ -46,7 +46,7 @@ class IngredientParser:
def apply_food_automation(self, food):
"""
Apply food alias automations to passed foood
Apply food alias automations to passed food
:param food: food as string
:return: food as string (possibly changed by automation)
"""
@ -155,33 +155,36 @@ class IngredientParser:
except ValueError:
unit = x[end:]
if unit is not None and unit.strip() == '':
unit = None
if unit is not None and (unit.startswith('(') or unit.startswith('-')): # i dont know any unit that starts with ( or - so its likely an alternative like 1L (500ml) Water or 2-3
unit = ''
unit = None
note = x
return amount, unit, note
def parse_food_with_comma(self, tokens):
    """
    Split a list of tokens into food and note at the first token that ends in a comma
    :param tokens: list of string tokens
    :return: food as string, note as string (empty if no token ends in a comma)
    """
    # search for first occurrence of a token ending in a comma
    split_at = next((i for i, token in enumerate(tokens) if token.endswith(',')), None)
    if split_at is None:
        # no token ending in a comma found -> use everything as food
        return ' '.join(tokens), ''
    # everything up to and including the comma token is the food,
    # the [:-1] drops the trailing comma itself
    food = ' '.join(tokens[:split_at + 1])[:-1]
    # whatever follows the comma token is treated as the note
    note = ' '.join(tokens[split_at + 1:])
    return food, note
def parse_ingredient(self, tokens):
ingredient = ''
def parse_food(self, tokens):
food = ''
note = ''
if tokens[-1].endswith(')'):
# Check if the matching opening bracket is in the same token
if (not tokens[-1].startswith('(')) and ('(' in tokens[-1]):
return self.parse_ingredient_with_comma(tokens)
return self.parse_food_with_comma(tokens)
# last argument ends with closing bracket -> look for opening bracket
start = len(tokens) - 1
while not tokens[start].startswith('(') and not start == 0:
@ -191,36 +194,41 @@ class IngredientParser:
raise ValueError
elif start < 0:
# no opening bracket anywhere -> just ignore the last bracket
ingredient, note = self.parse_ingredient_with_comma(tokens)
food, note = self.parse_food_with_comma(tokens)
else:
# opening bracket found -> split in ingredient and note, remove brackets from note # noqa: E501
# opening bracket found -> split in food and note, remove brackets from note # noqa: E501
note = ' '.join(tokens[start:])[1:-1]
ingredient = ' '.join(tokens[:start])
food = ' '.join(tokens[:start])
else:
ingredient, note = self.parse_ingredient_with_comma(tokens)
return ingredient, note
food, note = self.parse_food_with_comma(tokens)
return food, note
def parse(self, x):
def parse(self, ingredient):
"""
Main parsing function, takes an ingredient string (e.g. '1 l Water') and extracts amount, unit, food, ...
:param ingredient: string ingredient
:return: amount, unit (can be None), food, note (can be empty)
"""
# initialize default values
amount = 0
unit = None
ingredient = ''
food = ''
note = ''
unit_note = ''
if len(x) == 0:
if len(ingredient) == 0:
raise ValueError('string to parse cannot be empty')
# if the string contains a parenthesized part early on, move it to the end
# because it's likely some kind of note
if re.match('(.){1,6}\s\((.[^\(\)])+\)\s', x):
match = re.search('\((.[^\(])+\)', x)
x = x[:match.start()] + x[match.end():] + ' ' + x[match.start():match.end()]
if re.match('(.){1,6}\s\((.[^\(\)])+\)\s', ingredient):
match = re.search('\((.[^\(])+\)', ingredient)
ingredient = ingredient[:match.start()] + ingredient[match.end():] + ' ' + ingredient[match.start():match.end()]
tokens = x.split()
tokens = ingredient.split() # split at each space into tokens
if len(tokens) == 1:
# there only is one argument, that must be the ingredient
ingredient = tokens[0]
# there is only one argument, so it must be the food
food = tokens[0]
else:
try:
# try to parse first argument as amount
@ -232,51 +240,50 @@ class IngredientParser:
try:
if unit is not None:
# a unit is already found, no need to try the second argument for a fraction
# probably not the best method to do it, but I didn't want to make an if check and paste the exact same thing in the else as already is in the except # noqa: E501
# probably not the best method to do it, but I didn't want to make an if check and paste the exact same thing in the else as already is in the except
raise ValueError
# try to parse second argument as amount and add that, in case of '2 1/2' or '2 ½'
amount += self.parse_fraction(tokens[1])
# assume that units can't end with a comma
if len(tokens) > 3 and not tokens[2].endswith(','):
# try to use third argument as unit and everything else as ingredient, use everything as ingredient if it fails # noqa: E501
# try to use third argument as unit and everything else as food, use everything as food if it fails
try:
ingredient, note = self.parse_ingredient(tokens[3:])
food, note = self.parse_food(tokens[3:])
unit = tokens[2]
except ValueError:
ingredient, note = self.parse_ingredient(tokens[2:])
food, note = self.parse_food(tokens[2:])
else:
ingredient, note = self.parse_ingredient(tokens[2:])
food, note = self.parse_food(tokens[2:])
except ValueError:
# assume that units can't end with a comma
if not tokens[1].endswith(','):
# try to use second argument as unit and everything else as ingredient, use everything as ingredient if it fails # noqa: E501
# try to use second argument as unit and everything else as food, use everything as food if it fails
try:
ingredient, note = self.parse_ingredient(tokens[2:])
food, note = self.parse_food(tokens[2:])
if unit is None:
unit = tokens[1]
else:
note = tokens[1]
except ValueError:
ingredient, note = self.parse_ingredient(tokens[1:])
food, note = self.parse_food(tokens[1:])
else:
ingredient, note = self.parse_ingredient(tokens[1:])
food, note = self.parse_food(tokens[1:])
else:
# only two arguments, first one is the amount
# which means this is the ingredient
ingredient = tokens[1]
# which means this is the food
food = tokens[1]
except ValueError:
try:
# can't parse first argument as amount
# -> no unit -> parse everything as ingredient
ingredient, note = self.parse_ingredient(tokens)
# -> no unit -> parse everything as food
food, note = self.parse_food(tokens)
except ValueError:
ingredient = ' '.join(tokens[1:])
food = ' '.join(tokens[1:])
if unit_note not in note:
note += ' ' + unit_note
try:
unit = self.apply_unit_automation(unit.strip())
except Exception:
pass
return amount, unit, self.apply_food_automation(ingredient.strip()), note.strip()
if unit:
unit = self.apply_unit_automation(unit.strip())
return amount, unit, self.apply_food_automation(food.strip()), note.strip()