ingredient parser produces expected results again
This commit is contained in:
@ -46,7 +46,7 @@ class IngredientParser:
|
|||||||
|
|
||||||
def apply_food_automation(self, food):
|
def apply_food_automation(self, food):
|
||||||
"""
|
"""
|
||||||
Apply food alias automations to passed foood
|
Apply food alias automations to passed food
|
||||||
:param food: food as string
|
:param food: food as string
|
||||||
:return: food as string (possibly changed by automation)
|
:return: food as string (possibly changed by automation)
|
||||||
"""
|
"""
|
||||||
@ -155,33 +155,36 @@ class IngredientParser:
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
unit = x[end:]
|
unit = x[end:]
|
||||||
|
|
||||||
|
if unit is not None and unit.strip() == '':
|
||||||
|
unit = None
|
||||||
|
|
||||||
if unit is not None and (unit.startswith('(') or unit.startswith('-')): # no known unit starts with '(' or '-', so this is most likely an alternative amount such as '1L (500ml) Water' or a range such as '2-3'
|
if unit is not None and (unit.startswith('(') or unit.startswith('-')): # no known unit starts with '(' or '-', so this is most likely an alternative amount such as '1L (500ml) Water' or a range such as '2-3'
|
||||||
unit = ''
|
unit = None
|
||||||
note = x
|
note = x
|
||||||
return amount, unit, note
|
return amount, unit, note
|
||||||
|
|
||||||
def parse_ingredient_with_comma(self, tokens):
|
def parse_food_with_comma(self, tokens):
|
||||||
ingredient = ''
|
food = ''
|
||||||
note = ''
|
note = ''
|
||||||
start = 0
|
start = 0
|
||||||
# search for first occurrence of an argument ending in a comma
|
# search for first occurrence of an argument ending in a comma
|
||||||
while start < len(tokens) and not tokens[start].endswith(','):
|
while start < len(tokens) and not tokens[start].endswith(','):
|
||||||
start += 1
|
start += 1
|
||||||
if start == len(tokens):
|
if start == len(tokens):
|
||||||
# no token ending in a comma found -> use everything as ingredient
|
# no token ending in a comma found -> use everything as food
|
||||||
ingredient = ' '.join(tokens)
|
food = ' '.join(tokens)
|
||||||
else:
|
else:
|
||||||
ingredient = ' '.join(tokens[:start + 1])[:-1]
|
food = ' '.join(tokens[:start + 1])[:-1]
|
||||||
note = ' '.join(tokens[start + 1:])
|
note = ' '.join(tokens[start + 1:])
|
||||||
return ingredient, note
|
return food, note
|
||||||
|
|
||||||
def parse_ingredient(self, tokens):
|
def parse_food(self, tokens):
|
||||||
ingredient = ''
|
food = ''
|
||||||
note = ''
|
note = ''
|
||||||
if tokens[-1].endswith(')'):
|
if tokens[-1].endswith(')'):
|
||||||
# Check if the matching opening bracket is in the same token
|
# Check if the matching opening bracket is in the same token
|
||||||
if (not tokens[-1].startswith('(')) and ('(' in tokens[-1]):
|
if (not tokens[-1].startswith('(')) and ('(' in tokens[-1]):
|
||||||
return self.parse_ingredient_with_comma(tokens)
|
return self.parse_food_with_comma(tokens)
|
||||||
# last argument ends with closing bracket -> look for opening bracket
|
# last argument ends with closing bracket -> look for opening bracket
|
||||||
start = len(tokens) - 1
|
start = len(tokens) - 1
|
||||||
while not tokens[start].startswith('(') and not start == 0:
|
while not tokens[start].startswith('(') and not start == 0:
|
||||||
@ -191,36 +194,41 @@ class IngredientParser:
|
|||||||
raise ValueError
|
raise ValueError
|
||||||
elif start < 0:
|
elif start < 0:
|
||||||
# no opening bracket anywhere -> just ignore the last bracket
|
# no opening bracket anywhere -> just ignore the last bracket
|
||||||
ingredient, note = self.parse_ingredient_with_comma(tokens)
|
food, note = self.parse_food_with_comma(tokens)
|
||||||
else:
|
else:
|
||||||
# opening bracket found -> split in ingredient and note, remove brackets from note # noqa: E501
|
# opening bracket found -> split in food and note, remove brackets from note # noqa: E501
|
||||||
note = ' '.join(tokens[start:])[1:-1]
|
note = ' '.join(tokens[start:])[1:-1]
|
||||||
ingredient = ' '.join(tokens[:start])
|
food = ' '.join(tokens[:start])
|
||||||
else:
|
else:
|
||||||
ingredient, note = self.parse_ingredient_with_comma(tokens)
|
food, note = self.parse_food_with_comma(tokens)
|
||||||
return ingredient, note
|
return food, note
|
||||||
|
|
||||||
def parse(self, x):
|
def parse(self, ingredient):
|
||||||
|
"""
|
||||||
|
Main parsing function, takes an ingredient string (e.g. '1 l Water') and extracts amount, unit, food, ...
|
||||||
|
:param ingredient: string ingredient
|
||||||
|
:return: amount, unit (can be None), food, note (can be empty)
|
||||||
|
"""
|
||||||
# initialize default values
|
# initialize default values
|
||||||
amount = 0
|
amount = 0
|
||||||
unit = None
|
unit = None
|
||||||
ingredient = ''
|
food = ''
|
||||||
note = ''
|
note = ''
|
||||||
unit_note = ''
|
unit_note = ''
|
||||||
|
|
||||||
if len(x) == 0:
|
if len(ingredient) == 0:
|
||||||
raise ValueError('string to parse cannot be empty')
|
raise ValueError('string to parse cannot be empty')
|
||||||
|
|
||||||
# if the string contains parenthesis early on remove it and place it at the end
|
# if the string contains parenthesis early on remove it and place it at the end
|
||||||
# because its likely some kind of note
|
# because its likely some kind of note
|
||||||
if re.match('(.){1,6}\s\((.[^\(\)])+\)\s', x):
|
if re.match('(.){1,6}\s\((.[^\(\)])+\)\s', ingredient):
|
||||||
match = re.search('\((.[^\(])+\)', x)
|
match = re.search('\((.[^\(])+\)', ingredient)
|
||||||
x = x[:match.start()] + x[match.end():] + ' ' + x[match.start():match.end()]
|
ingredient = ingredient[:match.start()] + ingredient[match.end():] + ' ' + ingredient[match.start():match.end()]
|
||||||
|
|
||||||
tokens = x.split()
|
tokens = ingredient.split() # split at each space into tokens
|
||||||
if len(tokens) == 1:
|
if len(tokens) == 1:
|
||||||
# there only is one argument, that must be the ingredient
|
# there only is one argument, that must be the food
|
||||||
ingredient = tokens[0]
|
food = tokens[0]
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
# try to parse first argument as amount
|
# try to parse first argument as amount
|
||||||
@ -232,51 +240,50 @@ class IngredientParser:
|
|||||||
try:
|
try:
|
||||||
if unit is not None:
|
if unit is not None:
|
||||||
# a unit is already found, no need to try the second argument for a fraction
|
# a unit is already found, no need to try the second argument for a fraction
|
||||||
# probably not the best method to do it, but I didn't want to make an if check and paste the exact same thing in the else as already is in the except # noqa: E501
|
# probably not the best method to do it, but I didn't want to make an if check and paste the exact same thing in the else as already is in the except
|
||||||
raise ValueError
|
raise ValueError
|
||||||
# try to parse second argument as amount and add that, in case of '2 1/2' or '2 ½'
|
# try to parse second argument as amount and add that, in case of '2 1/2' or '2 ½'
|
||||||
amount += self.parse_fraction(tokens[1])
|
amount += self.parse_fraction(tokens[1])
|
||||||
# assume that units can't end with a comma
|
# assume that units can't end with a comma
|
||||||
if len(tokens) > 3 and not tokens[2].endswith(','):
|
if len(tokens) > 3 and not tokens[2].endswith(','):
|
||||||
# try to use third argument as unit and everything else as ingredient, use everything as ingredient if it fails # noqa: E501
|
# try to use third argument as unit and everything else as food, use everything as food if it fails
|
||||||
try:
|
try:
|
||||||
ingredient, note = self.parse_ingredient(tokens[3:])
|
food, note = self.parse_food(tokens[3:])
|
||||||
unit = tokens[2]
|
unit = tokens[2]
|
||||||
except ValueError:
|
except ValueError:
|
||||||
ingredient, note = self.parse_ingredient(tokens[2:])
|
food, note = self.parse_food(tokens[2:])
|
||||||
else:
|
else:
|
||||||
ingredient, note = self.parse_ingredient(tokens[2:])
|
food, note = self.parse_food(tokens[2:])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# assume that units can't end with a comma
|
# assume that units can't end with a comma
|
||||||
if not tokens[1].endswith(','):
|
if not tokens[1].endswith(','):
|
||||||
# try to use second argument as unit and everything else as ingredient, use everything as ingredient if it fails # noqa: E501
|
# try to use second argument as unit and everything else as food, use everything as food if it fails
|
||||||
try:
|
try:
|
||||||
ingredient, note = self.parse_ingredient(tokens[2:])
|
food, note = self.parse_food(tokens[2:])
|
||||||
if unit is None:
|
if unit is None:
|
||||||
unit = tokens[1]
|
unit = tokens[1]
|
||||||
else:
|
else:
|
||||||
note = tokens[1]
|
note = tokens[1]
|
||||||
except ValueError:
|
except ValueError:
|
||||||
ingredient, note = self.parse_ingredient(tokens[1:])
|
food, note = self.parse_food(tokens[1:])
|
||||||
else:
|
else:
|
||||||
ingredient, note = self.parse_ingredient(tokens[1:])
|
food, note = self.parse_food(tokens[1:])
|
||||||
else:
|
else:
|
||||||
# only two arguments, first one is the amount
|
# only two arguments, first one is the amount
|
||||||
# which means this is the ingredient
|
# which means this is the food
|
||||||
ingredient = tokens[1]
|
food = tokens[1]
|
||||||
except ValueError:
|
except ValueError:
|
||||||
try:
|
try:
|
||||||
# can't parse first argument as amount
|
# can't parse first argument as amount
|
||||||
# -> no unit -> parse everything as ingredient
|
# -> no unit -> parse everything as food
|
||||||
ingredient, note = self.parse_ingredient(tokens)
|
food, note = self.parse_food(tokens)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
ingredient = ' '.join(tokens[1:])
|
food = ' '.join(tokens[1:])
|
||||||
|
|
||||||
if unit_note not in note:
|
if unit_note not in note:
|
||||||
note += ' ' + unit_note
|
note += ' ' + unit_note
|
||||||
try:
|
|
||||||
unit = self.apply_unit_automation(unit.strip())
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return amount, unit, self.apply_food_automation(ingredient.strip()), note.strip()
|
if unit:
|
||||||
|
unit = self.apply_unit_automation(unit.strip())
|
||||||
|
|
||||||
|
return amount, unit, self.apply_food_automation(food.strip()), note.strip()
|
||||||
|
@ -10,27 +10,11 @@
|
|||||||
|
|
||||||
{% block content_fluid %}
|
{% block content_fluid %}
|
||||||
|
|
||||||
<div id="app">
|
{{ data }}
|
||||||
|
|
||||||
<import-view></import-view>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
||||||
|
|
||||||
{% block script %}
|
{% block script %}
|
||||||
{% if debug %}
|
|
||||||
<script src="{% url 'js_reverse' %}"></script>
|
|
||||||
{% else %}
|
|
||||||
<script src="{% static 'django_js_reverse/reverse.js' %}"></script>
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
<script type="application/javascript">
|
|
||||||
window.CUSTOM_LOCALE = '{{ request.LANGUAGE_CODE }}'
|
|
||||||
window.API_TOKEN = '{{ api_token }}'
|
|
||||||
window.BOOKMARKLET_IMPORT_ID = {{ bookmarklet_import_id }}
|
|
||||||
</script>
|
|
||||||
|
|
||||||
{% render_bundle 'import_view' %}
|
|
||||||
{% endblock %}
|
{% endblock %}
|
@ -4,7 +4,7 @@ from cookbook.helper.ingredient_parser import IngredientParser
|
|||||||
def test_ingredient_parser():
|
def test_ingredient_parser():
|
||||||
expectations = {
|
expectations = {
|
||||||
"2¼ l Wasser": (2.25, "l", "Wasser", ""),
|
"2¼ l Wasser": (2.25, "l", "Wasser", ""),
|
||||||
"2¼l Wasser": (2.25, "l", "Wasser", ""),
|
"3¼l Wasser": (3.25, "l", "Wasser", ""),
|
||||||
"¼ l Wasser": (0.25, "l", "Wasser", ""),
|
"¼ l Wasser": (0.25, "l", "Wasser", ""),
|
||||||
"3l Wasser": (3, "l", "Wasser", ""),
|
"3l Wasser": (3, "l", "Wasser", ""),
|
||||||
"4 l Wasser": (4, "l", "Wasser", ""),
|
"4 l Wasser": (4, "l", "Wasser", ""),
|
||||||
@ -58,7 +58,7 @@ def test_ingredient_parser():
|
|||||||
"2L Wasser": (2, "L", "Wasser", ""),
|
"2L Wasser": (2, "L", "Wasser", ""),
|
||||||
"1 (16 ounce) package dry lentils, rinsed": (1, "package", "dry lentils, rinsed", "16 ounce"),
|
"1 (16 ounce) package dry lentils, rinsed": (1, "package", "dry lentils, rinsed", "16 ounce"),
|
||||||
"2-3 c Water": (2, "c", "Water", "2-3"),
|
"2-3 c Water": (2, "c", "Water", "2-3"),
|
||||||
"Pane (raffermo o secco) 80 g": (0, "", "Pane 80 g", "raffermo o secco"), # TODO this is actually not a good result but currently expected
|
"Pane (raffermo o secco) 80 g": (0, None, "Pane 80 g", "raffermo o secco"), # TODO this is actually not a good result but currently expected
|
||||||
}
|
}
|
||||||
# for German you could say that if an ingredient does not have
|
# for German you could say that if an ingredient does not have
|
||||||
# an amount and it starts with a lowercase letter, then that
|
# an amount and it starts with a lowercase letter, then that
|
||||||
|
@ -662,10 +662,15 @@ def test(request):
|
|||||||
if not settings.DEBUG:
|
if not settings.DEBUG:
|
||||||
return HttpResponseRedirect(reverse('index'))
|
return HttpResponseRedirect(reverse('index'))
|
||||||
|
|
||||||
if (api_token := Token.objects.filter(user=request.user).first()) is None:
|
from cookbook.helper.ingredient_parser import IngredientParser
|
||||||
api_token = Token.objects.create(user=request.user)
|
parser = IngredientParser(request, False)
|
||||||
|
|
||||||
return render(request, 'test.html', {'api_token': api_token})
|
data = {
|
||||||
|
'original': 'Pane (raffermo o secco) 80 g'
|
||||||
|
}
|
||||||
|
data['parsed'] = parser.parse(data['original'])
|
||||||
|
|
||||||
|
return render(request, 'test.html', {'data': data})
|
||||||
|
|
||||||
|
|
||||||
def test2(request):
|
def test2(request):
|
||||||
|
Reference in New Issue
Block a user