handle special cases in ingredients

This commit is contained in:
Marcus Wolschon 2023-01-15 13:49:16 +01:00
parent c2a8214290
commit 394f24c29f

View File

@ -235,6 +235,19 @@ class IngredientParser:
# leading spaces before commas result in extra tokens, clean them out # leading spaces before commas result in extra tokens, clean them out
ingredient = ingredient.replace(' ,', ',') ingredient = ingredient.replace(' ,', ',')
# Handle special cases of units that contain a space in their name
# "2 geh. TL XYZ" => "2 geh.TL XYZ" => [amount=2, unit="geh.TL", food="XYZ"]
ingredient = ingredient.replace("geh. TL", "geh.TL") \
.replace("geh. TL", "geh.TL") \
.replace("geh. EL", "geh.EL") \
.replace("ges. TL", "ges.TL") \
.replace("ges. EL", "ges.EL")
# add others here until we find a better way to handle these
# handle "(from) - (to)" amounts by keeping the first (from) amount and appending the range to the description
# "10.5 - 200 g XYZ" => "10.5 g XYZ (10.5 - 200)"
ingredient = re.sub("(\d+|\d+[\\.,]\d+) - (\d+|\d+[\\.,]\d+) (.*)", "\\1 \\3 (\\1 - \\2)", ingredient)
# if amount and unit are connected add space in between # if amount and unit are connected add space in between
if re.match('([0-9])+([A-z])+\s', ingredient): if re.match('([0-9])+([A-z])+\s', ingredient):
ingredient = re.sub(r'(?<=([a-z])|\d)(?=(?(1)\d|[a-z]))', ' ', ingredient) ingredient = re.sub(r'(?<=([a-z])|\d)(?=(?(1)\d|[a-z]))', ' ', ingredient)