TandoorRecipes/cookbook/tests/other/test_ingredient_parser.py
Andrew Jayne cffa731106 fix: ingredient parsing for non-latin languages
Before this change the ingredient string for non-latin
languages was not being parsed into the correct amount
or units when the food is found at the start of the
ingredient string.

This was because the regex being used was restricted to
latin characters.

With this change the amount and units are correctly
parsed from such a string.

Fixes https://github.com/TandoorRecipes/recipes/issues/1983
2022-08-07 21:37:59 +01:00

86 lines
5.5 KiB
Python

from cookbook.helper.ingredient_parser import IngredientParser
def test_ingredient_parser():
expectations = {
"2¼ l Wasser": (2.25, "l", "Wasser", ""),
"3¼l Wasser": (3.25, "l", "Wasser", ""),
"¼ l Wasser": (0.25, "l", "Wasser", ""),
"3l Wasser": (3, "l", "Wasser", ""),
"4 l Wasser": (4, "l", "Wasser", ""),
"½l Wasser": (0.5, "l", "Wasser", ""),
"⅛ Liter Sauerrahm": (0.125, "Liter", "Sauerrahm", ""),
"5 Zwiebeln": (5, None, "Zwiebeln", ""),
"3 Zwiebeln, gehackt": (3, None, "Zwiebeln", "gehackt"),
"5 Zwiebeln (gehackt)": (5, None, "Zwiebeln", "gehackt"),
"1 Zwiebel(n)": (1, None, "Zwiebel(n)", ""),
"4 1/2 Zwiebeln": (4.5, None, "Zwiebeln", ""),
"4 ½ Zwiebeln": (4.5, None, "Zwiebeln", ""),
"1/2 EL Mehl": (0.5, "EL", "Mehl", ""),
"1/2 Zwiebel": (0.5, None, "Zwiebel", ""),
"1/5g Mehl, gesiebt": (0.2, "g", "Mehl", "gesiebt"),
"1/2 Zitrone, ausgepresst": (0.5, None, "Zitrone", "ausgepresst"),
"etwas Mehl": (0, None, "etwas Mehl", ""),
"Öl zum Anbraten": (0, None, "Öl zum Anbraten", ""),
"n. B. Knoblauch, zerdrückt": (0, None, "n. B. Knoblauch", "zerdrückt"),
"Kräuter, mediterrane (Oregano, Rosmarin, Basilikum)": (
0, None, "Kräuter, mediterrane", "Oregano, Rosmarin, Basilikum"),
"600 g Kürbisfleisch (Hokkaido), geschält, entkernt und geraspelt": (
600, "g", "Kürbisfleisch (Hokkaido)", "geschält, entkernt und geraspelt"),
"Muskat": (0, None, "Muskat", ""),
"200 g Mehl, glattes": (200, "g", "Mehl", "glattes"),
"1 Ei(er)": (1, None, "Ei(er)", ""),
"1 Prise(n) Salz": (1, "Prise(n)", "Salz", ""),
"etwas Wasser, lauwarmes": (0, None, "etwas Wasser", "lauwarmes"),
"Strudelblätter, fertige, für zwei Strudel": (0, None, "Strudelblätter", "fertige, für zwei Strudel"),
"barrel-aged Bourbon": (0, None, "barrel-aged Bourbon", ""),
"golden syrup": (0, None, "golden syrup", ""),
"unsalted butter, for greasing": (0, None, "unsalted butter", "for greasing"),
"unsalted butter , for greasing": (0, None, "unsalted butter", "for greasing"), # trim
"1 small sprig of fresh rosemary": (1, "small", "sprig of fresh rosemary", ""),
# does not always work perfectly!
"75 g fresh breadcrumbs": (75, "g", "fresh breadcrumbs", ""),
"4 acorn squash , or onion squash (600-800g)": (4, "acorn", "squash, or onion squash", "600-800g"),
"1 x 250 g packet of cooked mixed grains , such as spelt and wild rice": (
1, "x", "250 g packet of cooked mixed grains", "such as spelt and wild rice"),
"1 big bunch of fresh mint , (60g)": (1, "big", "bunch of fresh mint,", "60g"),
"1 large red onion": (1, "large", "red onion", ""),
# "2-3 TL Curry": (), # idk what it should use here either
"1 Zwiebel gehackt": (1, "Zwiebel", "gehackt", ""),
"1 EL Kokosöl": (1, "EL", "Kokosöl", ""),
"0.5 paket jäst (à 50 g)": (0.5, "paket", "jäst", "à 50 g"),
"ägg": (0, None, "ägg", ""),
"50 g smör eller margarin": (50, "g", "smör eller margarin", ""),
"3,5 l Wasser": (3.5, "l", "Wasser", ""),
"3.5 l Wasser": (3.5, "l", "Wasser", ""),
"400 g Karotte(n)": (400, "g", "Karotte(n)", ""),
"400g unsalted butter": (400, "g", "butter", "unsalted"),
"2L Wasser": (2, "L", "Wasser", ""),
"1 (16 ounce) package dry lentils, rinsed": (1, "package", "dry lentils, rinsed", "16 ounce"),
"2-3 c Water": (2, "c", "Water", "2-3"),
"Pane (raffermo o secco) 80 g": (80, "g", "Pane", "raffermo o secco"),
"1 Knoblauchzehe(n), gehackt oder gepresst": (1.0, None, 'Knoblauchzehe(n)', 'gehackt oder gepresst'),
"1 Porreestange(n) , ca. 200 g": (1.0, None, 'Porreestange(n)', 'ca. 200 g'), # leading space before comma
# test for over long food entries to get properly split into the note field
"1 Lorem ipsum dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l Lorem ipsum dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l": (
1.0, 'Lorem', 'ipsum', 'dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l Lorem ipsum dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l'),
"1 LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl": (
1.0, None, 'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingeli',
'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl'),
"砂糖 50g": (50, "g", "砂糖", ""),
"卵 4個": (4, "", "", "")
}
# for German you could say that if an ingredient does not have
# an amount # and it starts with a lowercase letter, then that
# is a unit ("etwas", "evtl.") does not apply to English tho
ingredient_parser = IngredientParser(None, False, ignore_automations=True)
count = 0
for key, val in expectations.items():
count += 1
parsed = ingredient_parser.parse(key)
print(f'testing if {key} becomes {val}')
assert parsed == val