more improvements to the ingredient parser + tests

This commit is contained in:
vabene1111 2021-06-21 13:00:03 +02:00
parent 538e45d20c
commit fd1a399d03
4 changed files with 17 additions and 8 deletions

View File

@@ -23,6 +23,7 @@ def parse_fraction(x):
def parse_amount(x): def parse_amount(x):
amount = 0 amount = 0
unit = '' unit = ''
note = ''
did_check_frac = False did_check_frac = False
end = 0 end = 0
@@ -52,9 +53,10 @@ def parse_amount(x):
except ValueError: except ValueError:
unit = x[end:] unit = x[end:]
if unit.startswith('('): # i dont know any unit that starts with ( so its likely an alternative like 1L (500ml) Water if unit.startswith('(') or unit.startswith('-'): # i dont know any unit that starts with ( or - so its likely an alternative like 1L (500ml) Water or 2-3
unit = '' unit = ''
return amount, unit note = x
return amount, unit, note
def parse_ingredient_with_comma(tokens): def parse_ingredient_with_comma(tokens):
@@ -105,6 +107,7 @@ def parse(x):
unit = '' unit = ''
ingredient = '' ingredient = ''
note = '' note = ''
unit_note = ''
# if the string contains parenthesis early on remove it and place it at the end # if the string contains parenthesis early on remove it and place it at the end
# because its likely some kind of note # because its likely some kind of note
@@ -119,8 +122,7 @@ def parse(x):
else: else:
try: try:
# try to parse first argument as amount # try to parse first argument as amount
amount, unit = parse_amount(tokens[0]) amount, unit, unit_note = parse_amount(tokens[0])
print('test', unit)
# only try to parse second argument as amount if there are at least # only try to parse second argument as amount if there are at least
# three arguments if it already has a unit there can't be # three arguments if it already has a unit there can't be
# a fraction for the amount # a fraction for the amount
@@ -167,6 +169,9 @@ def parse(x):
ingredient, note = parse_ingredient(tokens) ingredient, note = parse_ingredient(tokens)
except ValueError: except ValueError:
ingredient = ' '.join(tokens[1:]) ingredient = ' '.join(tokens[1:])
if unit_note not in note:
note += ' ' + unit_note
return amount, unit.strip(), ingredient.strip(), note.strip() return amount, unit.strip(), ingredient.strip(), note.strip()

View File

@@ -773,7 +773,7 @@ COOKPAD = {
"text": "Water", "text": "Water",
"id": 49092 "id": 49092
}, },
"note": "", "note": "2-3",
"original": "2-3 c Water" "original": "2-3 c Water"
}, },
{ {
@@ -1498,10 +1498,10 @@ GIALLOZAFFERANO = {
"id": 64900 "id": 64900
}, },
"ingredient": { "ingredient": {
"text": "Pane (raffermo o secco) 80 g", "text": "Pane 80 g",
"id": 24720 "id": 24720
}, },
"note": "", "note": "(raffermo o secco)",
"original": "Pane (raffermo o secco) 80 g" "original": "Pane (raffermo o secco) 80 g"
}, },
{ {

View File

@@ -57,6 +57,8 @@ def test_ingredient_parser():
"400g unsalted butter": (400, "g", "butter", "unsalted"), "400g unsalted butter": (400, "g", "butter", "unsalted"),
"2L Wasser": (2, "L", "Wasser", ""), "2L Wasser": (2, "L", "Wasser", ""),
"1 (16 ounce) package dry lentils, rinsed": (1, "package", "dry lentils, rinsed", "16 ounce"), "1 (16 ounce) package dry lentils, rinsed": (1, "package", "dry lentils, rinsed", "16 ounce"),
"2-3 c Water": (2, "c", "Water", "2-3"),
"Pane (raffermo o secco) 80 g": (0, "", "Pane 80 g", "raffermo o secco"), #TODO this is actually not a good result but currently expected
} }
# for German you could say that if an ingredient does not have # for German you could say that if an ingredient does not have
# an amount # and it starts with a lowercase letter, then that # an amount # and it starts with a lowercase letter, then that

View File

@@ -14,7 +14,7 @@ from django.contrib.auth.password_validation import validate_password
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from django.db import IntegrityError from django.db import IntegrityError
from django.db.models import Avg, Q, Sum from django.db.models import Avg, Q, Sum
from django.http import HttpResponseRedirect from django.http import HttpResponseRedirect, JsonResponse
from django.shortcuts import get_object_or_404, render, redirect from django.shortcuts import get_object_or_404, render, redirect
from django.urls import reverse, reverse_lazy from django.urls import reverse, reverse_lazy
from django.utils import timezone from django.utils import timezone
@@ -27,6 +27,7 @@ from cookbook.filters import RecipeFilter
from cookbook.forms import (CommentForm, Recipe, RecipeBookEntryForm, User, from cookbook.forms import (CommentForm, Recipe, RecipeBookEntryForm, User,
UserCreateForm, UserNameForm, UserPreference, UserCreateForm, UserNameForm, UserPreference,
UserPreferenceForm, SpaceJoinForm, SpaceCreateForm, AllAuthSignupForm) UserPreferenceForm, SpaceJoinForm, SpaceCreateForm, AllAuthSignupForm)
from cookbook.helper.ingredient_parser import parse
from cookbook.helper.permission_helper import group_required, share_link_valid, has_group_permission from cookbook.helper.permission_helper import group_required, share_link_valid, has_group_permission
from cookbook.models import (Comment, CookLog, InviteLink, MealPlan, from cookbook.models import (Comment, CookLog, InviteLink, MealPlan,
RecipeBook, RecipeBookEntry, ViewLog, ShoppingList, Space, Keyword, RecipeImport, Unit, RecipeBook, RecipeBookEntry, ViewLog, ShoppingList, Space, Keyword, RecipeImport, Unit,
@@ -542,6 +543,7 @@ def offline(request):
def test(request): def test(request):
if not settings.DEBUG: if not settings.DEBUG:
return HttpResponseRedirect(reverse('index')) return HttpResponseRedirect(reverse('index'))
return JsonResponse(parse('Pane (raffermo o secco) 80 g'), safe=False)
def test2(request): def test2(request):