more improvements to the ingredient parser + tests

This commit is contained in:
vabene1111 2021-06-21 13:00:03 +02:00
parent 538e45d20c
commit fd1a399d03
4 changed files with 17 additions and 8 deletions

View File

@ -23,6 +23,7 @@ def parse_fraction(x):
def parse_amount(x):
amount = 0
unit = ''
note = ''
did_check_frac = False
end = 0
@ -52,9 +53,10 @@ def parse_amount(x):
except ValueError:
unit = x[end:]
if unit.startswith('('): # i dont know any unit that starts with ( so its likely an alternative like 1L (500ml) Water
if unit.startswith('(') or unit.startswith('-'): # i dont know any unit that starts with ( or - so its likely an alternative like 1L (500ml) Water or 2-3
unit = ''
return amount, unit
note = x
return amount, unit, note
def parse_ingredient_with_comma(tokens):
@ -105,6 +107,7 @@ def parse(x):
unit = ''
ingredient = ''
note = ''
unit_note = ''
# if the string contains a parenthesized part early on, remove it and place it at the end
# because it's likely some kind of note
@ -119,8 +122,7 @@ def parse(x):
else:
try:
# try to parse first argument as amount
amount, unit = parse_amount(tokens[0])
print('test', unit)
amount, unit, unit_note = parse_amount(tokens[0])
# only try to parse the second argument as an amount if there are at least
# three arguments; if it already has a unit there can't be
# a fraction for the amount
@ -167,6 +169,9 @@ def parse(x):
ingredient, note = parse_ingredient(tokens)
except ValueError:
ingredient = ' '.join(tokens[1:])
if unit_note not in note:
note += ' ' + unit_note
return amount, unit.strip(), ingredient.strip(), note.strip()

View File

@ -773,7 +773,7 @@ COOKPAD = {
"text": "Water",
"id": 49092
},
"note": "",
"note": "2-3",
"original": "2-3 c Water"
},
{
@ -1498,10 +1498,10 @@ GIALLOZAFFERANO = {
"id": 64900
},
"ingredient": {
"text": "Pane (raffermo o secco) 80 g",
"text": "Pane 80 g",
"id": 24720
},
"note": "",
"note": "(raffermo o secco)",
"original": "Pane (raffermo o secco) 80 g"
},
{

View File

@ -57,6 +57,8 @@ def test_ingredient_parser():
"400g unsalted butter": (400, "g", "butter", "unsalted"),
"2L Wasser": (2, "L", "Wasser", ""),
"1 (16 ounce) package dry lentils, rinsed": (1, "package", "dry lentils, rinsed", "16 ounce"),
"2-3 c Water": (2, "c", "Water", "2-3"),
"Pane (raffermo o secco) 80 g": (0, "", "Pane 80 g", "raffermo o secco"), #TODO this is actually not a good result but currently expected
}
# for German you could say that if an ingredient does not have
# an amount and it starts with a lowercase letter, then that

View File

@ -14,7 +14,7 @@ from django.contrib.auth.password_validation import validate_password
from django.core.exceptions import ValidationError
from django.db import IntegrityError
from django.db.models import Avg, Q, Sum
from django.http import HttpResponseRedirect
from django.http import HttpResponseRedirect, JsonResponse
from django.shortcuts import get_object_or_404, render, redirect
from django.urls import reverse, reverse_lazy
from django.utils import timezone
@ -27,6 +27,7 @@ from cookbook.filters import RecipeFilter
from cookbook.forms import (CommentForm, Recipe, RecipeBookEntryForm, User,
UserCreateForm, UserNameForm, UserPreference,
UserPreferenceForm, SpaceJoinForm, SpaceCreateForm, AllAuthSignupForm)
from cookbook.helper.ingredient_parser import parse
from cookbook.helper.permission_helper import group_required, share_link_valid, has_group_permission
from cookbook.models import (Comment, CookLog, InviteLink, MealPlan,
RecipeBook, RecipeBookEntry, ViewLog, ShoppingList, Space, Keyword, RecipeImport, Unit,
@ -542,6 +543,7 @@ def offline(request):
def test(request):
    """Debug-only view that returns ingredient parser output as JSON.

    When ``settings.DEBUG`` is off, the request is redirected to the
    ``index`` route. Otherwise a fixed sample ingredient string is passed
    to ``parse`` and the result is returned as a JSON response.

    ``safe=False`` is passed because the value handed to ``JsonResponse``
    is not a dict (``parse`` appears to return a tuple; confirm against
    ``cookbook.helper.ingredient_parser``).
    """
    if settings.DEBUG:
        parsed = parse('Pane (raffermo o secco) 80 g')
        return JsonResponse(parsed, safe=False)
    return HttpResponseRedirect(reverse('index'))
def test2(request):