changed source import to match field structure of recipe model
first imports working
This commit is contained in:
@ -11,6 +11,7 @@ from cookbook.helper import recipe_url_import as helper
|
||||
from cookbook.helper.ingredient_parser import IngredientParser
|
||||
from cookbook.models import Keyword
|
||||
|
||||
|
||||
# from recipe_scrapers._utils import get_minutes ## temporary until/unless upstream incorporates get_minutes() PR
|
||||
|
||||
|
||||
@ -33,6 +34,7 @@ def get_from_scraper(scrape, request):
|
||||
description = ''
|
||||
|
||||
recipe_json['description'] = parse_description(description)
|
||||
recipe_json['internal'] = True
|
||||
|
||||
try:
|
||||
servings = scrape.yields() or None
|
||||
@ -51,20 +53,20 @@ def get_from_scraper(scrape, request):
|
||||
recipe_json['servings'] = max(servings, 1)
|
||||
|
||||
try:
|
||||
recipe_json['prepTime'] = get_minutes(scrape.schema.data.get("prepTime")) or 0
|
||||
recipe_json['working_time'] = get_minutes(scrape.schema.data.get("prepTime")) or 0
|
||||
except Exception:
|
||||
recipe_json['prepTime'] = 0
|
||||
recipe_json['working_time'] = 0
|
||||
try:
|
||||
recipe_json['cookTime'] = get_minutes(scrape.schema.data.get("cookTime")) or 0
|
||||
recipe_json['waiting_time'] = get_minutes(scrape.schema.data.get("cookTime")) or 0
|
||||
except Exception:
|
||||
recipe_json['cookTime'] = 0
|
||||
recipe_json['waiting_time'] = 0
|
||||
|
||||
if recipe_json['cookTime'] + recipe_json['prepTime'] == 0:
|
||||
if recipe_json['working_time'] + recipe_json['waiting_time'] == 0:
|
||||
try:
|
||||
recipe_json['prepTime'] = get_minutes(scrape.total_time()) or 0
|
||||
recipe_json['working_time'] = get_minutes(scrape.total_time()) or 0
|
||||
except Exception:
|
||||
try:
|
||||
get_minutes(scrape.schema.data.get("totalTime")) or 0
|
||||
recipe_json['working_time'] = get_minutes(scrape.schema.data.get("totalTime")) or 0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@ -101,54 +103,49 @@ def get_from_scraper(scrape, request):
|
||||
|
||||
ingredient_parser = IngredientParser(request, True)
|
||||
|
||||
ingredients = []
|
||||
recipe_json['steps'] = []
|
||||
|
||||
for i in parse_instructions(scrape.instructions()):
|
||||
recipe_json['steps'].append({'instruction': i, 'ingredients': [], })
|
||||
if len(recipe_json['steps']) == 0:
|
||||
recipe_json['steps'].append({'instruction': '', 'ingredients': [], })
|
||||
|
||||
try:
|
||||
for x in scrape.ingredients():
|
||||
try:
|
||||
amount, unit, ingredient, note = ingredient_parser.parse(x)
|
||||
ingredients.append(
|
||||
recipe_json['steps'][0]['ingredients'].append(
|
||||
{
|
||||
'amount': amount,
|
||||
'unit': {
|
||||
'text': unit,
|
||||
'id': random.randrange(10000, 99999)
|
||||
'name': unit,
|
||||
},
|
||||
'ingredient': {
|
||||
'text': ingredient,
|
||||
'id': random.randrange(10000, 99999)
|
||||
'food': {
|
||||
'name': ingredient,
|
||||
},
|
||||
'note': note,
|
||||
'original': x
|
||||
'original_text': x
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
ingredients.append(
|
||||
recipe_json['steps'][0]['ingredients'].append(
|
||||
{
|
||||
'amount': 0,
|
||||
'unit': {
|
||||
'text': '',
|
||||
'id': random.randrange(10000, 99999)
|
||||
'name': '',
|
||||
},
|
||||
'ingredient': {
|
||||
'text': x,
|
||||
'id': random.randrange(10000, 99999)
|
||||
'food': {
|
||||
'name': x,
|
||||
},
|
||||
'note': '',
|
||||
'original': x
|
||||
'original_text': x
|
||||
}
|
||||
)
|
||||
recipe_json['recipeIngredient'] = ingredients
|
||||
except Exception:
|
||||
recipe_json['recipeIngredient'] = ingredients
|
||||
|
||||
try:
|
||||
recipe_json['recipeInstructions'] = parse_instructions(scrape.instructions())
|
||||
except Exception:
|
||||
recipe_json['recipeInstructions'] = ""
|
||||
pass
|
||||
|
||||
if scrape.url:
|
||||
recipe_json['url'] = scrape.url
|
||||
recipe_json['recipeInstructions'] += "\n\nImported from " + scrape.url
|
||||
recipe_json['source_url'] = scrape.url
|
||||
return recipe_json
|
||||
|
||||
|
||||
@ -161,102 +158,46 @@ def parse_name(name):
|
||||
return normalize_string(name)
|
||||
|
||||
|
||||
def parse_ingredients(ingredients):
|
||||
# some pages have comma separated ingredients in a single array entry
|
||||
try:
|
||||
if type(ingredients[0]) == dict:
|
||||
return ingredients
|
||||
except (KeyError, IndexError):
|
||||
pass
|
||||
|
||||
if (len(ingredients) == 1 and type(ingredients) == list):
|
||||
ingredients = ingredients[0].split(',')
|
||||
elif type(ingredients) == str:
|
||||
ingredients = ingredients.split(',')
|
||||
|
||||
for x in ingredients:
|
||||
if '\n' in x:
|
||||
ingredients.remove(x)
|
||||
for i in x.split('\n'):
|
||||
ingredients.insert(0, i)
|
||||
|
||||
ingredient_list = []
|
||||
|
||||
for x in ingredients:
|
||||
if x.replace(' ', '') != '':
|
||||
x = x.replace('½', "0.5").replace('¼', "0.25").replace('¾', "0.75")
|
||||
try:
|
||||
amount, unit, ingredient, note = parse_single_ingredient(x)
|
||||
if ingredient:
|
||||
ingredient_list.append(
|
||||
{
|
||||
'amount': amount,
|
||||
'unit': {
|
||||
'text': unit,
|
||||
'id': random.randrange(10000, 99999)
|
||||
},
|
||||
'ingredient': {
|
||||
'text': ingredient,
|
||||
'id': random.randrange(10000, 99999)
|
||||
},
|
||||
'note': note,
|
||||
'original': x
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
ingredient_list.append(
|
||||
{
|
||||
'amount': 0,
|
||||
'unit': {
|
||||
'text': '',
|
||||
'id': random.randrange(10000, 99999)
|
||||
},
|
||||
'ingredient': {
|
||||
'text': x,
|
||||
'id': random.randrange(10000, 99999)
|
||||
},
|
||||
'note': '',
|
||||
'original': x
|
||||
}
|
||||
)
|
||||
|
||||
ingredients = ingredient_list
|
||||
else:
|
||||
ingredients = []
|
||||
return ingredients
|
||||
|
||||
|
||||
def parse_description(description):
|
||||
return normalize_string(description)
|
||||
|
||||
|
||||
def parse_instructions(instructions):
|
||||
instruction_text = ''
|
||||
|
||||
# flatten instructions if they are in a list
|
||||
if type(instructions) == list:
|
||||
for i in instructions:
|
||||
if type(i) == str:
|
||||
instruction_text += i
|
||||
else:
|
||||
if 'text' in i:
|
||||
instruction_text += i['text'] + '\n\n'
|
||||
elif 'itemListElement' in i:
|
||||
for ile in i['itemListElement']:
|
||||
if type(ile) == str:
|
||||
instruction_text += ile + '\n\n'
|
||||
elif 'text' in ile:
|
||||
instruction_text += ile['text'] + '\n\n'
|
||||
else:
|
||||
instruction_text += str(i)
|
||||
instructions = instruction_text
|
||||
|
||||
normalized_string = normalize_string(instructions)
|
||||
def clean_instruction_string(instruction):
|
||||
normalized_string = normalize_string(instruction)
|
||||
normalized_string = normalized_string.replace('\n', ' \n')
|
||||
normalized_string = normalized_string.replace(' \n \n', '\n\n')
|
||||
return normalized_string
|
||||
|
||||
|
||||
def parse_instructions(instructions):
|
||||
"""
|
||||
Convert arbitrary instructions object from website import and turn it into a flat list of strings
|
||||
:param instructions: any instructions object from import
|
||||
:return: list of strings (from one to many elements depending on website)
|
||||
"""
|
||||
instruction_list = []
|
||||
|
||||
if type(instructions) == list:
|
||||
for i in instructions:
|
||||
if type(i) == str:
|
||||
instruction_list.append(clean_instruction_string(i))
|
||||
else:
|
||||
if 'text' in i:
|
||||
instruction_list.append(clean_instruction_string(i['text']))
|
||||
elif 'itemListElement' in i:
|
||||
for ile in i['itemListElement']:
|
||||
if type(ile) == str:
|
||||
instruction_list.append(clean_instruction_string(ile))
|
||||
elif 'text' in ile:
|
||||
instruction_list.append(clean_instruction_string(ile['text']))
|
||||
else:
|
||||
instruction_list.append(clean_instruction_string(str(i)))
|
||||
else:
|
||||
instruction_list.append(clean_instruction_string(instructions))
|
||||
|
||||
return instruction_list
|
||||
|
||||
|
||||
def parse_image(image):
|
||||
# check if list of images is returned, take first if so
|
||||
if not image:
|
||||
@ -334,9 +275,9 @@ def parse_keywords(keyword_json, space):
|
||||
kw = normalize_string(kw)
|
||||
if len(kw) != 0:
|
||||
if k := Keyword.objects.filter(name=kw, space=space).first():
|
||||
keywords.append({'id': str(k.id), 'text': str(k)})
|
||||
keywords.append({'name': str(k)})
|
||||
else:
|
||||
keywords.append({'id': random.randrange(1111111, 9999999, 1), 'text': kw})
|
||||
keywords.append({'name': kw})
|
||||
|
||||
return keywords
|
||||
|
||||
@ -367,6 +308,7 @@ def normalize_string(string):
|
||||
unescaped_string = unescaped_string.replace("\xa0", " ").replace("\t", " ").strip()
|
||||
return unescaped_string
|
||||
|
||||
|
||||
# TODO deprecate when merged into recipe_scapers
|
||||
|
||||
|
||||
@ -408,9 +350,9 @@ def get_minutes(time_text):
|
||||
if "/" in (hours := matched.groupdict().get("hours") or ''):
|
||||
number = hours.split(" ")
|
||||
if len(number) == 2:
|
||||
minutes += 60*int(number[0])
|
||||
minutes += 60 * int(number[0])
|
||||
fraction = number[-1:][0].split("/")
|
||||
minutes += 60 * float(int(fraction[0])/int(fraction[1]))
|
||||
minutes += 60 * float(int(fraction[0]) / int(fraction[1]))
|
||||
else:
|
||||
minutes += 60 * float(hours)
|
||||
|
||||
|
Reference in New Issue
Block a user