changed source import to match field structure of recipe model

first imports working
This commit is contained in:
vabene1111
2022-02-19 17:54:00 +01:00
parent 89348f69f1
commit c8fc67fa2b
8 changed files with 138 additions and 345 deletions

View File

@ -11,6 +11,7 @@ from cookbook.helper import recipe_url_import as helper
from cookbook.helper.ingredient_parser import IngredientParser
from cookbook.models import Keyword
# from recipe_scrapers._utils import get_minutes ## temporary until/unless upstream incorporates get_minutes() PR
@ -33,6 +34,7 @@ def get_from_scraper(scrape, request):
description = ''
recipe_json['description'] = parse_description(description)
recipe_json['internal'] = True
try:
servings = scrape.yields() or None
@ -51,20 +53,20 @@ def get_from_scraper(scrape, request):
recipe_json['servings'] = max(servings, 1)
try:
recipe_json['prepTime'] = get_minutes(scrape.schema.data.get("prepTime")) or 0
recipe_json['working_time'] = get_minutes(scrape.schema.data.get("prepTime")) or 0
except Exception:
recipe_json['prepTime'] = 0
recipe_json['working_time'] = 0
try:
recipe_json['cookTime'] = get_minutes(scrape.schema.data.get("cookTime")) or 0
recipe_json['waiting_time'] = get_minutes(scrape.schema.data.get("cookTime")) or 0
except Exception:
recipe_json['cookTime'] = 0
recipe_json['waiting_time'] = 0
if recipe_json['cookTime'] + recipe_json['prepTime'] == 0:
if recipe_json['working_time'] + recipe_json['waiting_time'] == 0:
try:
recipe_json['prepTime'] = get_minutes(scrape.total_time()) or 0
recipe_json['working_time'] = get_minutes(scrape.total_time()) or 0
except Exception:
try:
get_minutes(scrape.schema.data.get("totalTime")) or 0
recipe_json['working_time'] = get_minutes(scrape.schema.data.get("totalTime")) or 0
except Exception:
pass
@ -101,54 +103,49 @@ def get_from_scraper(scrape, request):
ingredient_parser = IngredientParser(request, True)
ingredients = []
recipe_json['steps'] = []
for i in parse_instructions(scrape.instructions()):
recipe_json['steps'].append({'instruction': i, 'ingredients': [], })
if len(recipe_json['steps']) == 0:
recipe_json['steps'].append({'instruction': '', 'ingredients': [], })
try:
for x in scrape.ingredients():
try:
amount, unit, ingredient, note = ingredient_parser.parse(x)
ingredients.append(
recipe_json['steps'][0]['ingredients'].append(
{
'amount': amount,
'unit': {
'text': unit,
'id': random.randrange(10000, 99999)
'name': unit,
},
'ingredient': {
'text': ingredient,
'id': random.randrange(10000, 99999)
'food': {
'name': ingredient,
},
'note': note,
'original': x
'original_text': x
}
)
except Exception:
ingredients.append(
recipe_json['steps'][0]['ingredients'].append(
{
'amount': 0,
'unit': {
'text': '',
'id': random.randrange(10000, 99999)
'name': '',
},
'ingredient': {
'text': x,
'id': random.randrange(10000, 99999)
'food': {
'name': x,
},
'note': '',
'original': x
'original_text': x
}
)
recipe_json['recipeIngredient'] = ingredients
except Exception:
recipe_json['recipeIngredient'] = ingredients
try:
recipe_json['recipeInstructions'] = parse_instructions(scrape.instructions())
except Exception:
recipe_json['recipeInstructions'] = ""
pass
if scrape.url:
recipe_json['url'] = scrape.url
recipe_json['recipeInstructions'] += "\n\nImported from " + scrape.url
recipe_json['source_url'] = scrape.url
return recipe_json
@ -161,102 +158,46 @@ def parse_name(name):
return normalize_string(name)
def parse_ingredients(ingredients):
# some pages have comma separated ingredients in a single array entry
try:
if type(ingredients[0]) == dict:
return ingredients
except (KeyError, IndexError):
pass
if (len(ingredients) == 1 and type(ingredients) == list):
ingredients = ingredients[0].split(',')
elif type(ingredients) == str:
ingredients = ingredients.split(',')
for x in ingredients:
if '\n' in x:
ingredients.remove(x)
for i in x.split('\n'):
ingredients.insert(0, i)
ingredient_list = []
for x in ingredients:
if x.replace(' ', '') != '':
x = x.replace('½', "0.5").replace('¼', "0.25").replace('¾', "0.75")
try:
amount, unit, ingredient, note = parse_single_ingredient(x)
if ingredient:
ingredient_list.append(
{
'amount': amount,
'unit': {
'text': unit,
'id': random.randrange(10000, 99999)
},
'ingredient': {
'text': ingredient,
'id': random.randrange(10000, 99999)
},
'note': note,
'original': x
}
)
except Exception:
ingredient_list.append(
{
'amount': 0,
'unit': {
'text': '',
'id': random.randrange(10000, 99999)
},
'ingredient': {
'text': x,
'id': random.randrange(10000, 99999)
},
'note': '',
'original': x
}
)
ingredients = ingredient_list
else:
ingredients = []
return ingredients
def parse_description(description):
return normalize_string(description)
def parse_instructions(instructions):
instruction_text = ''
# flatten instructions if they are in a list
if type(instructions) == list:
for i in instructions:
if type(i) == str:
instruction_text += i
else:
if 'text' in i:
instruction_text += i['text'] + '\n\n'
elif 'itemListElement' in i:
for ile in i['itemListElement']:
if type(ile) == str:
instruction_text += ile + '\n\n'
elif 'text' in ile:
instruction_text += ile['text'] + '\n\n'
else:
instruction_text += str(i)
instructions = instruction_text
normalized_string = normalize_string(instructions)
def clean_instruction_string(instruction):
normalized_string = normalize_string(instruction)
normalized_string = normalized_string.replace('\n', ' \n')
normalized_string = normalized_string.replace(' \n \n', '\n\n')
return normalized_string
def parse_instructions(instructions):
"""
Convert arbitrary instructions object from website import and turn it into a flat list of strings
:param instructions: any instructions object from import
:return: list of strings (from one to many elements depending on website)
"""
instruction_list = []
if type(instructions) == list:
for i in instructions:
if type(i) == str:
instruction_list.append(clean_instruction_string(i))
else:
if 'text' in i:
instruction_list.append(clean_instruction_string(i['text']))
elif 'itemListElement' in i:
for ile in i['itemListElement']:
if type(ile) == str:
instruction_list.append(clean_instruction_string(ile))
elif 'text' in ile:
instruction_list.append(clean_instruction_string(ile['text']))
else:
instruction_list.append(clean_instruction_string(str(i)))
else:
instruction_list.append(clean_instruction_string(instructions))
return instruction_list
def parse_image(image):
# check if list of images is returned, take first if so
if not image:
@ -334,9 +275,9 @@ def parse_keywords(keyword_json, space):
kw = normalize_string(kw)
if len(kw) != 0:
if k := Keyword.objects.filter(name=kw, space=space).first():
keywords.append({'id': str(k.id), 'text': str(k)})
keywords.append({'name': str(k)})
else:
keywords.append({'id': random.randrange(1111111, 9999999, 1), 'text': kw})
keywords.append({'name': kw})
return keywords
@ -367,6 +308,7 @@ def normalize_string(string):
unescaped_string = unescaped_string.replace("\xa0", " ").replace("\t", " ").strip()
return unescaped_string
# TODO deprecate when merged into recipe_scapers
@ -408,9 +350,9 @@ def get_minutes(time_text):
if "/" in (hours := matched.groupdict().get("hours") or ''):
number = hours.split(" ")
if len(number) == 2:
minutes += 60*int(number[0])
minutes += 60 * int(number[0])
fraction = number[-1:][0].split("/")
minutes += 60 * float(int(fraction[0])/int(fraction[1]))
minutes += 60 * float(int(fraction[0]) / int(fraction[1]))
else:
minutes += 60 * float(hours)