changed source import to match field structure of recipe model

first imports working
2022-02-19 17:54:00 +01:00
parent 89348f69f1
commit c8fc67fa2b
8 changed files with 138 additions and 345 deletions
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@ -11,6 +11,7 @@ from cookbook.helper import recipe_url_import as helper
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.models import Keyword

+
 # from recipe_scrapers._utils import get_minutes  ## temporary until/unless upstream incorporates get_minutes() PR


@ -33,6 +34,7 @@ def get_from_scraper(scrape, request):
        description = ''

    recipe_json['description'] = parse_description(description)
+    recipe_json['internal'] = True

    try:
        servings = scrape.yields() or None
@ -51,20 +53,20 @@ def get_from_scraper(scrape, request):
    recipe_json['servings'] = max(servings, 1)

    try:
-        recipe_json['prepTime'] = get_minutes(scrape.schema.data.get("prepTime")) or 0
+        recipe_json['working_time'] = get_minutes(scrape.schema.data.get("prepTime")) or 0
    except Exception:
-        recipe_json['prepTime'] = 0
+        recipe_json['working_time'] = 0
    try:
-        recipe_json['cookTime'] = get_minutes(scrape.schema.data.get("cookTime")) or 0
+        recipe_json['waiting_time'] = get_minutes(scrape.schema.data.get("cookTime")) or 0
    except Exception:
-        recipe_json['cookTime'] = 0
+        recipe_json['waiting_time'] = 0

-    if recipe_json['cookTime'] + recipe_json['prepTime'] == 0:
+    if recipe_json['working_time'] + recipe_json['waiting_time'] == 0:
        try:
-            recipe_json['prepTime'] = get_minutes(scrape.total_time()) or 0
+            recipe_json['working_time'] = get_minutes(scrape.total_time()) or 0
        except Exception:
            try:
-                get_minutes(scrape.schema.data.get("totalTime")) or 0
+                recipe_json['working_time'] = get_minutes(scrape.schema.data.get("totalTime")) or 0
            except Exception:
                pass

@ -101,54 +103,49 @@ def get_from_scraper(scrape, request):

    ingredient_parser = IngredientParser(request, True)

-    ingredients = []
+    recipe_json['steps'] = []
+
+    for i in parse_instructions(scrape.instructions()):
+        recipe_json['steps'].append({'instruction': i, 'ingredients': [], })
+    if len(recipe_json['steps']) == 0:
+        recipe_json['steps'].append({'instruction': '', 'ingredients': [], })
+
    try:
        for x in scrape.ingredients():
            try:
                amount, unit, ingredient, note = ingredient_parser.parse(x)
-                ingredients.append(
+                recipe_json['steps'][0]['ingredients'].append(
                    {
                        'amount': amount,
                        'unit': {
-                            'text': unit,
-                            'id': random.randrange(10000, 99999)
+                            'name': unit,
                        },
-                        'ingredient': {
-                            'text': ingredient,
-                            'id': random.randrange(10000, 99999)
+                        'food': {
+                            'name': ingredient,
                        },
                        'note': note,
-                        'original': x
+                        'original_text': x
                    }
                )
            except Exception:
-                ingredients.append(
+                recipe_json['steps'][0]['ingredients'].append(
                    {
                        'amount': 0,
                        'unit': {
-                            'text': '',
-                            'id': random.randrange(10000, 99999)
+                            'name': '',
                        },
-                        'ingredient': {
-                            'text': x,
-                            'id': random.randrange(10000, 99999)
+                        'food': {
+                            'name': x,
                        },
                        'note': '',
-                        'original': x
+                        'original_text': x
                    }
                )
-        recipe_json['recipeIngredient'] = ingredients
    except Exception:
-        recipe_json['recipeIngredient'] = ingredients
-
-    try:
-        recipe_json['recipeInstructions'] = parse_instructions(scrape.instructions())
-    except Exception:
-        recipe_json['recipeInstructions'] = ""
+        pass

    if scrape.url:
-        recipe_json['url'] = scrape.url
-        recipe_json['recipeInstructions'] += "\n\nImported from " + scrape.url
+        recipe_json['source_url'] = scrape.url
    return recipe_json


@ -161,102 +158,46 @@ def parse_name(name):
    return normalize_string(name)


-def parse_ingredients(ingredients):
-    # some pages have comma separated ingredients in a single array entry
-    try:
-        if type(ingredients[0]) == dict:
-            return ingredients
-    except (KeyError, IndexError):
-        pass
-
-    if (len(ingredients) == 1 and type(ingredients) == list):
-        ingredients = ingredients[0].split(',')
-    elif type(ingredients) == str:
-        ingredients = ingredients.split(',')
-
-    for x in ingredients:
-        if '\n' in x:
-            ingredients.remove(x)
-            for i in x.split('\n'):
-                ingredients.insert(0, i)
-
-    ingredient_list = []
-
-    for x in ingredients:
-        if x.replace(' ', '') != '':
-            x = x.replace('&frac12;', "0.5").replace('&frac14;', "0.25").replace('&frac34;', "0.75")
-            try:
-                amount, unit, ingredient, note = parse_single_ingredient(x)
-                if ingredient:
-                    ingredient_list.append(
-                        {
-                            'amount': amount,
-                            'unit': {
-                                'text': unit,
-                                'id': random.randrange(10000, 99999)
-                            },
-                            'ingredient': {
-                                'text': ingredient,
-                                'id': random.randrange(10000, 99999)
-                            },
-                            'note': note,
-                            'original': x
-                        }
-                    )
-            except Exception:
-                ingredient_list.append(
-                    {
-                        'amount': 0,
-                        'unit': {
-                            'text': '',
-                            'id': random.randrange(10000, 99999)
-                        },
-                        'ingredient': {
-                            'text': x,
-                            'id': random.randrange(10000, 99999)
-                        },
-                        'note': '',
-                        'original': x
-                    }
-                )
-
-            ingredients = ingredient_list
-        else:
-            ingredients = []
-    return ingredients
-
-
 def parse_description(description):
    return normalize_string(description)


-def parse_instructions(instructions):
-    instruction_text = ''
-
-    # flatten instructions if they are in a list
-    if type(instructions) == list:
-        for i in instructions:
-            if type(i) == str:
-                instruction_text += i
-            else:
-                if 'text' in i:
-                    instruction_text += i['text'] + '\n\n'
-                elif 'itemListElement' in i:
-                    for ile in i['itemListElement']:
-                        if type(ile) == str:
-                            instruction_text += ile + '\n\n'
-                        elif 'text' in ile:
-                            instruction_text += ile['text'] + '\n\n'
-                else:
-                    instruction_text += str(i)
-        instructions = instruction_text
-
-    normalized_string = normalize_string(instructions)
+def clean_instruction_string(instruction):
+    normalized_string = normalize_string(instruction)
    normalized_string = normalized_string.replace('\n', '  \n')
    normalized_string = normalized_string.replace('  \n  \n', '\n\n')
    return normalized_string


+def parse_instructions(instructions):
+    """
+    Convert arbitrary instructions object from website import and turn it into a flat list of strings
+    :param instructions: any instructions object from import
+    :return: list of strings (from one to many elements depending on website)
+    """
+    instruction_list = []
+
+    if type(instructions) == list:
+        for i in instructions:
+            if type(i) == str:
+                instruction_list.append(clean_instruction_string(i))
+            else:
+                if 'text' in i:
+                    instruction_list.append(clean_instruction_string(i['text']))
+                elif 'itemListElement' in i:
+                    for ile in i['itemListElement']:
+                        if type(ile) == str:
+                            instruction_list.append(clean_instruction_string(ile))
+                        elif 'text' in ile:
+                            instruction_list.append(clean_instruction_string(ile['text']))
+                else:
+                    instruction_list.append(clean_instruction_string(str(i)))
+    else:
+        instruction_list.append(clean_instruction_string(instructions))
+
+    return instruction_list
+
+
 def parse_image(image):
    # check if list of images is returned, take first if so
    if not image:
@ -334,9 +275,9 @@ def parse_keywords(keyword_json, space):
        kw = normalize_string(kw)
        if len(kw) != 0:
            if k := Keyword.objects.filter(name=kw, space=space).first():
-                keywords.append({'id': str(k.id), 'text': str(k)})
+                keywords.append({'name': str(k)})
            else:
-                keywords.append({'id': random.randrange(1111111, 9999999, 1), 'text': kw})
+                keywords.append({'name': kw})

    return keywords

@ -367,6 +308,7 @@ def normalize_string(string):
    unescaped_string = unescaped_string.replace("\xa0", " ").replace("\t", " ").strip()
    return unescaped_string

+
 # TODO deprecate when merged into recipe_scapers


@ -408,9 +350,9 @@ def get_minutes(time_text):
    if "/" in (hours := matched.groupdict().get("hours") or ''):
        number = hours.split(" ")
        if len(number) == 2:
-            minutes += 60*int(number[0])
+            minutes += 60 * int(number[0])
        fraction = number[-1:][0].split("/")
-        minutes += 60 * float(int(fraction[0])/int(fraction[1]))
+        minutes += 60 * float(int(fraction[0]) / int(fraction[1]))
    else:
        minutes += 60 * float(hours)