refactor json ld code into helper

2020-06-22 21:35:56 +02:00
parent 9e748552b2
commit 71b41a9ca2
3 changed files with 72 additions and 55 deletions
--- a/cookbook/views/api.py
+++ b/cookbook/views/api.py
@ -2,6 +2,7 @@ import io
 import json
 import re

+import microdata
 import requests
 from annoying.decorators import ajax_request
 from annoying.functions import get_object_or_None
@ -18,6 +19,7 @@ from rest_framework.exceptions import APIException
 from rest_framework.mixins import RetrieveModelMixin, UpdateModelMixin, ListModelMixin

 from cookbook.helper.permission_helper import group_required, CustomIsOwner, CustomIsAdmin
+from cookbook.helper.recipe_url_import import find_ld_json
 from cookbook.models import Recipe, Sync, Storage, CookLog, MealPlan, MealType, ViewLog, UserPreference, RecipeBook, Keyword
 from cookbook.provider.dropbox import Dropbox
 from cookbook.provider.nextcloud import Nextcloud
@ -255,61 +257,16 @@ def recipe_from_url(request, url):
        return JsonResponse({'error': _('The requested page refused to provide any information (Status Code 403).')})

    soup = BeautifulSoup(response.text, "html.parser")
+
+    # first try finding ld+json as its most common
    for ld in soup.find_all('script', type='application/ld+json'):
-        ld_json = json.loads(ld.string)
+        if (r := find_ld_json(json.loads(ld.string))) is not None:
+            return r

-        # recipes type might be wrapped in @graph type
-        if '@graph' in ld_json:
-            for x in ld_json['@graph']:
-                if '@type' in x and x['@type'] == 'Recipe':
-                    ld_json = x
-
-        if '@type' in ld_json and ld_json['@type'] == 'Recipe':
-
-            if 'recipeIngredient' in ld_json:
-                ingredients = []
-
-                for x in ld_json['recipeIngredient']:
-                    ingredient_split = x.split()
-                    if len(ingredient_split) > 2:
-                        ingredients.append({'amount': ingredient_split[0], 'unit': ingredient_split[1], 'ingredient': " ".join(ingredient_split[2:])})
-                    if len(ingredient_split) == 2:
-                        ingredients.append({'amount': ingredient_split[0], 'unit': '', 'ingredient': " ".join(ingredient_split[1:])})
-                    if len(ingredient_split) == 1:
-                        ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)})
-
-                ld_json['recipeIngredient'] = ingredients
-
-            if 'keywords' in ld_json:
-                keywords = []
-                if type(ld_json['keywords']) == str:
-                    ld_json['keywords'] = ld_json['keywords'].split(',')
-
-                for kw in ld_json['keywords']:
-                    if k := Keyword.objects.filter(name=kw).first():
-                        keywords.append({'id': str(k.id), 'text': str(k).strip()})
-                    else:
-                        keywords.append({'id': "null", 'text': kw.strip()})
-
-                ld_json['keywords'] = keywords
-
-            if 'recipeInstructions' in ld_json:
-                instructions = ''
-                if type(ld_json['recipeInstructions']) == list:
-                    for i in ld_json['recipeInstructions']:
-                        if type(i) == str:
-                            instructions += i
-                        else:
-                            instructions += i['text'] + '\n\n'
-                    ld_json['recipeInstructions'] = instructions
-
-            if 'image' in ld_json:
-                if (type(ld_json['image'])) == list:
-                    if type(ld_json['image'][0]) == str:
-                        ld_json['image'] = ld_json['image'][0]
-                    elif 'url' in ld_json['image'][0]:
-                        ld_json['image'] = ld_json['image'][0]['url']
-
-            return JsonResponse(ld_json)
+    # now try to find microdata
+    items = microdata.get_items(response)
+    for i in items:
+        js = i.json()
+        print('hi')

    return JsonResponse({'error': _('The requested site does not provide any recognized data format to import the recipe from.')})