diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py index 90d1d96f..fc4dfbc6 100644 --- a/cookbook/helper/recipe_url_import.py +++ b/cookbook/helper/recipe_url_import.py @@ -1,59 +1,82 @@ +import re + from django.http import JsonResponse from cookbook.models import Keyword -def find_ld_json(ld_json): - # recipes type might be wrapped in @graph type - if '@graph' in ld_json: - for x in ld_json['@graph']: - if '@type' in x and x['@type'] == 'Recipe': - ld_json = x +def find_recipe_json(ld_json): + ld_json['org'] = str(ld_json) - if '@type' in ld_json and ld_json['@type'] == 'Recipe': + # some sites use ingredients instead of recipeIngredients + if 'recipeIngredient' not in ld_json and 'ingredients' in ld_json: + ld_json['recipeIngredient'] = ld_json['ingredients'] - if 'recipeIngredient' in ld_json: - ingredients = [] + if 'recipeIngredient' in ld_json: + # some pages have comma separated ingredients in a single array entry + if len(ld_json['recipeIngredient']) == 1 and len(ld_json['recipeIngredient'][0]) > 30: + ld_json['recipeIngredient'] = ld_json['recipeIngredient'][0].split(',') - for x in ld_json['recipeIngredient']: - ingredient_split = x.split() - if len(ingredient_split) > 2: - ingredients.append({'amount': ingredient_split[0], 'unit': ingredient_split[1], 'ingredient': " ".join(ingredient_split[2:])}) - if len(ingredient_split) == 2: - ingredients.append({'amount': ingredient_split[0], 'unit': '', 'ingredient': " ".join(ingredient_split[1:])}) - if len(ingredient_split) == 1: - ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)}) + ingredients = [] - ld_json['recipeIngredient'] = ingredients + for x in ld_json['recipeIngredient']: + ingredient_split = x.split() + if len(ingredient_split) > 2: + ingredients.append({'amount': ingredient_split[0], 'unit': ingredient_split[1], 'ingredient': " ".join(ingredient_split[2:])}) + if len(ingredient_split) == 2: + ingredients.append({'amount': ingredient_split[0], 'unit': '', 'ingredient': " ".join(ingredient_split[1:])}) + if len(ingredient_split) == 1: + ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)}) - if 'keywords' in ld_json: - keywords = [] - if type(ld_json['keywords']) == str: - ld_json['keywords'] = ld_json['keywords'].split(',') + ld_json['recipeIngredient'] = ingredients - for kw in ld_json['keywords']: - if k := Keyword.objects.filter(name=kw).first(): - keywords.append({'id': str(k.id), 'text': str(k).strip()}) + if 'keywords' in ld_json: + keywords = [] + + # keywords as string + if type(ld_json['keywords']) == str: + ld_json['keywords'] = ld_json['keywords'].split(',') + + # keywords as string in list + if type(ld_json['keywords']) == list and len(ld_json['keywords']) == 1 and ',' in ld_json['keywords'][0]: + ld_json['keywords'] = ld_json['keywords'][0].split(',') + + # keywords as list + for kw in ld_json['keywords']: + if k := Keyword.objects.filter(name=kw).first(): + keywords.append({'id': str(k.id), 'text': str(k).strip()}) + else: + keywords.append({'id': "null", 'text': kw.strip()}) + + ld_json['keywords'] = keywords + + if 'recipeInstructions' in ld_json: + instructions = '' + + # flatten instructions if they are in a list + if type(ld_json['recipeInstructions']) == list: + for i in ld_json['recipeInstructions']: + if type(i) == str: + instructions += i else: - keywords.append({'id': "null", 'text': kw.strip()}) + instructions += i['text'] + '\n\n' + ld_json['recipeInstructions'] = instructions - ld_json['keywords'] = keywords + ld_json['recipeInstructions'] = re.sub(r'\n\s*\n', '\n\n', ld_json['recipeInstructions']) + ld_json['recipeInstructions'] = re.sub(' +', ' ', ld_json['recipeInstructions']) + ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('

', '') + ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('

', '') - if 'recipeInstructions' in ld_json: - instructions = '' - if type(ld_json['recipeInstructions']) == list: - for i in ld_json['recipeInstructions']: - if type(i) == str: - instructions += i - else: - instructions += i['text'] + '\n\n' - ld_json['recipeInstructions'] = instructions + if 'image' in ld_json: + # check if list of images is returned, take first if so + if (type(ld_json['image'])) == list: + if type(ld_json['image'][0]) == str: + ld_json['image'] = ld_json['image'][0] + elif 'url' in ld_json['image'][0]: + ld_json['image'] = ld_json['image'][0]['url'] - if 'image' in ld_json: - if (type(ld_json['image'])) == list: - if type(ld_json['image'][0]) == str: - ld_json['image'] = ld_json['image'][0] - elif 'url' in ld_json['image'][0]: - ld_json['image'] = ld_json['image'][0]['url'] + # ignore relative image paths + if 'http' not in ld_json['image']: + ld_json['image'] = '' - return JsonResponse(ld_json) + return JsonResponse(ld_json) diff --git a/cookbook/templates/url_import.html b/cookbook/templates/url_import.html index 7e20178b..009c655c 100644 --- a/cookbook/templates/url_import.html +++ b/cookbook/templates/url_import.html @@ -17,6 +17,13 @@
+ https://www.inspirationforall.de/pudding-selber-machen-vanillepudding-schokopudding-rezept/
+ https://www.ichkoche.at/schokopudding-rezept-218012
+ https://www.gutekueche.de/mamis-feiner-schokopudding-rezept-4274
+ https://www.maizena.at/rezepte/schokopudding/13534
+ https://kochkino.de/schokoladen-pudding/2159
+ https://www.oetker.de/rezepte/r/schokopudding-mit-vanille-herzen
+
@@ -42,11 +49,11 @@
-
+
{% trans 'Recipe Image' %}
-
+
@@ -109,8 +116,10 @@ + + [[recipe_data]] - [[recipe_data]] +
@@ -131,14 +140,6 @@ Vue.component('vue-multiselect', window.VueMultiselect.default) - // micro data examples - // https://www.inspirationforall.de/pudding-selber-machen-vanillepudding-schokopudding-rezept/ - // https://www.ichkoche.at/schokopudding-rezept-218012 - // https://www.gutekueche.de/mamis-feiner-schokopudding-rezept-4274 - // https://www.maizena.at/rezepte/schokopudding/13534 - // https://kochkino.de/schokoladen-pudding/2159 - // https://www.oetker.de/rezepte/r/schokopudding-mit-vanille-herzen - let app = new Vue({ components: { Multiselect: window.VueMultiselect.default diff --git a/cookbook/views/api.py b/cookbook/views/api.py index 60255700..a3b8d61f 100644 --- a/cookbook/views/api.py +++ b/cookbook/views/api.py @@ -19,7 +19,7 @@ from rest_framework.exceptions import APIException from rest_framework.mixins import RetrieveModelMixin, UpdateModelMixin, ListModelMixin from cookbook.helper.permission_helper import group_required, CustomIsOwner, CustomIsAdmin -from cookbook.helper.recipe_url_import import find_ld_json +from cookbook.helper.recipe_url_import import find_recipe_json from cookbook.models import Recipe, Sync, Storage, CookLog, MealPlan, MealType, ViewLog, UserPreference, RecipeBook, Keyword from cookbook.provider.dropbox import Dropbox from cookbook.provider.nextcloud import Nextcloud @@ -260,13 +260,25 @@ def recipe_from_url(request, url): # first try finding ld+json as its most common for ld in soup.find_all('script', type='application/ld+json'): - if (r := find_ld_json(json.loads(ld.string))) is not None: - return r + ld_json = json.loads(ld.string) + if type(ld_json) != list: + ld_json = [ld_json] + + for ld_json_item in ld_json: + # recipes type might be wrapped in @graph type + if '@graph' in ld_json_item: + for x in ld_json_item['@graph']: + if '@type' in x and x['@type'] == 'Recipe': + ld_json_item = x + + if '@type' in ld_json_item and ld_json_item['@type'] == 'Recipe': + return find_recipe_json(ld_json_item) # now try to find microdata - items = microdata.get_items(response) + items = microdata.get_items(response.text) for i in items: - js = i.json() - print('hi') + md_json = json.loads(i.json()) + if 'schema.org/Recipe' in str(md_json['type']): + return find_recipe_json(md_json['properties']) return JsonResponse({'error': _('The requested site does not provide any recognized data format to import the recipe from.')})