diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py new file mode 100644 index 00000000..90d1d96f --- /dev/null +++ b/cookbook/helper/recipe_url_import.py @@ -0,0 +1,59 @@ +from django.http import JsonResponse + +from cookbook.models import Keyword + + +def find_ld_json(ld_json): + # recipes type might be wrapped in @graph type + if '@graph' in ld_json: + for x in ld_json['@graph']: + if '@type' in x and x['@type'] == 'Recipe': + ld_json = x + + if '@type' in ld_json and ld_json['@type'] == 'Recipe': + + if 'recipeIngredient' in ld_json: + ingredients = [] + + for x in ld_json['recipeIngredient']: + ingredient_split = x.split() + if len(ingredient_split) > 2: + ingredients.append({'amount': ingredient_split[0], 'unit': ingredient_split[1], 'ingredient': " ".join(ingredient_split[2:])}) + if len(ingredient_split) == 2: + ingredients.append({'amount': ingredient_split[0], 'unit': '', 'ingredient': " ".join(ingredient_split[1:])}) + if len(ingredient_split) == 1: + ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)}) + + ld_json['recipeIngredient'] = ingredients + + if 'keywords' in ld_json: + keywords = [] + if type(ld_json['keywords']) == str: + ld_json['keywords'] = ld_json['keywords'].split(',') + + for kw in ld_json['keywords']: + if k := Keyword.objects.filter(name=kw).first(): + keywords.append({'id': str(k.id), 'text': str(k).strip()}) + else: + keywords.append({'id': "null", 'text': kw.strip()}) + + ld_json['keywords'] = keywords + + if 'recipeInstructions' in ld_json: + instructions = '' + if type(ld_json['recipeInstructions']) == list: + for i in ld_json['recipeInstructions']: + if type(i) == str: + instructions += i + else: + instructions += i['text'] + '\n\n' + ld_json['recipeInstructions'] = instructions + + if 'image' in ld_json: + if (type(ld_json['image'])) == list: + if type(ld_json['image'][0]) == str: + ld_json['image'] = ld_json['image'][0] + elif 'url' in ld_json['image'][0]: + ld_json['image'] = ld_json['image'][0]['url'] + + return JsonResponse(ld_json) diff --git a/cookbook/views/api.py b/cookbook/views/api.py index c481a3ce..60255700 100644 --- a/cookbook/views/api.py +++ b/cookbook/views/api.py @@ -2,6 +2,7 @@ import io import json import re +import microdata import requests from annoying.decorators import ajax_request from annoying.functions import get_object_or_None @@ -18,6 +19,7 @@ from rest_framework.exceptions import APIException from rest_framework.mixins import RetrieveModelMixin, UpdateModelMixin, ListModelMixin from cookbook.helper.permission_helper import group_required, CustomIsOwner, CustomIsAdmin +from cookbook.helper.recipe_url_import import find_ld_json from cookbook.models import Recipe, Sync, Storage, CookLog, MealPlan, MealType, ViewLog, UserPreference, RecipeBook, Keyword from cookbook.provider.dropbox import Dropbox from cookbook.provider.nextcloud import Nextcloud @@ -255,61 +257,16 @@ def recipe_from_url(request, url): return JsonResponse({'error': _('The requested page refused to provide any information (Status Code 403).')}) soup = BeautifulSoup(response.text, "html.parser") + + # first try finding ld+json as its most common for ld in soup.find_all('script', type='application/ld+json'): - ld_json = json.loads(ld.string) + if (r := find_ld_json(json.loads(ld.string))) is not None: + return r - # recipes type might be wrapped in @graph type - if '@graph' in ld_json: - for x in ld_json['@graph']: - if '@type' in x and x['@type'] == 'Recipe': - ld_json = x - - if '@type' in ld_json and ld_json['@type'] == 'Recipe': - - if 'recipeIngredient' in ld_json: - ingredients = [] - - for x in ld_json['recipeIngredient']: - ingredient_split = x.split() - if len(ingredient_split) > 2: - ingredients.append({'amount': ingredient_split[0], 'unit': ingredient_split[1], 'ingredient': " ".join(ingredient_split[2:])}) - if len(ingredient_split) == 2: - ingredients.append({'amount': ingredient_split[0], 'unit': '', 'ingredient': " ".join(ingredient_split[1:])}) - if len(ingredient_split) == 1: - ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)}) - - ld_json['recipeIngredient'] = ingredients - - if 'keywords' in ld_json: - keywords = [] - if type(ld_json['keywords']) == str: - ld_json['keywords'] = ld_json['keywords'].split(',') - - for kw in ld_json['keywords']: - if k := Keyword.objects.filter(name=kw).first(): - keywords.append({'id': str(k.id), 'text': str(k).strip()}) - else: - keywords.append({'id': "null", 'text': kw.strip()}) - - ld_json['keywords'] = keywords - - if 'recipeInstructions' in ld_json: - instructions = '' - if type(ld_json['recipeInstructions']) == list: - for i in ld_json['recipeInstructions']: - if type(i) == str: - instructions += i - else: - instructions += i['text'] + '\n\n' - ld_json['recipeInstructions'] = instructions - - if 'image' in ld_json: - if (type(ld_json['image'])) == list: - if type(ld_json['image'][0]) == str: - ld_json['image'] = ld_json['image'][0] - elif 'url' in ld_json['image'][0]: - ld_json['image'] = ld_json['image'][0]['url'] - - return JsonResponse(ld_json) + # now try to find microdata + items = microdata.get_items(response) + for i in items: + js = i.json() + print('hi') return JsonResponse({'error': _('The requested site does not provide any recognized data format to import the recipe from.')}) diff --git a/requirements.txt b/requirements.txt index 68635e67..7006b3cb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,4 +23,5 @@ whitenoise==5.1.0 icalendar==4.0.6 pyyaml==5.3.1 uritemplate==3.0.1 -beautifulsoup4==4.9.1 \ No newline at end of file +beautifulsoup4==4.9.1 +microdata==0.7.1 \ No newline at end of file