refactor json ld code into helper

This commit is contained in:
vabene1111
2020-06-22 21:35:56 +02:00
parent 9e748552b2
commit 71b41a9ca2
3 changed files with 72 additions and 55 deletions

View File

@ -2,6 +2,7 @@ import io
import json
import re
import microdata
import requests
from annoying.decorators import ajax_request
from annoying.functions import get_object_or_None
@ -18,6 +19,7 @@ from rest_framework.exceptions import APIException
from rest_framework.mixins import RetrieveModelMixin, UpdateModelMixin, ListModelMixin
from cookbook.helper.permission_helper import group_required, CustomIsOwner, CustomIsAdmin
from cookbook.helper.recipe_url_import import find_ld_json
from cookbook.models import Recipe, Sync, Storage, CookLog, MealPlan, MealType, ViewLog, UserPreference, RecipeBook, Keyword
from cookbook.provider.dropbox import Dropbox
from cookbook.provider.nextcloud import Nextcloud
@ -255,61 +257,16 @@ def recipe_from_url(request, url):
return JsonResponse({'error': _('The requested page refused to provide any information (Status Code 403).')})
soup = BeautifulSoup(response.text, "html.parser")
# first try finding ld+json as its most common
for ld in soup.find_all('script', type='application/ld+json'):
ld_json = json.loads(ld.string)
if (r := find_ld_json(json.loads(ld.string))) is not None:
return r
# recipes type might be wrapped in @graph type
if '@graph' in ld_json:
for x in ld_json['@graph']:
if '@type' in x and x['@type'] == 'Recipe':
ld_json = x
if '@type' in ld_json and ld_json['@type'] == 'Recipe':
if 'recipeIngredient' in ld_json:
ingredients = []
for x in ld_json['recipeIngredient']:
ingredient_split = x.split()
if len(ingredient_split) > 2:
ingredients.append({'amount': ingredient_split[0], 'unit': ingredient_split[1], 'ingredient': " ".join(ingredient_split[2:])})
if len(ingredient_split) == 2:
ingredients.append({'amount': ingredient_split[0], 'unit': '', 'ingredient': " ".join(ingredient_split[1:])})
if len(ingredient_split) == 1:
ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)})
ld_json['recipeIngredient'] = ingredients
if 'keywords' in ld_json:
keywords = []
if type(ld_json['keywords']) == str:
ld_json['keywords'] = ld_json['keywords'].split(',')
for kw in ld_json['keywords']:
if k := Keyword.objects.filter(name=kw).first():
keywords.append({'id': str(k.id), 'text': str(k).strip()})
else:
keywords.append({'id': "null", 'text': kw.strip()})
ld_json['keywords'] = keywords
if 'recipeInstructions' in ld_json:
instructions = ''
if type(ld_json['recipeInstructions']) == list:
for i in ld_json['recipeInstructions']:
if type(i) == str:
instructions += i
else:
instructions += i['text'] + '\n\n'
ld_json['recipeInstructions'] = instructions
if 'image' in ld_json:
if (type(ld_json['image'])) == list:
if type(ld_json['image'][0]) == str:
ld_json['image'] = ld_json['image'][0]
elif 'url' in ld_json['image'][0]:
ld_json['image'] = ld_json['image'][0]['url']
return JsonResponse(ld_json)
# now try to find microdata
items = microdata.get_items(response)
for i in items:
js = i.json()
print('hi')
return JsonResponse({'error': _('The requested site does not provide any recognized data format to import the recipe from.')})