refactor json ld code into helper

This commit is contained in:
vabene1111
2020-06-22 21:35:56 +02:00
parent 9e748552b2
commit 71b41a9ca2
3 changed files with 72 additions and 55 deletions

View File

@ -0,0 +1,59 @@
from django.http import JsonResponse
from cookbook.models import Keyword
def find_ld_json(ld_json):
# recipes type might be wrapped in @graph type
if '@graph' in ld_json:
for x in ld_json['@graph']:
if '@type' in x and x['@type'] == 'Recipe':
ld_json = x
if '@type' in ld_json and ld_json['@type'] == 'Recipe':
if 'recipeIngredient' in ld_json:
ingredients = []
for x in ld_json['recipeIngredient']:
ingredient_split = x.split()
if len(ingredient_split) > 2:
ingredients.append({'amount': ingredient_split[0], 'unit': ingredient_split[1], 'ingredient': " ".join(ingredient_split[2:])})
if len(ingredient_split) == 2:
ingredients.append({'amount': ingredient_split[0], 'unit': '', 'ingredient': " ".join(ingredient_split[1:])})
if len(ingredient_split) == 1:
ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)})
ld_json['recipeIngredient'] = ingredients
if 'keywords' in ld_json:
keywords = []
if type(ld_json['keywords']) == str:
ld_json['keywords'] = ld_json['keywords'].split(',')
for kw in ld_json['keywords']:
if k := Keyword.objects.filter(name=kw).first():
keywords.append({'id': str(k.id), 'text': str(k).strip()})
else:
keywords.append({'id': "null", 'text': kw.strip()})
ld_json['keywords'] = keywords
if 'recipeInstructions' in ld_json:
instructions = ''
if type(ld_json['recipeInstructions']) == list:
for i in ld_json['recipeInstructions']:
if type(i) == str:
instructions += i
else:
instructions += i['text'] + '\n\n'
ld_json['recipeInstructions'] = instructions
if 'image' in ld_json:
if (type(ld_json['image'])) == list:
if type(ld_json['image'][0]) == str:
ld_json['image'] = ld_json['image'][0]
elif 'url' in ld_json['image'][0]:
ld_json['image'] = ld_json['image'][0]['url']
return JsonResponse(ld_json)

View File

@ -2,6 +2,7 @@ import io
import json import json
import re import re
import microdata
import requests import requests
from annoying.decorators import ajax_request from annoying.decorators import ajax_request
from annoying.functions import get_object_or_None from annoying.functions import get_object_or_None
@ -18,6 +19,7 @@ from rest_framework.exceptions import APIException
from rest_framework.mixins import RetrieveModelMixin, UpdateModelMixin, ListModelMixin from rest_framework.mixins import RetrieveModelMixin, UpdateModelMixin, ListModelMixin
from cookbook.helper.permission_helper import group_required, CustomIsOwner, CustomIsAdmin from cookbook.helper.permission_helper import group_required, CustomIsOwner, CustomIsAdmin
from cookbook.helper.recipe_url_import import find_ld_json
from cookbook.models import Recipe, Sync, Storage, CookLog, MealPlan, MealType, ViewLog, UserPreference, RecipeBook, Keyword from cookbook.models import Recipe, Sync, Storage, CookLog, MealPlan, MealType, ViewLog, UserPreference, RecipeBook, Keyword
from cookbook.provider.dropbox import Dropbox from cookbook.provider.dropbox import Dropbox
from cookbook.provider.nextcloud import Nextcloud from cookbook.provider.nextcloud import Nextcloud
@ -255,61 +257,16 @@ def recipe_from_url(request, url):
return JsonResponse({'error': _('The requested page refused to provide any information (Status Code 403).')}) return JsonResponse({'error': _('The requested page refused to provide any information (Status Code 403).')})
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(response.text, "html.parser")
# first try finding ld+json as its most common
for ld in soup.find_all('script', type='application/ld+json'): for ld in soup.find_all('script', type='application/ld+json'):
ld_json = json.loads(ld.string) if (r := find_ld_json(json.loads(ld.string))) is not None:
return r
# recipes type might be wrapped in @graph type # now try to find microdata
if '@graph' in ld_json: items = microdata.get_items(response)
for x in ld_json['@graph']: for i in items:
if '@type' in x and x['@type'] == 'Recipe': js = i.json()
ld_json = x print('hi')
if '@type' in ld_json and ld_json['@type'] == 'Recipe':
if 'recipeIngredient' in ld_json:
ingredients = []
for x in ld_json['recipeIngredient']:
ingredient_split = x.split()
if len(ingredient_split) > 2:
ingredients.append({'amount': ingredient_split[0], 'unit': ingredient_split[1], 'ingredient': " ".join(ingredient_split[2:])})
if len(ingredient_split) == 2:
ingredients.append({'amount': ingredient_split[0], 'unit': '', 'ingredient': " ".join(ingredient_split[1:])})
if len(ingredient_split) == 1:
ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)})
ld_json['recipeIngredient'] = ingredients
if 'keywords' in ld_json:
keywords = []
if type(ld_json['keywords']) == str:
ld_json['keywords'] = ld_json['keywords'].split(',')
for kw in ld_json['keywords']:
if k := Keyword.objects.filter(name=kw).first():
keywords.append({'id': str(k.id), 'text': str(k).strip()})
else:
keywords.append({'id': "null", 'text': kw.strip()})
ld_json['keywords'] = keywords
if 'recipeInstructions' in ld_json:
instructions = ''
if type(ld_json['recipeInstructions']) == list:
for i in ld_json['recipeInstructions']:
if type(i) == str:
instructions += i
else:
instructions += i['text'] + '\n\n'
ld_json['recipeInstructions'] = instructions
if 'image' in ld_json:
if (type(ld_json['image'])) == list:
if type(ld_json['image'][0]) == str:
ld_json['image'] = ld_json['image'][0]
elif 'url' in ld_json['image'][0]:
ld_json['image'] = ld_json['image'][0]['url']
return JsonResponse(ld_json)
return JsonResponse({'error': _('The requested site does not provide any recognized data format to import the recipe from.')}) return JsonResponse({'error': _('The requested site does not provide any recognized data format to import the recipe from.')})

View File

@ -24,3 +24,4 @@ icalendar==4.0.6
pyyaml==5.3.1 pyyaml==5.3.1
uritemplate==3.0.1 uritemplate==3.0.1
beautifulsoup4==4.9.1 beautifulsoup4==4.9.1
microdata==0.7.1