improved recipe parser, added tests, cleaned up resources
This commit is contained in:
@ -1,13 +1,10 @@
|
||||
import io
|
||||
import json
|
||||
import re
|
||||
from json import JSONDecodeError
|
||||
|
||||
import microdata
|
||||
import requests
|
||||
from annoying.decorators import ajax_request
|
||||
from annoying.functions import get_object_or_None
|
||||
from bs4 import BeautifulSoup
|
||||
from django.contrib import messages
|
||||
from django.contrib.auth.models import User
|
||||
from django.db.models import Q
|
||||
@ -18,11 +15,10 @@ from icalendar import Calendar, Event
|
||||
from rest_framework import viewsets, permissions
|
||||
from rest_framework.exceptions import APIException
|
||||
from rest_framework.mixins import RetrieveModelMixin, UpdateModelMixin, ListModelMixin
|
||||
from urllib3.exceptions import NewConnectionError
|
||||
|
||||
from cookbook.helper.permission_helper import group_required, CustomIsOwner, CustomIsAdmin, CustomIsUser
|
||||
from cookbook.helper.recipe_url_import import find_recipe_json
|
||||
from cookbook.models import Recipe, Sync, Storage, CookLog, MealPlan, MealType, ViewLog, UserPreference, RecipeBook, Keyword, RecipeIngredient, Ingredient
|
||||
from cookbook.helper.recipe_url_import import get_from_html
|
||||
from cookbook.models import Recipe, Sync, Storage, CookLog, MealPlan, MealType, ViewLog, UserPreference, RecipeBook, RecipeIngredient, Ingredient
|
||||
from cookbook.provider.dropbox import Dropbox
|
||||
from cookbook.provider.nextcloud import Nextcloud
|
||||
from cookbook.serializer import MealPlanSerializer, MealTypeSerializer, RecipeSerializer, ViewLogSerializer, UserNameSerializer, UserPreferenceSerializer, RecipeBookSerializer, RecipeIngredientSerializer, IngredientSerializer
|
||||
@ -265,7 +261,6 @@ def get_plan_ical(request, html_week):
|
||||
@group_required('user')
|
||||
def recipe_from_url(request, url):
|
||||
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}
|
||||
|
||||
try:
|
||||
response = requests.get(url, headers=headers)
|
||||
except requests.exceptions.ConnectionError:
|
||||
@ -273,33 +268,4 @@ def recipe_from_url(request, url):
|
||||
|
||||
if response.status_code == 403:
|
||||
return JsonResponse({'error': True, 'msg': _('The requested page refused to provide any information (Status Code 403).')}, status=400)
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
# first try finding ld+json as its most common
|
||||
for ld in soup.find_all('script', type='application/ld+json'):
|
||||
try:
|
||||
ld_json = json.loads(ld.string)
|
||||
if type(ld_json) != list:
|
||||
ld_json = [ld_json]
|
||||
|
||||
for ld_json_item in ld_json:
|
||||
# recipes type might be wrapped in @graph type
|
||||
if '@graph' in ld_json_item:
|
||||
for x in ld_json_item['@graph']:
|
||||
if '@type' in x and x['@type'] == 'Recipe':
|
||||
ld_json_item = x
|
||||
|
||||
if '@type' in ld_json_item and ld_json_item['@type'] == 'Recipe':
|
||||
return find_recipe_json(ld_json_item, url)
|
||||
except JSONDecodeError:
|
||||
JsonResponse({'error': True, 'msg': _('The requested site does not provided malformed data and cannot be read.')}, status=400)
|
||||
|
||||
# now try to find microdata
|
||||
items = microdata.get_items(response.text)
|
||||
for i in items:
|
||||
md_json = json.loads(i.json())
|
||||
if 'schema.org/Recipe' in str(md_json['type']):
|
||||
return find_recipe_json(md_json['properties'], url)
|
||||
|
||||
return JsonResponse({'error': True, 'msg': _('The requested site does not provide any recognized data format to import the recipe from.')}, status=400)
|
||||
return get_from_html(response.text, url)
|
||||
|
Reference in New Issue
Block a user