refactored recipe from source endpoint
This commit is contained in:
parent
a6c81d8168
commit
cac72df7ba
@ -5,12 +5,12 @@ from urllib.parse import unquote
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import Tag
|
||||
from recipe_scrapers import scrape_html, scrape_me
|
||||
from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
|
||||
from recipe_scrapers._utils import get_host_name, normalize_string
|
||||
|
||||
from cookbook.helper import recipe_url_import as helper
|
||||
from cookbook.helper.scrapers.scrapers import text_scraper
|
||||
from recipe_scrapers import scrape_me
|
||||
from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
|
||||
|
||||
|
||||
def get_recipe_from_source(text, url, request):
|
||||
@ -62,8 +62,9 @@ def get_recipe_from_source(text, url, request):
|
||||
|
||||
recipe_tree = []
|
||||
parse_list = []
|
||||
html_data = []
|
||||
images = []
|
||||
soup = BeautifulSoup(text, "html.parser")
|
||||
html_data = get_from_html(soup)
|
||||
images = get_images_from_source(soup, url)
|
||||
text = unquote(text)
|
||||
scrape = None
|
||||
|
||||
@ -80,9 +81,6 @@ def get_recipe_from_source(text, url, request):
|
||||
scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
|
||||
|
||||
except JSONDecodeError:
|
||||
soup = BeautifulSoup(text, "html.parser")
|
||||
html_data = get_from_html(soup)
|
||||
images += get_images_from_source(soup, url)
|
||||
for el in soup.find_all('script', type='application/ld+json'):
|
||||
el = remove_graph(el)
|
||||
if not url and 'url' in el:
|
||||
|
@ -645,7 +645,7 @@ class RecipeSerializer(RecipeBaseSerializer):
|
||||
model = Recipe
|
||||
fields = (
|
||||
'id', 'name', 'description', 'image', 'keywords', 'steps', 'working_time',
|
||||
'waiting_time', 'created_by', 'created_at', 'updated_at','source_url',
|
||||
'waiting_time', 'created_by', 'created_at', 'updated_at', 'source_url',
|
||||
'internal', 'nutrition', 'servings', 'file_path', 'servings_text', 'rating', 'last_cooked',
|
||||
)
|
||||
read_only_fields = ['image', 'created_by', 'created_at']
|
||||
@ -1099,3 +1099,11 @@ class FoodShoppingUpdateSerializer(serializers.ModelSerializer):
|
||||
class Meta:
|
||||
model = Recipe
|
||||
fields = ['id', 'amount', 'unit', 'delete', ]
|
||||
|
||||
|
||||
# non model serializers
|
||||
|
||||
class RecipeFromSourceSerializer(serializers.Serializer):
    """
    Plain (non-model) serializer validating the payload of a recipe import request.

    All three fields are optional and accept null, so consumers must be prepared
    for a key to be missing from ``validated_data`` or to be present as ``None``.
    """

    # url to use for importing the recipe
    url = serializers.CharField(
        allow_null=True,
        required=False,
        max_length=4096,
    )
    # raw source data to import from when no url is given (may be an empty string)
    data = serializers.CharField(
        allow_null=True,
        allow_blank=True,
        required=False,
    )
    # id of a bookmarklet import to use
    bookmarklet = serializers.IntegerField(
        allow_null=True,
        required=False,
    )
|
||||
|
@ -27,11 +27,15 @@ from django_scopes import scopes_disabled
|
||||
from icalendar import Calendar, Event
|
||||
from requests.exceptions import MissingSchema
|
||||
from rest_framework import decorators, status, viewsets
|
||||
from rest_framework.decorators import api_view, permission_classes, schema
|
||||
from rest_framework.exceptions import APIException, PermissionDenied
|
||||
from rest_framework.generics import CreateAPIView
|
||||
from rest_framework.pagination import PageNumberPagination
|
||||
from rest_framework.parsers import MultiPartParser
|
||||
from rest_framework.renderers import JSONRenderer, TemplateHTMLRenderer
|
||||
from rest_framework.response import Response
|
||||
from rest_framework.schemas import AutoSchema
|
||||
from rest_framework.views import APIView
|
||||
from rest_framework.viewsets import ViewSetMixin
|
||||
from treebeard.exceptions import InvalidMoveToDescendant, InvalidPosition, PathOverflow
|
||||
from validators import ValidationFailure
|
||||
@ -71,7 +75,7 @@ from cookbook.serializer import (AutomationSerializer, BookmarkletImportSerializ
|
||||
SupermarketCategorySerializer, SupermarketSerializer,
|
||||
SyncLogSerializer, SyncSerializer, UnitSerializer,
|
||||
UserFileSerializer, UserNameSerializer, UserPreferenceSerializer,
|
||||
ViewLogSerializer, IngredientSimpleSerializer, BookmarkletImportListSerializer)
|
||||
ViewLogSerializer, IngredientSimpleSerializer, BookmarkletImportListSerializer, RecipeFromSourceSerializer)
|
||||
from recipes import settings
|
||||
|
||||
|
||||
@ -1025,7 +1029,76 @@ class CustomFilterViewSet(viewsets.ModelViewSet, StandardFilterMixin):
|
||||
return super().get_queryset()
|
||||
|
||||
|
||||
# -------------- non django rest api views --------------------
|
||||
# -------------- DRF custom views --------------------
|
||||
|
||||
@api_view(['POST'])
# @schema(AutoSchema()) #TODO add proper schema
@permission_classes([CustomIsUser])
# TODO add rate limiting
def recipe_from_source(request):
    """
    function to retrieve a recipe from a given url or source string
    :param request: standard request with additional post parameters
        - url: url to use for importing recipe
        - data: if no url is given recipe is imported from provided source data
        - (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes
    :return: Response containing the parsed json, original html,json and images;
        on failure a Response with status 400 carrying either the serializer
        errors or a dict of the form {'error': True, 'msg': <message>}
    """
    serializer = RecipeFromSourceSerializer(data=request.data)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    # Every field is optional and nullable, so a key can be absent or None.
    # Reading with .get() and testing truthiness (as the previous implementation
    # of this endpoint did) avoids a KeyError when only one of url/data is sent.
    url = serializer.validated_data.get('url')
    data = serializer.validated_data.get('data')
    bookmarklet_id = serializer.validated_data.get('bookmarklet')

    # NOTE(review): the lookup filters by pk only - confirm whether it should
    # additionally be restricted to imports owned by the requesting user.
    if bookmarklet_id is not None:
        if bookmarklet := BookmarkletImport.objects.filter(pk=bookmarklet_id).first():
            # a stored bookmarklet import overrides url and data and is consumed by this call
            url = bookmarklet.url
            data = bookmarklet.html
            bookmarklet.delete()

    # headers to use for request to external sites
    external_request_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"}

    if not url and not data:
        return Response({
            'error': True,
            'msg': _('Nothing to do.')
        }, status=status.HTTP_400_BAD_REQUEST)

    # in manual mode request complete page to return it later
    if url:
        try:
            if validators.url(url, public=True):
                # NOTE(review): no timeout is passed, so a stalled remote host blocks this request
                data = requests.get(url, headers=external_request_headers).content
            else:
                return Response({
                    'error': True,
                    'msg': _('Invalid Url')
                }, status=status.HTTP_400_BAD_REQUEST)
        except requests.exceptions.ConnectionError:
            return Response({
                'error': True,
                'msg': _('Connection Refused.')
            }, status=status.HTTP_400_BAD_REQUEST)
        except requests.exceptions.MissingSchema:
            return Response({
                'error': True,
                'msg': _('Bad URL Schema.')
            }, status=status.HTTP_400_BAD_REQUEST)

    recipe_json, recipe_tree, recipe_html, recipe_images = get_recipe_from_source(data, url, request)
    if len(recipe_tree) == 0 and len(recipe_json) == 0:
        return Response({
            'error': True,
            'msg': _('No usable data could be found.')
        }, status=status.HTTP_400_BAD_REQUEST)

    return Response({
        'recipe_json': recipe_json,
        'recipe_tree': recipe_tree,
        'recipe_html': recipe_html,
        # dict.fromkeys drops duplicate images while keeping their original order
        'recipe_images': list(dict.fromkeys(recipe_images)),
    }, status=status.HTTP_200_OK)
|
||||
|
||||
|
||||
def get_recipe_provider(recipe):
|
||||
@ -1160,73 +1233,6 @@ def get_plan_ical(request, from_date, to_date):
|
||||
return response
|
||||
|
||||
|
||||
@group_required('user')
def recipe_from_source(request):
    """
    Retrieve a recipe from a given url or source string.

    :param request: standard request whose JSON body may carry
        - url: url to use for importing recipe
        - data: if no url is given recipe is imported from provided source data
        - (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes
    :return: JsonResponse containing the parsed json, original html,json and images,
        or a JsonResponse with status 400 and an error message; GET requests
        receive an empty response with status 405
    """

    def _failure(message):
        # every error path answers with the same payload shape and status 400
        return JsonResponse({
            'error': True,
            'msg': message
        }, status=400)

    if request.method == 'GET':
        return HttpResponse(status=405)

    payload = json.loads(request.body.decode('utf-8'))
    url = payload.get('url')
    data = payload.get('data')
    bookmarklet_id = payload.get('bookmarklet')

    # a stored bookmarklet import replaces url and data and is removed once read
    stored_import = BookmarkletImport.objects.filter(pk=bookmarklet_id).first()
    if stored_import:
        url = stored_import.url
        data = stored_import.html
        stored_import.delete()

    if not url and not data:
        return _failure(_('Nothing to do.'))

    # in manual mode request complete page to return it later
    if url:
        # headers to use for request to external sites
        external_request_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"}
        try:
            if not validators.url(url, public=True):
                return _failure(_('Invalid Url'))
            data = requests.get(url, headers=external_request_headers).content
        except requests.exceptions.ConnectionError:
            return _failure(_('Connection Refused.'))
        except requests.exceptions.MissingSchema:
            return _failure(_('Bad URL Schema.'))

    recipe_json, recipe_tree, recipe_html, recipe_images = get_recipe_from_source(data, url, request)
    if len(recipe_tree) != 0 or len(recipe_json) != 0:
        return JsonResponse({
            'recipe_json': recipe_json,
            'recipe_tree': recipe_tree,
            'recipe_html': recipe_html,
            # dict.fromkeys removes duplicate images while preserving order
            'recipe_images': list(dict.fromkeys(recipe_images)),
        })

    return _failure(_('No usable data could be found.'))
|
||||
|
||||
|
||||
@group_required('admin')
|
||||
def get_backup(request):
|
||||
if not request.user.is_superuser:
|
||||
|
@ -7034,7 +7034,7 @@ material-colors@^1.2.6:
|
||||
resolved "https://registry.yarnpkg.com/material-colors/-/material-colors-1.2.6.tgz#6d1958871126992ceecc72f4bcc4d8f010865f46"
|
||||
integrity sha512-6qE4B9deFBIa9YSpOc9O0Sgc43zTeVYbgDT5veRKSlB2+ZuHNoVVxA1L/ckMUayV9Ay9y7Z/SZCLcGteW9i7bg==
|
||||
|
||||
mavon-editor@2.10.4:
|
||||
mavon-editor@^2.10.4:
|
||||
version "2.10.4"
|
||||
resolved "https://registry.yarnpkg.com/mavon-editor/-/mavon-editor-2.10.4.tgz#58d6c4dc208933f0ac4595c10c60655899ba8ba8"
|
||||
integrity sha512-CFsBLkgt/KZBDg+SJYe2fyYv4zClY149PiwpH0rDAiiP4ae1XNs0GC8nBsoTeipsHcebDLN1QMkt3bUsnMDjQw==
|
||||
|
Loading…
Reference in New Issue
Block a user