refactored recipe from source endpoint

This commit is contained in:
vabene1111 2022-05-31 15:22:27 +02:00
parent a6c81d8168
commit cac72df7ba
4 changed files with 90 additions and 78 deletions

View File

@ -5,12 +5,12 @@ from urllib.parse import unquote
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4.element import Tag from bs4.element import Tag
from recipe_scrapers import scrape_html, scrape_me
from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
from recipe_scrapers._utils import get_host_name, normalize_string from recipe_scrapers._utils import get_host_name, normalize_string
from cookbook.helper import recipe_url_import as helper from cookbook.helper import recipe_url_import as helper
from cookbook.helper.scrapers.scrapers import text_scraper from cookbook.helper.scrapers.scrapers import text_scraper
from recipe_scrapers import scrape_me
from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
def get_recipe_from_source(text, url, request): def get_recipe_from_source(text, url, request):
@ -62,8 +62,9 @@ def get_recipe_from_source(text, url, request):
recipe_tree = [] recipe_tree = []
parse_list = [] parse_list = []
html_data = [] soup = BeautifulSoup(text, "html.parser")
images = [] html_data = get_from_html(soup)
images = get_images_from_source(soup, url)
text = unquote(text) text = unquote(text)
scrape = None scrape = None
@ -80,9 +81,6 @@ def get_recipe_from_source(text, url, request):
scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url) scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
except JSONDecodeError: except JSONDecodeError:
soup = BeautifulSoup(text, "html.parser")
html_data = get_from_html(soup)
images += get_images_from_source(soup, url)
for el in soup.find_all('script', type='application/ld+json'): for el in soup.find_all('script', type='application/ld+json'):
el = remove_graph(el) el = remove_graph(el)
if not url and 'url' in el: if not url and 'url' in el:

View File

@ -645,7 +645,7 @@ class RecipeSerializer(RecipeBaseSerializer):
model = Recipe model = Recipe
fields = ( fields = (
'id', 'name', 'description', 'image', 'keywords', 'steps', 'working_time', 'id', 'name', 'description', 'image', 'keywords', 'steps', 'working_time',
'waiting_time', 'created_by', 'created_at', 'updated_at','source_url', 'waiting_time', 'created_by', 'created_at', 'updated_at', 'source_url',
'internal', 'nutrition', 'servings', 'file_path', 'servings_text', 'rating', 'last_cooked', 'internal', 'nutrition', 'servings', 'file_path', 'servings_text', 'rating', 'last_cooked',
) )
read_only_fields = ['image', 'created_by', 'created_at'] read_only_fields = ['image', 'created_by', 'created_at']
@ -1099,3 +1099,11 @@ class FoodShoppingUpdateSerializer(serializers.ModelSerializer):
class Meta: class Meta:
model = Recipe model = Recipe
fields = ['id', 'amount', 'unit', 'delete', ] fields = ['id', 'amount', 'unit', 'delete', ]
# non model serializers
class RecipeFromSourceSerializer(serializers.Serializer):
    """
    Request payload accepted by the recipe-from-source import endpoint.

    Plain (non model) serializer: all three fields are optional and may be
    sent as null.
    """

    # url to use for importing the recipe
    url = serializers.CharField(
        required=False,
        allow_null=True,
        max_length=4096,
    )
    # raw source data to import from; may also be an empty string
    data = serializers.CharField(
        required=False,
        allow_null=True,
        allow_blank=True,
    )
    # id of a bookmarklet import to use
    bookmarklet = serializers.IntegerField(
        required=False,
        allow_null=True,
    )

View File

@ -27,11 +27,15 @@ from django_scopes import scopes_disabled
from icalendar import Calendar, Event from icalendar import Calendar, Event
from requests.exceptions import MissingSchema from requests.exceptions import MissingSchema
from rest_framework import decorators, status, viewsets from rest_framework import decorators, status, viewsets
from rest_framework.decorators import api_view, permission_classes, schema
from rest_framework.exceptions import APIException, PermissionDenied from rest_framework.exceptions import APIException, PermissionDenied
from rest_framework.generics import CreateAPIView
from rest_framework.pagination import PageNumberPagination from rest_framework.pagination import PageNumberPagination
from rest_framework.parsers import MultiPartParser from rest_framework.parsers import MultiPartParser
from rest_framework.renderers import JSONRenderer, TemplateHTMLRenderer from rest_framework.renderers import JSONRenderer, TemplateHTMLRenderer
from rest_framework.response import Response from rest_framework.response import Response
from rest_framework.schemas import AutoSchema
from rest_framework.views import APIView
from rest_framework.viewsets import ViewSetMixin from rest_framework.viewsets import ViewSetMixin
from treebeard.exceptions import InvalidMoveToDescendant, InvalidPosition, PathOverflow from treebeard.exceptions import InvalidMoveToDescendant, InvalidPosition, PathOverflow
from validators import ValidationFailure from validators import ValidationFailure
@ -71,7 +75,7 @@ from cookbook.serializer import (AutomationSerializer, BookmarkletImportSerializ
SupermarketCategorySerializer, SupermarketSerializer, SupermarketCategorySerializer, SupermarketSerializer,
SyncLogSerializer, SyncSerializer, UnitSerializer, SyncLogSerializer, SyncSerializer, UnitSerializer,
UserFileSerializer, UserNameSerializer, UserPreferenceSerializer, UserFileSerializer, UserNameSerializer, UserPreferenceSerializer,
ViewLogSerializer, IngredientSimpleSerializer, BookmarkletImportListSerializer) ViewLogSerializer, IngredientSimpleSerializer, BookmarkletImportListSerializer, RecipeFromSourceSerializer)
from recipes import settings from recipes import settings
@ -1025,7 +1029,76 @@ class CustomFilterViewSet(viewsets.ModelViewSet, StandardFilterMixin):
return super().get_queryset() return super().get_queryset()
# -------------- non django rest api views -------------------- # -------------- DRF custom views --------------------
@api_view(['POST'])
# @schema(AutoSchema()) #TODO add proper schema
@permission_classes([CustomIsUser])
# TODO add rate limiting
def recipe_from_source(request):
    """
    function to retrieve a recipe from a given url or source string
    :param request: standard request with additional post parameters
            - url: url to use for importing recipe
            - data: if no url is given recipe is imported from provided source data
            - (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes
    :return: Response containing the parsed json, original html,json and images on success,
             otherwise a 400 Response with either the serializer errors or
             ``{'error': True, 'msg': ...}`` describing the problem
    """
    serializer = RecipeFromSourceSerializer(data=request.data)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    # all fields are optional and nullable, so read them with .get() and
    # decide on truthiness instead of key presence: a missing key, an explicit
    # null and an empty string all mean "not provided"
    url = serializer.validated_data.get('url')
    data = serializer.validated_data.get('data')
    bookmarklet_id = serializer.validated_data.get('bookmarklet')

    if bookmarklet_id is not None:
        if bookmarklet := BookmarkletImport.objects.filter(pk=bookmarklet_id).first():
            # a stored bookmarklet import overrides url/data and is consumed on use
            url = bookmarklet.url
            data = bookmarklet.html
            bookmarklet.delete()

    # headers to use for request to external sites
    external_request_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"}

    if not url and not data:
        return Response({
            'error': True,
            'msg': _('Nothing to do.')
        }, status=status.HTTP_400_BAD_REQUEST)

    # in manual mode request complete page to return it later
    if url:
        try:
            if not validators.url(url, public=True):
                return Response({
                    'error': True,
                    'msg': _('Invalid Url')
                }, status=status.HTTP_400_BAD_REQUEST)
            # the downloaded page replaces any data sent along with the url
            data = requests.get(url, headers=external_request_headers).content
        except requests.exceptions.ConnectionError:
            return Response({
                'error': True,
                'msg': _('Connection Refused.')
            }, status=status.HTTP_400_BAD_REQUEST)
        except requests.exceptions.MissingSchema:
            return Response({
                'error': True,
                'msg': _('Bad URL Schema.')
            }, status=status.HTTP_400_BAD_REQUEST)

    # url may be None here when the recipe is imported from raw data only
    recipe_json, recipe_tree, recipe_html, recipe_images = get_recipe_from_source(data, url, request)
    if not recipe_tree and not recipe_json:
        return Response({
            'error': True,
            'msg': _('No usable data could be found.')
        }, status=status.HTTP_400_BAD_REQUEST)

    return Response({
        'recipe_json': recipe_json,
        'recipe_tree': recipe_tree,
        'recipe_html': recipe_html,
        # dict.fromkeys drops duplicate images while keeping their order
        'recipe_images': list(dict.fromkeys(recipe_images)),
    }, status=status.HTTP_200_OK)
def get_recipe_provider(recipe): def get_recipe_provider(recipe):
@ -1160,73 +1233,6 @@ def get_plan_ical(request, from_date, to_date):
return response return response
@group_required('user')
def recipe_from_source(request):
    """
    Import a recipe from a url or from raw source data.

    :param request: standard request whose JSON body may contain
            - url: url to use for importing recipe
            - data: source data to import from if no url is given
            - (optional) bookmarklet: id of bookmarklet import to use, overrides url and data
    :return: JsonResponse with the parsed json, tree, html and images, a 400
             JsonResponse with ``error``/``msg`` on failure, or an empty 405
             response for GET requests
    """
    if request.method == 'GET':
        return HttpResponse(status=405)

    def error_response(message):
        # every failure shares the same payload shape and status code
        return JsonResponse({
            'error': True,
            'msg': message
        }, status=400)

    payload = json.loads(request.body.decode('utf-8'))
    url = payload.get('url', None)
    data = payload.get('data', None)
    bookmarklet_id = payload.get('bookmarklet', None)

    stored_import = BookmarkletImport.objects.filter(pk=bookmarklet_id).first()
    if stored_import:
        # a stored bookmarklet import replaces the posted values and is removed once used
        url = stored_import.url
        data = stored_import.html
        stored_import.delete()

    if not (url or data):
        return error_response(_('Nothing to do.'))

    # in manual mode request complete page to return it later
    if url:
        # headers to use for request to external sites
        external_request_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"}
        try:
            if not validators.url(url, public=True):
                return error_response(_('Invalid Url'))
            page = requests.get(url, headers=external_request_headers)
            data = page.content
        except requests.exceptions.ConnectionError:
            return error_response(_('Connection Refused.'))
        except requests.exceptions.MissingSchema:
            return error_response(_('Bad URL Schema.'))

    recipe_json, recipe_tree, recipe_html, recipe_images = get_recipe_from_source(data, url, request)

    if len(recipe_tree) > 0 or len(recipe_json) > 0:
        # dict.fromkeys removes duplicate images while preserving order
        unique_images = list(dict.fromkeys(recipe_images))
        return JsonResponse({
            'recipe_json': recipe_json,
            'recipe_tree': recipe_tree,
            'recipe_html': recipe_html,
            'recipe_images': unique_images,
        })

    return error_response(_('No usable data could be found.'))
@group_required('admin') @group_required('admin')
def get_backup(request): def get_backup(request):
if not request.user.is_superuser: if not request.user.is_superuser:

View File

@ -7034,7 +7034,7 @@ material-colors@^1.2.6:
resolved "https://registry.yarnpkg.com/material-colors/-/material-colors-1.2.6.tgz#6d1958871126992ceecc72f4bcc4d8f010865f46" resolved "https://registry.yarnpkg.com/material-colors/-/material-colors-1.2.6.tgz#6d1958871126992ceecc72f4bcc4d8f010865f46"
integrity sha512-6qE4B9deFBIa9YSpOc9O0Sgc43zTeVYbgDT5veRKSlB2+ZuHNoVVxA1L/ckMUayV9Ay9y7Z/SZCLcGteW9i7bg== integrity sha512-6qE4B9deFBIa9YSpOc9O0Sgc43zTeVYbgDT5veRKSlB2+ZuHNoVVxA1L/ckMUayV9Ay9y7Z/SZCLcGteW9i7bg==
mavon-editor@2.10.4: mavon-editor@^2.10.4:
version "2.10.4" version "2.10.4"
resolved "https://registry.yarnpkg.com/mavon-editor/-/mavon-editor-2.10.4.tgz#58d6c4dc208933f0ac4595c10c60655899ba8ba8" resolved "https://registry.yarnpkg.com/mavon-editor/-/mavon-editor-2.10.4.tgz#58d6c4dc208933f0ac4595c10c60655899ba8ba8"
integrity sha512-CFsBLkgt/KZBDg+SJYe2fyYv4zClY149PiwpH0rDAiiP4ae1XNs0GC8nBsoTeipsHcebDLN1QMkt3bUsnMDjQw== integrity sha512-CFsBLkgt/KZBDg+SJYe2fyYv4zClY149PiwpH0rDAiiP4ae1XNs0GC8nBsoTeipsHcebDLN1QMkt3bUsnMDjQw==