fix bookmarklet

This commit is contained in:
smilerz 2022-07-06 16:16:53 -05:00
parent 946de2e7e3
commit e23d514d89
No known key found for this signature in database
GPG Key ID: 39444C7606D47126
4 changed files with 65 additions and 55 deletions

View File

@ -6,7 +6,7 @@ from urllib.parse import unquote
from bs4 import BeautifulSoup
from bs4.element import Tag
from recipe_scrapers import scrape_html, scrape_me
from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
from recipe_scrapers._exceptions import NoSchemaFoundInWildMode, WebsiteNotImplementedError
from recipe_scrapers._utils import get_host_name, normalize_string
from cookbook.helper import recipe_url_import as helper
@ -68,11 +68,14 @@ def get_recipe_from_source(text, url, request):
text = unquote(text)
scrape = None
if url:
if url and not text:
try:
scrape = scrape_me(url_path=url, wild_mode=True)
except(NoSchemaFoundInWildMode):
pass
scrape = scrape_me(url_path=url)
except WebsiteNotImplementedError:
try:
scrape = scrape_me(url_path=url, wild_mode=True)
except(NoSchemaFoundInWildMode):
pass
if not scrape:
try:
parse_list.append(remove_graph(json.loads(text)))

View File

@ -1,6 +1,7 @@
from bs4 import BeautifulSoup
from json import JSONDecodeError
from recipe_scrapers import SCRAPERS
from bs4 import BeautifulSoup
from recipe_scrapers import SCRAPERS, get_host_name
from recipe_scrapers._factory import SchemaScraperFactory
from recipe_scrapers._schemaorg import SchemaOrg
@ -15,7 +16,13 @@ SCRAPERS.update(CUSTOM_SCRAPERS)
def text_scraper(text, url=None):
scraper_class = SchemaScraperFactory.SchemaScraper
domain = None
if url:
domain = get_host_name(url)
if domain in SCRAPERS:
scraper_class = SCRAPERS[domain]
else:
scraper_class = SchemaScraperFactory.SchemaScraper
class TextScraper(scraper_class):
def __init__(

View File

@ -1,12 +1,11 @@
import traceback
from datetime import timedelta, datetime
from datetime import datetime, timedelta
from decimal import Decimal
from gettext import gettext as _
from html import escape
from smtplib import SMTPException
from PIL import Image
from django.contrib.auth.models import User, Group
from django.contrib.auth.models import Group, User
from django.core.mail import send_mail
from django.db.models import Avg, Q, QuerySet, Sum
from django.http import BadHeaderError
@ -14,6 +13,7 @@ from django.urls import reverse
from django.utils import timezone
from django_scopes import scopes_disabled
from drf_writable_nested import UniqueFieldsMixin, WritableNestedModelSerializer
from PIL import Image
from rest_framework import serializers
from rest_framework.exceptions import NotFound, ValidationError
@ -22,14 +22,14 @@ from cookbook.helper.HelperFunctions import str2bool
from cookbook.helper.permission_helper import above_space_limit
from cookbook.helper.shopping_helper import RecipeShoppingEditor
from cookbook.models import (Automation, BookmarkletImport, Comment, CookLog, CustomFilter,
ExportLog, Food, FoodInheritField, ImportLog, Ingredient, Keyword,
MealPlan, MealType, NutritionInformation, Recipe, RecipeBook,
ExportLog, Food, FoodInheritField, ImportLog, Ingredient, InviteLink,
Keyword, MealPlan, MealType, NutritionInformation, Recipe, RecipeBook,
RecipeBookEntry, RecipeImport, ShareLink, ShoppingList,
ShoppingListEntry, ShoppingListRecipe, Step, Storage, Supermarket,
SupermarketCategory, SupermarketCategoryRelation, Sync, SyncLog, Unit,
UserFile, UserPreference, ViewLog, Space, UserSpace, InviteLink)
ShoppingListEntry, ShoppingListRecipe, Space, Step, Storage,
Supermarket, SupermarketCategory, SupermarketCategoryRelation, Sync,
SyncLog, Unit, UserFile, UserPreference, UserSpace, ViewLog)
from cookbook.templatetags.custom_tags import markdown
from recipes.settings import MEDIA_URL, AWS_ENABLED
from recipes.settings import AWS_ENABLED, MEDIA_URL
class ExtendedRecipeMixin(serializers.ModelSerializer):
@ -193,7 +193,8 @@ class SpaceSerializer(WritableNestedModelSerializer):
class Meta:
model = Space
fields = ('id', 'name', 'created_by', 'created_at', 'message', 'max_recipes', 'max_file_storage_mb', 'max_users', 'allow_sharing', 'demo', 'food_inherit', 'show_facet_count', 'user_count', 'recipe_count', 'file_size_mb',)
fields = ('id', 'name', 'created_by', 'created_at', 'message', 'max_recipes', 'max_file_storage_mb', 'max_users',
'allow_sharing', 'demo', 'food_inherit', 'show_facet_count', 'user_count', 'recipe_count', 'file_size_mb',)
read_only_fields = ('id', 'created_by', 'created_at', 'max_recipes', 'max_file_storage_mb', 'max_users', 'allow_sharing', 'demo',)
@ -815,7 +816,7 @@ class RecipeBookEntrySerializer(serializers.ModelSerializer):
book = validated_data['book']
recipe = validated_data['recipe']
if not book.get_owner() == self.context['request'].user and not self.context[
'request'].user in book.get_shared():
'request'].user in book.get_shared():
raise NotFound(detail=None, code=None)
obj, created = RecipeBookEntry.objects.get_or_create(book=book, recipe=recipe)
return obj
@ -871,11 +872,11 @@ class ShoppingListRecipeSerializer(serializers.ModelSerializer):
value = value.quantize(
Decimal(1)) if value == value.to_integral() else value.normalize() # strips trailing zero
return (
obj.name
or getattr(obj.mealplan, 'title', None)
or (d := getattr(obj.mealplan, 'date', None)) and ': '.join([obj.mealplan.recipe.name, str(d)])
or obj.recipe.name
) + f' ({value:.2g})'
obj.name
or getattr(obj.mealplan, 'title', None)
or (d := getattr(obj.mealplan, 'date', None)) and ': '.join([obj.mealplan.recipe.name, str(d)])
or obj.recipe.name
) + f' ({value:.2g})'
def update(self, instance, validated_data):
# TODO remove once old shopping list
@ -1232,6 +1233,6 @@ class FoodShoppingUpdateSerializer(serializers.ModelSerializer):
# non model serializers
class RecipeFromSourceSerializer(serializers.Serializer):
url = serializers.CharField(max_length=4096, required=False, allow_null=True)
url = serializers.CharField(max_length=4096, required=False, allow_null=True, allow_blank=True)
data = serializers.CharField(required=False, allow_null=True, allow_blank=True)
bookmarklet = serializers.IntegerField(required=False, allow_null=True, )

View File

@ -9,16 +9,14 @@ from zipfile import ZipFile
import requests
import validators
from PIL import UnidentifiedImageError
from annoying.decorators import ajax_request
from annoying.functions import get_object_or_None
from django.contrib import messages
from django.contrib.auth.models import User, Group
from django.contrib.auth.models import Group, User
from django.contrib.postgres.search import TrigramSimilarity
from django.core.exceptions import FieldError, ValidationError
from django.core.files import File
from django.db.models import (Case, Count, Exists, OuterRef, ProtectedError, Q,
Subquery, Value, When)
from django.db.models import Case, Count, Exists, OuterRef, ProtectedError, Q, Subquery, Value, When
from django.db.models.fields.related import ForeignObjectRel
from django.db.models.functions import Coalesce, Lower
from django.http import FileResponse, HttpResponse, JsonResponse
@ -27,6 +25,7 @@ from django.urls import reverse
from django.utils.translation import gettext as _
from django_scopes import scopes_disabled
from icalendar import Calendar, Event
from PIL import UnidentifiedImageError
from requests.exceptions import MissingSchema
from rest_framework import decorators, status, viewsets
from rest_framework.authtoken.models import Token
@ -45,39 +44,42 @@ from cookbook.helper.HelperFunctions import str2bool
from cookbook.helper.image_processing import handle_image
from cookbook.helper.ingredient_parser import IngredientParser
from cookbook.helper.permission_helper import (CustomIsAdmin, CustomIsGuest, CustomIsOwner,
CustomIsShare, CustomIsShared, CustomIsUser,
group_required, CustomIsSpaceOwner, switch_user_active_space, is_space_owner, CustomIsOwnerReadOnly)
CustomIsOwnerReadOnly, CustomIsShare, CustomIsShared,
CustomIsSpaceOwner, CustomIsUser, group_required,
is_space_owner, switch_user_active_space)
from cookbook.helper.recipe_html_import import get_recipe_from_source
from cookbook.helper.recipe_search import RecipeFacet, RecipeSearch, old_search
from cookbook.helper.recipe_url_import import get_from_youtube_scraper
from cookbook.helper.shopping_helper import RecipeShoppingEditor, shopping_helper
from cookbook.models import (Automation, BookmarkletImport, CookLog, CustomFilter, ExportLog, Food,
FoodInheritField, ImportLog, Ingredient, Keyword, MealPlan, MealType,
Recipe, RecipeBook, RecipeBookEntry, ShareLink, ShoppingList,
ShoppingListEntry, ShoppingListRecipe, Step, Storage, Supermarket,
SupermarketCategory, SupermarketCategoryRelation, Sync, SyncLog, Unit,
UserFile, UserPreference, ViewLog, Space, UserSpace, InviteLink)
FoodInheritField, ImportLog, Ingredient, InviteLink, Keyword, MealPlan,
MealType, Recipe, RecipeBook, RecipeBookEntry, ShareLink, ShoppingList,
ShoppingListEntry, ShoppingListRecipe, Space, Step, Storage,
Supermarket, SupermarketCategory, SupermarketCategoryRelation, Sync,
SyncLog, Unit, UserFile, UserPreference, UserSpace, ViewLog)
from cookbook.provider.dropbox import Dropbox
from cookbook.provider.local import Local
from cookbook.provider.nextcloud import Nextcloud
from cookbook.schemas import FilterSchema, QueryParam, QueryParamAutoSchema, TreeSchema
from cookbook.serializer import (AutomationSerializer, BookmarkletImportSerializer,
CookLogSerializer, CustomFilterSerializer, ExportLogSerializer,
from cookbook.serializer import (AutomationSerializer, BookmarkletImportListSerializer,
BookmarkletImportSerializer, CookLogSerializer,
CustomFilterSerializer, ExportLogSerializer,
FoodInheritFieldSerializer, FoodSerializer,
FoodShoppingUpdateSerializer, ImportLogSerializer,
IngredientSerializer, KeywordSerializer, MealPlanSerializer,
FoodShoppingUpdateSerializer, GroupSerializer, ImportLogSerializer,
IngredientSerializer, IngredientSimpleSerializer,
InviteLinkSerializer, KeywordSerializer, MealPlanSerializer,
MealTypeSerializer, RecipeBookEntrySerializer,
RecipeBookSerializer, RecipeImageSerializer,
RecipeOverviewSerializer, RecipeSerializer,
RecipeBookSerializer, RecipeFromSourceSerializer,
RecipeImageSerializer, RecipeOverviewSerializer, RecipeSerializer,
RecipeShoppingUpdateSerializer, RecipeSimpleSerializer,
ShoppingListAutoSyncSerializer, ShoppingListEntrySerializer,
ShoppingListRecipeSerializer, ShoppingListSerializer,
StepSerializer, StorageSerializer,
SpaceSerializer, StepSerializer, StorageSerializer,
SupermarketCategoryRelationSerializer,
SupermarketCategorySerializer, SupermarketSerializer,
SyncLogSerializer, SyncSerializer, UnitSerializer,
UserFileSerializer, UserNameSerializer, UserPreferenceSerializer,
ViewLogSerializer, IngredientSimpleSerializer, BookmarkletImportListSerializer, RecipeFromSourceSerializer, SpaceSerializer, UserSpaceSerializer, GroupSerializer, InviteLinkSerializer)
UserSpaceSerializer, ViewLogSerializer)
from recipes import settings
@ -713,7 +715,7 @@ class RecipeViewSet(viewsets.ModelViewSet):
'Query string matched (fuzzy) against recipe name. In the future also fulltext search.')),
QueryParam(name='keywords', description=_(
'ID of keyword a recipe should have. For multiple repeat parameter. Equivalent to keywords_or'),
qtype='int'),
qtype='int'),
QueryParam(name='keywords_or',
description=_('Keyword IDs, repeat for multiple. Return recipes with any of the keywords'),
qtype='int'),
@ -1118,25 +1120,22 @@ def recipe_from_source(request):
"""
serializer = RecipeFromSourceSerializer(data=request.data)
if serializer.is_valid():
try:
if bookmarklet := BookmarkletImport.objects.filter(pk=serializer.validated_data['bookmarklet']).first():
serializer.validated_data['url'] = bookmarklet.url
serializer.validated_data['data'] = bookmarklet.html
bookmarklet.delete()
except KeyError:
pass
# headers to use for request to external sites
external_request_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"}
if not 'url' in serializer.validated_data and not 'data' in serializer.validated_data:
if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()):
serializer.validated_data['url'] = bookmarklet.url
serializer.validated_data['data'] = bookmarklet.html
bookmarklet.delete()
elif not 'url' in serializer.validated_data and not 'data' in serializer.validated_data:
return Response({
'error': True,
'msg': _('Nothing to do.')
}, status=status.HTTP_400_BAD_REQUEST)
# in manual mode request complete page to return it later
if 'url' in serializer.validated_data:
elif 'url' in serializer.validated_data and serializer.validated_data['url'] != '':
if re.match('^(https?://)?(www\.youtube\.com|youtu\.be)/.+$', serializer.validated_data['url']):
if validators.url(serializer.validated_data['url'], public=True):
return Response({