refactored url_import to use recipe-scraper

parent 47090ce863
commit bfaed434cc
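
Taken together, the changes below wire the recipe-scrapers library into the existing URL importer. A minimal sketch of how the new pieces are meant to fit together, based on the code added in this commit (the wrapper function import_preview is invented here purely for illustration):

# Illustrative sketch only; mirrors the flow introduced in this commit.
from recipe_scrapers import scrape_me, WebsiteNotImplementedError

from cookbook.helper.recipe_url_import import get_from_scraper


def import_preview(url):
    # scrape_me() parses the page; wild_mode is the fallback the new
    # recipe_from_url view uses when no site-specific scraper exists.
    try:
        scrape = scrape_me(url)
    except WebsiteNotImplementedError:
        scrape = scrape_me(url, wild_mode=True)
    # get_from_scraper() converts the scraper object into the ld+json-style
    # dict that the existing import frontend already understands.
    return get_from_scraper(scrape)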
@@ -10,6 +10,7 @@ from cookbook.models import Keyword
 from django.http import JsonResponse
 from django.utils.dateparse import parse_duration
 from django.utils.translation import gettext as _
+from recipe_scrapers import _utils
 
 
 def get_from_html(html_text, url):
@@ -69,8 +70,10 @@ def find_recipe_json(ld_json, url):
     if 'recipeIngredient' in ld_json:
         # some pages have comma separated ingredients in a single array entry
         if (len(ld_json['recipeIngredient']) == 1
-                and len(ld_json['recipeIngredient'][0]) > 30):
+                and type(ld_json['recipeIngredient']) == list):
             ld_json['recipeIngredient'] = ld_json['recipeIngredient'][0].split(',')  # noqa: E501
+        elif type(ld_json['recipeIngredient']) == str:
+            ld_json['recipeIngredient'] = ld_json['recipeIngredient'].split(',')
 
         for x in ld_json['recipeIngredient']:
             if '\n' in x:
@@ -122,28 +125,7 @@ def find_recipe_json(ld_json, url):
         ld_json['recipeIngredient'] = []
 
     if 'keywords' in ld_json:
-        keywords = []
-
-        # keywords as string
-        if type(ld_json['keywords']) == str:
-            ld_json['keywords'] = ld_json['keywords'].split(',')
-
-        # keywords as string in list
-        if (type(ld_json['keywords']) == list
-                and len(ld_json['keywords']) == 1
-                and ',' in ld_json['keywords'][0]):
-            ld_json['keywords'] = ld_json['keywords'][0].split(',')
-
-        # keywords as list
-        for kw in ld_json['keywords']:
-            if k := Keyword.objects.filter(name=kw).first():
-                keywords.append({'id': str(k.id), 'text': str(k).strip()})
-            else:
-                keywords.append({'id': random.randrange(1111111, 9999999, 1), 'text': kw.strip()})
-
-        ld_json['keywords'] = keywords
-    else:
-        ld_json['keywords'] = []
+        ld_json['keywords'] = parse_keywords(listify_keywords(ld_json['keywords']))
 
     if 'recipeInstructions' in ld_json:
         instructions = ''
@@ -218,6 +200,7 @@ def find_recipe_json(ld_json, url):
     else:
         ld_json['prepTime'] = 0
 
+    ld_json['servings'] = 1
     try:
         if 'recipeYield' in ld_json:
             if type(ld_json['recipeYield']) == str:
@@ -226,7 +209,6 @@ def find_recipe_json(ld_json, url):
                 ld_json['servings'] = int(re.findall(r'\b\d+\b', ld_json['recipeYield'][0])[0])
     except Exception as e:
         print(e)
-        ld_json['servings'] = 1
 
     for key in list(ld_json):
         if key not in [
@@ -236,3 +218,117 @@ def find_recipe_json(ld_json, url):
             ld_json.pop(key, None)
 
     return ld_json
+
+
+def get_from_scraper(scrape):
+    # converting the scrape_me object to the existing json format based on ld+json
+
+    recipe_json = {}
+    recipe_json['name'] = scrape.title()
+
+    recipe_json['description'] = ''
+    description = scrape.schema.data.get("description")
+    description += "\n\nImported from " + scrape.url
+    recipe_json['description'] = description
+
+    try:
+        servings = scrape.yields()
+        servings = int(re.findall(r'\b\d+\b', servings)[0])
+    except (AttributeError, ValueError):
+        servings = 1
+    recipe_json['servings'] = servings
+
+    recipe_json['prepTime'] = _utils.get_minutes(scrape.schema.data.get("prepTime")) or 0
+    recipe_json['cookTime'] = _utils.get_minutes(scrape.schema.data.get("cookTime")) or 0
+    if recipe_json['cookTime'] + recipe_json['prepTime'] == 0:
+        try:
+            recipe_json['prepTime'] = scrape.total_time()
+        except AttributeError:
+            pass
+
+    try:
+        recipe_json['image'] = scrape.image()
+    except AttributeError:
+        pass
+
+    keywords = []
+    if scrape.schema.data.get("keywords"):
+        keywords += listify_keywords(scrape.schema.data.get("keywords"))
+    if scrape.schema.data.get('recipeCategory'):
+        keywords += listify_keywords(scrape.schema.data.get("recipeCategory"))
+    if scrape.schema.data.get('recipeCuisine'):
+        keywords += listify_keywords(scrape.schema.data.get("recipeCuisine"))
+    recipe_json['keywords'] = parse_keywords(list(set(map(str.casefold, keywords))))
+
+    try:
+        ingredients = []
+        for x in scrape.ingredients():
+            try:
+                amount, unit, ingredient, note = parse_ingredient(x)
+                if ingredient:
+                    ingredients.append(
+                        {
+                            'amount': amount,
+                            'unit': {
+                                'text': unit,
+                                'id': random.randrange(10000, 99999)
+                            },
+                            'ingredient': {
+                                'text': ingredient,
+                                'id': random.randrange(10000, 99999)
+                            },
+                            'note': note,
+                            'original': x
+                        }
+                    )
+            except Exception:
+                ingredients.append(
+                    {
+                        'amount': 0,
+                        'unit': {
+                            'text': '',
+                            'id': random.randrange(10000, 99999)
+                        },
+                        'ingredient': {
+                            'text': x,
+                            'id': random.randrange(10000, 99999)
+                        },
+                        'note': '',
+                        'original': x
+                    }
+                )
+        recipe_json['recipeIngredient'] = ingredients
+    except AttributeError:
+        recipe_json['recipeIngredient'] = ingredients
+
+    try:
+        recipe_json['recipeInstructions'] = scrape.instructions()
+    except AttributeError:
+        recipe_json['recipeInstructions'] = ""
+
+    return recipe_json
+
+
+def parse_keywords(keyword_json):
+    keywords = []
+    # keywords as list
+    for kw in keyword_json:
+        if k := Keyword.objects.filter(name=kw).first():
+            keywords.append({'id': str(k.id), 'text': str(k)})
+        else:
+            keywords.append({'id': random.randrange(1111111, 9999999, 1), 'text': kw})
+
+    return keywords
+
+
+def listify_keywords(keyword_list):
+    # keywords as string
+    if type(keyword_list) == str:
+        keyword_list = keyword_list.split(',')
+
+    # keywords as string in list
+    if (type(keyword_list) == list
+            and len(keyword_list) == 1
+            and ',' in keyword_list[0]):
+        keyword_list = keyword_list[0].split(',')
+    return [x.strip() for x in keyword_list]
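The two helpers at the end of this hunk normalise the keyword shapes commonly found in ld+json before they are matched against existing Keyword objects. A rough sketch of their behaviour (example values are assumed, not taken from the commit):

# listify_keywords() flattens the three keyword shapes seen in schema.org data:
listify_keywords("vegan, quick")        # -> ['vegan', 'quick']   (comma-separated string)
listify_keywords(["vegan, quick"])      # -> ['vegan', 'quick']   (comma string inside a list)
listify_keywords(["vegan", "quick"])    # -> ['vegan', 'quick']   (already a list)

# parse_keywords() then maps each name to an existing Keyword id, or assigns a
# random placeholder id for keywords not yet in the database (needs DB access).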
@@ -45,6 +45,12 @@
                 <input id="id_name" class="form-control" v-model="recipe_data.name">
             </div>
 
+            <div class="form-group">
+                <label for="id_description">{% trans 'Recipe Description' %}</label>
+                <textarea id="id_description" class="form-control" v-model="recipe_data.description"
+                          rows="4"></textarea>
+            </div>
+
             <div class="row">
                 <div class="col col-md-6" v-if="recipe_data.image !== ''">
                     <img v-bind:src="recipe_data.image" alt="{% trans 'Recipe Image' %}"
@@ -9,7 +9,7 @@ from annoying.functions import get_object_or_None
 from django.contrib import messages
 from django.contrib.auth.models import User
 from django.core import management
-from django.core.exceptions import FieldError
+from django.core.exceptions import FieldError, ValidationError
 from django.core.files import File
 from django.db.models import Q
 from django.http import FileResponse, HttpResponse, JsonResponse
@@ -32,7 +32,7 @@ from cookbook.helper.permission_helper import (CustomIsAdmin, CustomIsGuest,
                                                CustomIsOwner, CustomIsShare,
                                                CustomIsShared, CustomIsUser,
                                                group_required)
-from cookbook.helper.recipe_url_import import get_from_html
+from cookbook.helper.recipe_url_import import get_from_html, get_from_scraper
 from cookbook.models import (CookLog, Food, Ingredient, Keyword, MealPlan,
                              MealType, Recipe, RecipeBook, ShoppingList,
                              ShoppingListEntry, ShoppingListRecipe, Step,
@@ -54,6 +54,7 @@ from cookbook.serializer import (FoodSerializer, IngredientSerializer,
                                  UserNameSerializer, UserPreferenceSerializer,
                                  ViewLogSerializer, CookLogSerializer, RecipeBookEntrySerializer, RecipeOverviewSerializer, SupermarketSerializer)
 from recipes.settings import DEMO
+from recipe_scrapers import scrape_me, WebsiteNotImplementedError, NoSchemaFoundInWildMode
 
 
 class StandardFilterMixin(ViewSetMixin):
@@ -498,6 +499,33 @@ def get_plan_ical(request, from_date, to_date):
 def recipe_from_url(request):
     url = request.POST['url']
 
+    try:
+        scrape = scrape_me(url)
+    except WebsiteNotImplementedError:
+        try:
+            scrape = scrape_me(url, wild_mode=True)
+        except NoSchemaFoundInWildMode:
+            return JsonResponse(
+                {
+                    'error': True,
+                    'msg': _('The requested site provided malformed data and cannot be read.')  # noqa: E501
+                },
+                status=400)
+    except ConnectionError:
+        return JsonResponse(
+            {
+                'error': True,
+                'msg': _('The requested page could not be found.')
+            },
+            status=400
+        )
+    return JsonResponse(get_from_scraper(scrape))
+
+
+@group_required('user')
+def recipe_from_url_old(request):
+    url = request.POST['url']
+
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'  # noqa: E501
     }
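On success the new recipe_from_url view simply returns get_from_scraper()'s dict as JSON. Roughly the shape it produces, with field names taken from the helper added above and purely illustrative values:

# Example response body only; all values are made up for illustration.
{
    'name': 'Example Recipe',
    'description': 'A short description.\n\nImported from https://example.org/recipe',
    'servings': 4,
    'prepTime': 15,
    'cookTime': 30,
    'image': 'https://example.org/image.jpg',   # omitted when no image is found
    'keywords': [{'id': '1', 'text': 'vegan'}],
    'recipeIngredient': [
        {
            'amount': 200,
            'unit': {'text': 'g', 'id': 12345},
            'ingredient': {'text': 'flour', 'id': 23456},
            'note': '',
            'original': '200 g flour'
        }
    ],
    'recipeInstructions': 'Mix everything and bake.'
}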
@@ -31,3 +31,4 @@ Jinja2==2.11.3
 django-webpack-loader==0.7.0
 django-js-reverse==0.9.1
 django-allauth==0.44.0
+recipe-scrapers==12.2.0