microdata import
This commit is contained in:
parent
71b41a9ca2
commit
f066b7097c
@ -1,59 +1,82 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
from django.http import JsonResponse
|
from django.http import JsonResponse
|
||||||
|
|
||||||
from cookbook.models import Keyword
|
from cookbook.models import Keyword
|
||||||
|
|
||||||
|
|
||||||
def find_ld_json(ld_json):
|
def find_recipe_json(ld_json):
|
||||||
# recipes type might be wrapped in @graph type
|
ld_json['org'] = str(ld_json)
|
||||||
if '@graph' in ld_json:
|
|
||||||
for x in ld_json['@graph']:
|
|
||||||
if '@type' in x and x['@type'] == 'Recipe':
|
|
||||||
ld_json = x
|
|
||||||
|
|
||||||
if '@type' in ld_json and ld_json['@type'] == 'Recipe':
|
# some sites use ingredients instead of recipeIngredients
|
||||||
|
if 'recipeIngredient' not in ld_json and 'ingredients' in ld_json:
|
||||||
|
ld_json['recipeIngredient'] = ld_json['ingredients']
|
||||||
|
|
||||||
if 'recipeIngredient' in ld_json:
|
if 'recipeIngredient' in ld_json:
|
||||||
ingredients = []
|
# some pages have comma separated ingredients in a single array entry
|
||||||
|
if len(ld_json['recipeIngredient']) == 1 and len(ld_json['recipeIngredient'][0]) > 30:
|
||||||
|
ld_json['recipeIngredient'] = ld_json['recipeIngredient'][0].split(',')
|
||||||
|
|
||||||
for x in ld_json['recipeIngredient']:
|
ingredients = []
|
||||||
ingredient_split = x.split()
|
|
||||||
if len(ingredient_split) > 2:
|
|
||||||
ingredients.append({'amount': ingredient_split[0], 'unit': ingredient_split[1], 'ingredient': " ".join(ingredient_split[2:])})
|
|
||||||
if len(ingredient_split) == 2:
|
|
||||||
ingredients.append({'amount': ingredient_split[0], 'unit': '', 'ingredient': " ".join(ingredient_split[1:])})
|
|
||||||
if len(ingredient_split) == 1:
|
|
||||||
ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)})
|
|
||||||
|
|
||||||
ld_json['recipeIngredient'] = ingredients
|
for x in ld_json['recipeIngredient']:
|
||||||
|
ingredient_split = x.split()
|
||||||
|
if len(ingredient_split) > 2:
|
||||||
|
ingredients.append({'amount': ingredient_split[0], 'unit': ingredient_split[1], 'ingredient': " ".join(ingredient_split[2:])})
|
||||||
|
if len(ingredient_split) == 2:
|
||||||
|
ingredients.append({'amount': ingredient_split[0], 'unit': '', 'ingredient': " ".join(ingredient_split[1:])})
|
||||||
|
if len(ingredient_split) == 1:
|
||||||
|
ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)})
|
||||||
|
|
||||||
if 'keywords' in ld_json:
|
ld_json['recipeIngredient'] = ingredients
|
||||||
keywords = []
|
|
||||||
if type(ld_json['keywords']) == str:
|
|
||||||
ld_json['keywords'] = ld_json['keywords'].split(',')
|
|
||||||
|
|
||||||
for kw in ld_json['keywords']:
|
if 'keywords' in ld_json:
|
||||||
if k := Keyword.objects.filter(name=kw).first():
|
keywords = []
|
||||||
keywords.append({'id': str(k.id), 'text': str(k).strip()})
|
|
||||||
|
# keywords as string
|
||||||
|
if type(ld_json['keywords']) == str:
|
||||||
|
ld_json['keywords'] = ld_json['keywords'].split(',')
|
||||||
|
|
||||||
|
# keywords as string in list
|
||||||
|
if type(ld_json['keywords']) == list and len(ld_json['keywords']) == 1 and ',' in ld_json['keywords'][0]:
|
||||||
|
ld_json['keywords'] = ld_json['keywords'][0].split(',')
|
||||||
|
|
||||||
|
# keywords as list
|
||||||
|
for kw in ld_json['keywords']:
|
||||||
|
if k := Keyword.objects.filter(name=kw).first():
|
||||||
|
keywords.append({'id': str(k.id), 'text': str(k).strip()})
|
||||||
|
else:
|
||||||
|
keywords.append({'id': "null", 'text': kw.strip()})
|
||||||
|
|
||||||
|
ld_json['keywords'] = keywords
|
||||||
|
|
||||||
|
if 'recipeInstructions' in ld_json:
|
||||||
|
instructions = ''
|
||||||
|
|
||||||
|
# flatten instructions if they are in a list
|
||||||
|
if type(ld_json['recipeInstructions']) == list:
|
||||||
|
for i in ld_json['recipeInstructions']:
|
||||||
|
if type(i) == str:
|
||||||
|
instructions += i
|
||||||
else:
|
else:
|
||||||
keywords.append({'id': "null", 'text': kw.strip()})
|
instructions += i['text'] + '\n\n'
|
||||||
|
ld_json['recipeInstructions'] = instructions
|
||||||
|
|
||||||
ld_json['keywords'] = keywords
|
ld_json['recipeInstructions'] = re.sub(r'\n\s*\n', '\n\n', ld_json['recipeInstructions'])
|
||||||
|
ld_json['recipeInstructions'] = re.sub(' +', ' ', ld_json['recipeInstructions'])
|
||||||
|
ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('<p>', '')
|
||||||
|
ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('</p>', '')
|
||||||
|
|
||||||
if 'recipeInstructions' in ld_json:
|
if 'image' in ld_json:
|
||||||
instructions = ''
|
# check if list of images is returned, take first if so
|
||||||
if type(ld_json['recipeInstructions']) == list:
|
if (type(ld_json['image'])) == list:
|
||||||
for i in ld_json['recipeInstructions']:
|
if type(ld_json['image'][0]) == str:
|
||||||
if type(i) == str:
|
ld_json['image'] = ld_json['image'][0]
|
||||||
instructions += i
|
elif 'url' in ld_json['image'][0]:
|
||||||
else:
|
ld_json['image'] = ld_json['image'][0]['url']
|
||||||
instructions += i['text'] + '\n\n'
|
|
||||||
ld_json['recipeInstructions'] = instructions
|
|
||||||
|
|
||||||
if 'image' in ld_json:
|
# ignore relative image paths
|
||||||
if (type(ld_json['image'])) == list:
|
if 'http' not in ld_json['image']:
|
||||||
if type(ld_json['image'][0]) == str:
|
ld_json['image'] = ''
|
||||||
ld_json['image'] = ld_json['image'][0]
|
|
||||||
elif 'url' in ld_json['image'][0]:
|
|
||||||
ld_json['image'] = ld_json['image'][0]['url']
|
|
||||||
|
|
||||||
return JsonResponse(ld_json)
|
return JsonResponse(ld_json)
|
||||||
|
@ -17,6 +17,13 @@
|
|||||||
|
|
||||||
<div id="app">
|
<div id="app">
|
||||||
|
|
||||||
|
https://www.inspirationforall.de/pudding-selber-machen-vanillepudding-schokopudding-rezept/<br/>
|
||||||
|
https://www.ichkoche.at/schokopudding-rezept-218012<br/>
|
||||||
|
https://www.gutekueche.de/mamis-feiner-schokopudding-rezept-4274<br/>
|
||||||
|
https://www.maizena.at/rezepte/schokopudding/13534<br/>
|
||||||
|
https://kochkino.de/schokoladen-pudding/2159<br/>
|
||||||
|
https://www.oetker.de/rezepte/r/schokopudding-mit-vanille-herzen<br/>
|
||||||
|
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="col-md-12">
|
<div class="col-md-12">
|
||||||
<div class="input-group mb-3">
|
<div class="input-group mb-3">
|
||||||
@ -42,11 +49,11 @@
|
|||||||
|
|
||||||
|
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="col col-md-6">
|
<div class="col col-md-6" v-if="recipe_data.image !== ''">
|
||||||
<img v-bind:src="recipe_data.image" alt="{% trans 'Recipe Image' %}"
|
<img v-bind:src="recipe_data.image" alt="{% trans 'Recipe Image' %}"
|
||||||
class="img-fluid img-responsive img-rounded">
|
class="img-fluid img-responsive img-rounded">
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div class="col col-md-6">
|
||||||
<div class="form-group">
|
<div class="form-group">
|
||||||
<label for="id_prep_time">{% trans 'Preparation time ca.' %}</label>
|
<label for="id_prep_time">{% trans 'Preparation time ca.' %}</label>
|
||||||
<input id="id_prep_time" class="form-control" v-model="recipe_data.prepTime">
|
<input id="id_prep_time" class="form-control" v-model="recipe_data.prepTime">
|
||||||
@ -109,8 +116,10 @@
|
|||||||
|
|
||||||
|
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
|
[[recipe_data]]
|
||||||
</template>
|
</template>
|
||||||
[[recipe_data]]
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
||||||
@ -131,14 +140,6 @@
|
|||||||
|
|
||||||
Vue.component('vue-multiselect', window.VueMultiselect.default)
|
Vue.component('vue-multiselect', window.VueMultiselect.default)
|
||||||
|
|
||||||
// micro data examples
|
|
||||||
// https://www.inspirationforall.de/pudding-selber-machen-vanillepudding-schokopudding-rezept/
|
|
||||||
// https://www.ichkoche.at/schokopudding-rezept-218012
|
|
||||||
// https://www.gutekueche.de/mamis-feiner-schokopudding-rezept-4274
|
|
||||||
// https://www.maizena.at/rezepte/schokopudding/13534
|
|
||||||
// https://kochkino.de/schokoladen-pudding/2159
|
|
||||||
// https://www.oetker.de/rezepte/r/schokopudding-mit-vanille-herzen
|
|
||||||
|
|
||||||
let app = new Vue({
|
let app = new Vue({
|
||||||
components: {
|
components: {
|
||||||
Multiselect: window.VueMultiselect.default
|
Multiselect: window.VueMultiselect.default
|
||||||
|
@ -19,7 +19,7 @@ from rest_framework.exceptions import APIException
|
|||||||
from rest_framework.mixins import RetrieveModelMixin, UpdateModelMixin, ListModelMixin
|
from rest_framework.mixins import RetrieveModelMixin, UpdateModelMixin, ListModelMixin
|
||||||
|
|
||||||
from cookbook.helper.permission_helper import group_required, CustomIsOwner, CustomIsAdmin
|
from cookbook.helper.permission_helper import group_required, CustomIsOwner, CustomIsAdmin
|
||||||
from cookbook.helper.recipe_url_import import find_ld_json
|
from cookbook.helper.recipe_url_import import find_recipe_json
|
||||||
from cookbook.models import Recipe, Sync, Storage, CookLog, MealPlan, MealType, ViewLog, UserPreference, RecipeBook, Keyword
|
from cookbook.models import Recipe, Sync, Storage, CookLog, MealPlan, MealType, ViewLog, UserPreference, RecipeBook, Keyword
|
||||||
from cookbook.provider.dropbox import Dropbox
|
from cookbook.provider.dropbox import Dropbox
|
||||||
from cookbook.provider.nextcloud import Nextcloud
|
from cookbook.provider.nextcloud import Nextcloud
|
||||||
@ -260,13 +260,25 @@ def recipe_from_url(request, url):
|
|||||||
|
|
||||||
# first try finding ld+json as its most common
|
# first try finding ld+json as its most common
|
||||||
for ld in soup.find_all('script', type='application/ld+json'):
|
for ld in soup.find_all('script', type='application/ld+json'):
|
||||||
if (r := find_ld_json(json.loads(ld.string))) is not None:
|
ld_json = json.loads(ld.string)
|
||||||
return r
|
if type(ld_json) != list:
|
||||||
|
ld_json = [ld_json]
|
||||||
|
|
||||||
|
for ld_json_item in ld_json:
|
||||||
|
# recipes type might be wrapped in @graph type
|
||||||
|
if '@graph' in ld_json_item:
|
||||||
|
for x in ld_json_item['@graph']:
|
||||||
|
if '@type' in x and x['@type'] == 'Recipe':
|
||||||
|
ld_json_item = x
|
||||||
|
|
||||||
|
if '@type' in ld_json_item and ld_json_item['@type'] == 'Recipe':
|
||||||
|
return find_recipe_json(ld_json_item)
|
||||||
|
|
||||||
# now try to find microdata
|
# now try to find microdata
|
||||||
items = microdata.get_items(response)
|
items = microdata.get_items(response.text)
|
||||||
for i in items:
|
for i in items:
|
||||||
js = i.json()
|
md_json = json.loads(i.json())
|
||||||
print('hi')
|
if 'schema.org/Recipe' in str(md_json['type']):
|
||||||
|
return find_recipe_json(md_json['properties'])
|
||||||
|
|
||||||
return JsonResponse({'error': _('The requested site does not provide any recognized data format to import the recipe from.')})
|
return JsonResponse({'error': _('The requested site does not provide any recognized data format to import the recipe from.')})
|
||||||
|
Loading…
Reference in New Issue
Block a user