diff --git a/cookbook/helper/recipe_html_import.py b/cookbook/helper/recipe_html_import.py
index d2264721..6df0ce1a 100644
--- a/cookbook/helper/recipe_html_import.py
+++ b/cookbook/helper/recipe_html_import.py
@@ -5,13 +5,10 @@ from bs4 import BeautifulSoup
from bs4.element import Tag
from cookbook.helper import recipe_url_import as helper
from cookbook.helper.scrapers.scrapers import text_scraper
+from json import JSONDecodeError
from recipe_scrapers._utils import get_host_name, normalize_string
-from bs4 import BeautifulSoup
-from json import JSONDecodeError
-from json.decoder import JSONDecodeError
-


def get_recipe_from_source(text, url, space):
def build_node(k, v):
if isinstance(v, dict):
@@ -78,11 +75,9 @@ def get_recipe_from_source(text, url, space):
text = normalize_string(text)
try:
parse_list.append(remove_graph(json.loads(text)))
- scrape = text_scraper("")
-
+
except JSONDecodeError:
soup = BeautifulSoup(text, "html.parser")
- scrape = text_scraper(text)
html_data = get_from_html(soup)
images += get_images_from_source(soup, url)
for el in soup.find_all('script', type='application/ld+json'):
@@ -94,7 +89,11 @@ def get_recipe_from_source(text, url, space):
if not url and len(parse_list) > 0:
if 'url' in parse_list[0]:
url = parse_list[0]['url']
-
+
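+    # text is either pre-parsed JSON (a dict) or raw page markup (a str); build the
+    # scraper accordingly, passing url so a domain-specific scraper can be selected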
+    if isinstance(text, dict):
+        scrape = text_scraper("", url=url)
+    elif isinstance(text, str):
+        scrape = text_scraper(text, url=url)
recipe_json = helper.get_from_scraper(scrape, space)
@@ -130,7 +129,6 @@ def get_recipe_from_source(text, url, space):
recipe_tree += [{'name': 'ld+json', 'children': temp_tree}]
else:
recipe_tree += [{'name': 'json', 'children': temp_tree}]
-
return recipe_json, recipe_tree, html_data, images
diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py
index f8b25e07..7e0eade9 100644
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@@ -1,86 +1,14 @@
-import json
import random
import re
-from json import JSONDecodeError
from isodate import parse_duration as iso_parse_duration
from isodate.isoerror import ISO8601Error
-import microdata
-from bs4 import BeautifulSoup
from cookbook.helper.ingredient_parser import parse as parse_single_ingredient
from cookbook.models import Keyword
-from django.http import JsonResponse
from django.utils.dateparse import parse_duration
-from django.utils.translation import gettext as _
from recipe_scrapers._utils import get_minutes, normalize_string
-# def find_recipe_json(ld_json, url, space):
-# ld_json['name'] = parse_name(ld_json['name'])
-
-# # some sites use ingredients instead of recipeIngredients
-# if 'recipeIngredient' not in ld_json and 'ingredients' in ld_json:
-# ld_json['recipeIngredient'] = ld_json['ingredients']
-
-# if 'recipeIngredient' in ld_json:
-# ld_json['recipeIngredient'] = parse_ingredients(ld_json['recipeIngredient'])
-# else:
-# ld_json['recipeIngredient'] = ""
-
-# keywords = []
-# if 'keywords' in ld_json:
-# keywords += listify_keywords(ld_json['keywords'])
-# if 'recipeCategory' in ld_json:
-# keywords += listify_keywords(ld_json['recipeCategory'])
-# if 'recipeCuisine' in ld_json:
-# keywords += listify_keywords(ld_json['recipeCuisine'])
-# try:
-# ld_json['keywords'] = parse_keywords(list(set(map(str.casefold, keywords))), space)
-# except TypeError:
-# pass
-
-# if 'recipeInstructions' in ld_json:
-# ld_json['recipeInstructions'] = parse_instructions(ld_json['recipeInstructions'])
-# else:
-# ld_json['recipeInstructions'] = ""
-
-# if 'image' in ld_json:
-# ld_json['image'] = parse_image(ld_json['image'])
-# else:
-# ld_json['image'] = ""
-
-# if 'description' in ld_json:
-# ld_json['description'] = normalize_string(ld_json['description'])
-# else:
-# ld_json['description'] = ""
-
-# if 'cookTime' in ld_json:
-# ld_json['cookTime'] = parse_cooktime(ld_json['cookTime'])
-# else:
-# ld_json['cookTime'] = 0
-
-# if 'prepTime' in ld_json:
-# ld_json['prepTime'] = parse_cooktime(ld_json['prepTime'])
-# else:
-# ld_json['prepTime'] = 0
-
-# if 'servings' in ld_json:
-# ld_json['servings'] = parse_servings(ld_json['servings'])
-# elif 'recipeYield' in ld_json:
-# ld_json['servings'] = parse_servings(ld_json['recipeYield'])
-# else:
-# ld_json['servings'] = 1
-
-# for key in list(ld_json):
-# if key not in [
-# 'prepTime', 'cookTime', 'image', 'recipeInstructions',
-# 'keywords', 'name', 'recipeIngredient', 'servings', 'description'
-# ]:
-# ld_json.pop(key, None)
-
-# return ld_json
-
-
def get_from_scraper(scrape, space):
# converting the scrape_me object to the existing json format based on ld+json
@@ -89,12 +17,9 @@ def get_from_scraper(scrape, space):
try:
description = scrape.schema.data.get("description") or ''
- recipe_json['prepTime'] = get_minutes(scrape.schema.data.get("prepTime")) or 0
- recipe_json['cookTime'] = get_minutes(scrape.schema.data.get("cookTime")) or 0
+
except AttributeError:
description = ''
- recipe_json['prepTime'] = 0
- recipe_json['cookTime'] = 0
recipe_json['description'] = normalize_string(description)
@@ -105,9 +30,11 @@ def get_from_scraper(scrape, space):
servings = 1
recipe_json['servings'] = servings
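+    # parse the times outside the description try/except so an AttributeError there no longer zeroes them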
+ recipe_json['prepTime'] = get_minutes(scrape.schema.data.get("prepTime")) or 0
+ recipe_json['cookTime'] = get_minutes(scrape.schema.data.get("cookTime")) or 0
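+    # fall back to the scraper's total_time when the schema supplies neither value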
if recipe_json['cookTime'] + recipe_json['prepTime'] == 0:
try:
- recipe_json['prepTime'] = scrape.total_time()
+ recipe_json['prepTime'] = get_minutes(scrape.total_time()) or 0
except AttributeError:
pass
diff --git a/cookbook/helper/scrapers/scrapers.py b/cookbook/helper/scrapers/scrapers.py
index baf4bf9d..93e35b23 100644
--- a/cookbook/helper/scrapers/scrapers.py
+++ b/cookbook/helper/scrapers/scrapers.py
@@ -1,4 +1,5 @@
from bs4 import BeautifulSoup
+from json import JSONDecodeError
from recipe_scrapers import SCRAPERS, get_domain, _exception_handling
from recipe_scrapers._factory import SchemaScraperFactory
from recipe_scrapers._schemaorg import SchemaOrg
@@ -8,9 +9,9 @@ from .cooksillustrated import CooksIllustrated
CUSTOM_SCRAPERS = {
CooksIllustrated.host(): CooksIllustrated,
}
-SCRAPERS = SCRAPERS.update(CUSTOM_SCRAPERS)
-#%%
+# dict.update() mutates in place and returns None, so SCRAPERS must not be reassigned
+SCRAPERS.update(CUSTOM_SCRAPERS)
+
def text_scraper(text, url=None):
domain = None
if url:
@@ -19,7 +20,7 @@ def text_scraper(text, url=None):
scraper_class = SCRAPERS[domain]
else:
scraper_class = SchemaScraperFactory.SchemaScraper
-
+
class TextScraper(scraper_class):
def __init__(
self,
@@ -31,11 +32,10 @@ def text_scraper(text, url=None):
self.meta_http_equiv = False
self.soup = BeautifulSoup(page_data, "html.parser")
self.url = url
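+            # default attribute; presumably lets callers test for a parsed recipe without AttributeError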
+ self.recipe = None
try:
self.schema = SchemaOrg(page_data)
except JSONDecodeError:
pass
return TextScraper(text, url)
-
-# %%