diff --git a/cookbook/helper/recipe_html_import.py b/cookbook/helper/recipe_html_import.py
index 7fa7beaf..f78ccb79 100644
--- a/cookbook/helper/recipe_html_import.py
+++ b/cookbook/helper/recipe_html_import.py
@@ -9,6 +9,8 @@ from recipe_scrapers._utils import get_host_name, normalize_string
from cookbook.helper import recipe_url_import as helper
from cookbook.helper.scrapers.scrapers import text_scraper
+from recipe_scrapers import scrape_me
+from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
def get_recipe_from_source(text, url, request):
@@ -63,34 +65,41 @@ def get_recipe_from_source(text, url, request):
html_data = []
images = []
text = unquote(text)
+ scrape = None
- try:
- parse_list.append(remove_graph(json.loads(text)))
- if not url and 'url' in parse_list[0]:
- url = parse_list[0]['url']
- scrape = text_scraper("", url=url)
+ if url:
+ try:
+ scrape = scrape_me(url_path=url, wild_mode=True)
+ except NoSchemaFoundInWildMode:
+ pass
+ if not scrape:
+ try:
+ parse_list.append(remove_graph(json.loads(text)))
+ if not url and 'url' in parse_list[0]:
+ url = parse_list[0]['url']
+ scrape = text_scraper("", url=url)
- except JSONDecodeError:
- soup = BeautifulSoup(text, "html.parser")
- html_data = get_from_html(soup)
- images += get_images_from_source(soup, url)
- for el in soup.find_all('script', type='application/ld+json'):
- el = remove_graph(el)
- if not url and 'url' in el:
- url = el['url']
- if type(el) == list:
- for le in el:
- parse_list.append(le)
- elif type(el) == dict:
- parse_list.append(el)
- for el in soup.find_all(type='application/json'):
- el = remove_graph(el)
- if type(el) == list:
- for le in el:
- parse_list.append(le)
- elif type(el) == dict:
- parse_list.append(el)
- scrape = text_scraper(text, url=url)
+ except JSONDecodeError:
+ soup = BeautifulSoup(text, "html.parser")
+ html_data = get_from_html(soup)
+ images += get_images_from_source(soup, url)
+ for el in soup.find_all('script', type='application/ld+json'):
+ el = remove_graph(el)
+ if not url and 'url' in el:
+ url = el['url']
+ if type(el) == list:
+ for le in el:
+ parse_list.append(le)
+ elif type(el) == dict:
+ parse_list.append(el)
+ for el in soup.find_all(type='application/json'):
+ el = remove_graph(el)
+ if type(el) == list:
+ for le in el:
+ parse_list.append(le)
+ elif type(el) == dict:
+ parse_list.append(el)
+ scrape = text_scraper(text, url=url)
recipe_json = helper.get_from_scraper(scrape, request)
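
Note on the hunk above: when a URL is supplied, the importer now tries recipe_scrapers' wild mode first and only falls back to local JSON/HTML parsing when no schema.org data is found. A minimal sketch of that control flow, assuming a recipe_scrapers release that exposes scrape_me(url_path, wild_mode=...) and NoSchemaFoundInWildMode as imported in the first hunk (load_scraper is a hypothetical stand-in for the logic above):

    from recipe_scrapers import scrape_me
    from recipe_scrapers._exceptions import NoSchemaFoundInWildMode

    def load_scraper(url):
        # Hypothetical helper mirroring the fallback order above: a live
        # wild-mode scrape wins; returning None tells the caller to parse
        # the pasted text with json.loads()/BeautifulSoup instead.
        if url:
            try:
                return scrape_me(url_path=url, wild_mode=True)
            except NoSchemaFoundInWildMode:
                pass  # URL reachable but carries no schema.org markup
        return None
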
diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py
index 9ad34528..0d8688b2 100644
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@@ -114,7 +114,15 @@ def get_from_scraper(scrape, request):
except Exception:
pass
- if source_url := scrape.url:
+ source_url = None
+ try:
+ source_url = scrape.canonical_url()
+ except Exception:
+ try:
+ source_url = scrape.url
+ except Exception:
+ pass
+ if source_url:
recipe_json['source_url'] = source_url
try:
keywords.append(source_url.replace('http://', '').replace('https://', '').split('/')[0])
@@ -129,9 +136,11 @@ def get_from_scraper(scrape, request):
ingredient_parser = IngredientParser(request, True)
recipe_json['steps'] = []
-
- for i in parse_instructions(scrape.instructions()):
- recipe_json['steps'].append({'instruction': i, 'ingredients': [], })
+ try:
+ for i in parse_instructions(scrape.instructions()):
+ recipe_json['steps'].append({'instruction': i, 'ingredients': [], })
+ except Exception:
+ pass
if len(recipe_json['steps']) == 0:
recipe_json['steps'].append({'instruction': '', 'ingredients': [], })
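
Note on the hunks above: canonical_url() is not implemented by every scraper and can raise on text-built ones, so the lookup is guarded and source_url is initialized to None, which keeps the final "if source_url:" check from raising a NameError when both lookups fail. The same hardening wraps parse_instructions(), since wild-mode scrapers may lack instructions. A standalone sketch of the URL lookup (get_source_url is a hypothetical name):

    def get_source_url(scrape):
        # Hypothetical extraction of the guarded lookup above: prefer the
        # canonical URL, fall back to the plain url attribute, else None.
        source_url = None
        try:
            source_url = scrape.canonical_url()
        except Exception:
            try:
                source_url = scrape.url
            except Exception:
                pass
        return source_url
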
diff --git a/cookbook/helper/scrapers/scrapers.py b/cookbook/helper/scrapers/scrapers.py
index 6d785a5e..eb93cc2c 100644
--- a/cookbook/helper/scrapers/scrapers.py
+++ b/cookbook/helper/scrapers/scrapers.py
@@ -1,6 +1,6 @@
from bs4 import BeautifulSoup
from json import JSONDecodeError
-from recipe_scrapers import SCRAPERS, get_host_name
+from recipe_scrapers import SCRAPERS
from recipe_scrapers._factory import SchemaScraperFactory
from recipe_scrapers._schemaorg import SchemaOrg
@@ -15,13 +15,7 @@ SCRAPERS.update(CUSTOM_SCRAPERS)
def text_scraper(text, url=None):
- domain = None
- if url:
- domain = get_host_name(url)
- if domain in SCRAPERS:
- scraper_class = SCRAPERS[domain]
- else:
- scraper_class = SchemaScraperFactory.SchemaScraper
+ scraper_class = SchemaScraperFactory.SchemaScraper
class TextScraper(scraper_class):
def __init__(
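
Note on this last change: text_scraper no longer dispatches on the URL's host, because URL imports now go through scrape_me(), which performs that SCRAPERS lookup itself; pasted text only ever needs the generic schema.org scraper. A quick illustration of the host dispatch scrape_me() already handles, assuming the public SCRAPERS map and the get_host_name helper (the URL is hypothetical):

    from recipe_scrapers import SCRAPERS
    from recipe_scrapers._utils import get_host_name

    # Hosts with a dedicated scraper class resolve through this map inside
    # scrape_me(); everything else falls back to wild/schema.org handling.
    host = get_host_name("https://www.chefkoch.de/rezepte/1/beispiel.html")
    print(host in SCRAPERS)
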