From f811f5996e0dd038efb3274a89b82d5a61a305d7 Mon Sep 17 00:00:00 2001
From: smilerz <smilerz@gmail.com>
Date: Thu, 1 Apr 2021 15:07:51 -0500
Subject: [PATCH] wrapper for recipe_scrapers to parse text input

---
 cookbook/helper/recipe_html_import.py | 37 ++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/cookbook/helper/recipe_html_import.py b/cookbook/helper/recipe_html_import.py
index 9140d076..ab34fdad 100644
--- a/cookbook/helper/recipe_html_import.py
+++ b/cookbook/helper/recipe_html_import.py
@@ -1,12 +1,17 @@
 import json
 import re
-from json.decoder import JSONDecodeError
 
 from bs4 import BeautifulSoup
 from bs4.element import Tag
 from cookbook.helper import recipe_url_import as helper
+from recipe_scrapers import SCRAPERS, get_domain, _exception_handling
 from recipe_scrapers._utils import get_host_name, normalize_string
+from recipe_scrapers._factory import SchemaScraperFactory
+from recipe_scrapers._schemaorg import SchemaOrg
 
+from bs4 import BeautifulSoup
+from json import JSONDecodeError
+from json.decoder import JSONDecodeError
 
 def get_recipe_from_source(text, url, space):
     def build_node(k, v):
@@ -165,6 +170,36 @@ def get_images_from_source(soup, url):
     return images
 
 
+def text_scraper(text, url=None):
+    """Build a scraper object from already-available page text.
+
+    :param text: page source; parsed with BeautifulSoup and SchemaOrg
+    :param url: optional source URL, used to select a site-specific scraper
+    :return: instance of a scraper subclass populated from ``text``
+    """
+    # Without a URL there is no domain to look up; use the generic scraper.
+    domain = get_domain(url) if url else None
+    # Subclass the factory's nested SchemaScraper class, not the factory itself.
+    scraper_class = SCRAPERS.get(domain, SchemaScraperFactory.SchemaScraper)
+
+    class TextScraper(scraper_class):
+        """``scraper_class`` variant initialised directly from page text."""
+
+        def __init__(self, page_data, url=None):
+            # super().__init__() is not called; attributes are assigned here.
+            self.wild_mode = False
+            self.exception_handling = _exception_handling
+            self.meta_http_equiv = False
+            self.soup = BeautifulSoup(page_data, "html.parser")
+            self.url = url
+            try:
+                self.schema = SchemaOrg(page_data)
+            except JSONDecodeError:
+                pass  # best effort: on a JSON decode error ``schema`` stays unset
+
+    return TextScraper(text, url)
+
+
 def remove_graph(el):
     # recipes type might be wrapped in @graph type
     if isinstance(el, Tag):