commit 707d862e01a7497a1f22879d314b865a35e0e85b
Author: smilerz <smilerz@gmail.com>
Date: Wed Apr 14 10:35:00 2021 -0500
works now
commit 3942a445ed4f2ccec57de25eacd86ea4e4dd6bdb
Author: smilerz <smilerz@gmail.com>
Date: Wed Apr 14 10:25:24 2021 -0500
updated serializer and api
commit 10dc746eb175c7f805a8a8ffa7ce49977a7ce97e
Author: smilerz <smilerz@gmail.com>
Date: Wed Apr 14 10:20:19 2021 -0500
fixed bookmarklet
commit 9779104902d3be0258c95cd2eeebcba0d5d48892
Merge: bb8262c 0cb3928
Author: smilerz <smilerz@gmail.com>
Date: Wed Apr 14 09:56:27 2021 -0500
Merge branch 'bookmarklet' into json_import
commit 0cb39284bb835ffc6cfee3e4306aadc4a64a25be
Author: smilerz <smilerz@gmail.com>
Date: Wed Apr 14 09:42:53 2021 -0500
retrieve bookmarklet ID from get
commit e89e0218de684d40b2e2bfb6ba833891206c828e
Author: smilerz <smilerz@gmail.com>
Date: Wed Apr 14 09:29:33 2021 -0500
Revert "fixed broken tab"
This reverts commit ca0a1aede3cc6cb3912bc1fe30c0aa22e3f481a6.
commit bb8262ccabb93c56fbc18c407d5a0653b8b3ca79
Merge: b1e73aa 35a7f62
Author: smilerz <smilerz@gmail.com>
Date: Sun Apr 11 20:35:57 2021 -0500
Merge branch 'main_fork' into json_import
44 lines
1.3 KiB
Python
44 lines
1.3 KiB
Python
from bs4 import BeautifulSoup
|
|
from json import JSONDecodeError
|
|
from recipe_scrapers import SCRAPERS, get_domain, _exception_handling
|
|
from recipe_scrapers._factory import SchemaScraperFactory
|
|
from recipe_scrapers._schemaorg import SchemaOrg
|
|
|
|
from .cooksillustrated import CooksIllustrated
|
|
|
|
# Register the CooksIllustrated scraper for every host it serves; all three
# America's Test Kitchen properties share the same page structure.
CUSTOM_SCRAPERS = {
    CooksIllustrated.host(site=site_name): CooksIllustrated
    for site_name in ("cooksillustrated", "americastestkitchen", "cookscountry")
}

# Merge our custom entries into the library's global scraper registry so
# domain lookups below resolve to them.
SCRAPERS.update(CUSTOM_SCRAPERS)
|
|
|
|
|
|
def text_scraper(text, url=None):
    """Build a recipe scraper from already-fetched page HTML.

    Selects a site-specific scraper class when the URL's domain is found in
    the ``SCRAPERS`` registry, otherwise falls back to the generic
    schema.org scraper, then returns an instance constructed from the
    supplied markup instead of performing an HTTP fetch.

    :param text: raw HTML of the page (anything BeautifulSoup accepts).
    :param url: optional source URL; used only to pick the scraper class
        and stored on the returned instance.
    :return: instance of a dynamically created ``TextScraper`` subclass.
    """
    domain = None
    if url:
        # BUG FIX: the original called get_host_name(), which is not defined
        # anywhere and raised NameError whenever a URL was supplied. The
        # helper imported from recipe_scrapers at the top of this file is
        # get_domain().
        domain = get_domain(url)
    if domain in SCRAPERS:
        scraper_class = SCRAPERS[domain]
    else:
        # Unknown (or absent) domain: fall back to generic schema.org parsing.
        scraper_class = SchemaScraperFactory.SchemaScraper

    class TextScraper(scraper_class):
        """Scraper initialized from in-memory HTML rather than a live fetch."""

        def __init__(self, page_data, url=None):
            # Mirror the attributes the recipe_scrapers base classes normally
            # set up in their own __init__, minus any network access.
            self.wild_mode = False
            self.exception_handling = None
            self.meta_http_equiv = False
            self.soup = BeautifulSoup(page_data, "html.parser")
            self.url = url
            self.recipe = None
            try:
                self.schema = SchemaOrg(page_data)
            except JSONDecodeError:
                # Malformed/absent JSON-LD on the page; best-effort, so we
                # deliberately leave self.schema unset rather than fail.
                # NOTE(review): callers must tolerate a missing .schema
                # attribute — confirm, or default it to None here.
                pass

    return TextScraper(text, url)
|