ensure time is always a number

parent 3cf949bf8d
commit ec6a10ca0a
@@ -5,13 +5,10 @@ from bs4 import BeautifulSoup
 from bs4.element import Tag
 from cookbook.helper import recipe_url_import as helper
 from cookbook.helper.scrapers.scrapers import text_scraper
+from json import JSONDecodeError
 from recipe_scrapers._utils import get_host_name, normalize_string
 
 
-from bs4 import BeautifulSoup
-from json import JSONDecodeError
-from json.decoder import JSONDecodeError
-
 def get_recipe_from_source(text, url, space):
     def build_node(k, v):
         if isinstance(v, dict):
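
Aside on the dropped imports: json.JSONDecodeError and json.decoder.JSONDecodeError are the same class (the json package re-exports it from json.decoder), so the one surviving import covers both deleted ones:

    import json.decoder
    from json import JSONDecodeError

    # the package-level name is a re-export of the decoder's class
    assert JSONDecodeError is json.decoder.JSONDecodeError
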
@@ -78,11 +75,9 @@ def get_recipe_from_source(text, url, space):
     text = normalize_string(text)
     try:
         parse_list.append(remove_graph(json.loads(text)))
-        scrape = text_scraper("<script type='application/ld+json'>"+text+"</script>")
 
     except JSONDecodeError:
         soup = BeautifulSoup(text, "html.parser")
-        scrape = text_scraper(text)
         html_data = get_from_html(soup)
         images += get_images_from_source(soup, url)
         for el in soup.find_all('script', type='application/ld+json'):
@@ -95,6 +90,10 @@ def get_recipe_from_source(text, url, space):
     if 'url' in parse_list[0]:
         url = parse_list[0]['url']
 
+    if type(text) == dict:
+        scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
+    elif type(text) == str:
+        scrape = text_scraper(text, url=url)
 
     recipe_json = helper.get_from_scraper(scrape, space)
 
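
Constructing the scraper only after the ld+json pass lets the url recovered from parse_list[0]['url'] reach text_scraper, which uses it to pick a host-specific scraper class. One caveat: in the dict branch, "..." + text + "..." raises TypeError if text is still a dict at that point, so that branch presumably only sees values that are already strings. A defensive sketch of the embedding step (build_ld_json_script is a hypothetical helper, not code from this commit):

    import json

    def build_ld_json_script(text):
        # Assumption: a parsed payload must be re-serialized before being
        # embedded, because str + dict concatenation raises TypeError.
        if isinstance(text, dict):
            text = json.dumps(text)
        return "<script type='application/ld+json'>" + text + "</script>"
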
@@ -131,7 +130,6 @@ def get_recipe_from_source(text, url, space):
         else:
             recipe_tree += [{'name': 'json', 'children': temp_tree}]
 
-
     return recipe_json, recipe_tree, html_data, images
 
 
@@ -1,86 +1,14 @@
-import json
 import random
 import re
-from json import JSONDecodeError
 from isodate import parse_duration as iso_parse_duration
 from isodate.isoerror import ISO8601Error
 
-import microdata
-from bs4 import BeautifulSoup
 from cookbook.helper.ingredient_parser import parse as parse_single_ingredient
 from cookbook.models import Keyword
-from django.http import JsonResponse
 from django.utils.dateparse import parse_duration
-from django.utils.translation import gettext as _
 from recipe_scrapers._utils import get_minutes, normalize_string
 
 
-# def find_recipe_json(ld_json, url, space):
-#     ld_json['name'] = parse_name(ld_json['name'])
-
-#     # some sites use ingredients instead of recipeIngredients
-#     if 'recipeIngredient' not in ld_json and 'ingredients' in ld_json:
-#         ld_json['recipeIngredient'] = ld_json['ingredients']
-
-#     if 'recipeIngredient' in ld_json:
-#         ld_json['recipeIngredient'] = parse_ingredients(ld_json['recipeIngredient'])
-#     else:
-#         ld_json['recipeIngredient'] = ""
-
-#     keywords = []
-#     if 'keywords' in ld_json:
-#         keywords += listify_keywords(ld_json['keywords'])
-#     if 'recipeCategory' in ld_json:
-#         keywords += listify_keywords(ld_json['recipeCategory'])
-#     if 'recipeCuisine' in ld_json:
-#         keywords += listify_keywords(ld_json['recipeCuisine'])
-#     try:
-#         ld_json['keywords'] = parse_keywords(list(set(map(str.casefold, keywords))), space)
-#     except TypeError:
-#         pass
-
-#     if 'recipeInstructions' in ld_json:
-#         ld_json['recipeInstructions'] = parse_instructions(ld_json['recipeInstructions'])
-#     else:
-#         ld_json['recipeInstructions'] = ""
-
-#     if 'image' in ld_json:
-#         ld_json['image'] = parse_image(ld_json['image'])
-#     else:
-#         ld_json['image'] = ""
-
-#     if 'description' in ld_json:
-#         ld_json['description'] = normalize_string(ld_json['description'])
-#     else:
-#         ld_json['description'] = ""
-
-#     if 'cookTime' in ld_json:
-#         ld_json['cookTime'] = parse_cooktime(ld_json['cookTime'])
-#     else:
-#         ld_json['cookTime'] = 0
-
-#     if 'prepTime' in ld_json:
-#         ld_json['prepTime'] = parse_cooktime(ld_json['prepTime'])
-#     else:
-#         ld_json['prepTime'] = 0
-
-#     if 'servings' in ld_json:
-#         ld_json['servings'] = parse_servings(ld_json['servings'])
-#     elif 'recipeYield' in ld_json:
-#         ld_json['servings'] = parse_servings(ld_json['recipeYield'])
-#     else:
-#         ld_json['servings'] = 1
-
-#     for key in list(ld_json):
-#         if key not in [
-#             'prepTime', 'cookTime', 'image', 'recipeInstructions',
-#             'keywords', 'name', 'recipeIngredient', 'servings', 'description'
-#         ]:
-#             ld_json.pop(key, None)
-
-#     return ld_json
 
 
 def get_from_scraper(scrape, space):
     # converting the scrape_me object to the existing json format based on ld+json
 
@@ -89,12 +17,9 @@ def get_from_scraper(scrape, space):
 
     try:
         description = scrape.schema.data.get("description") or ''
-        recipe_json['prepTime'] = get_minutes(scrape.schema.data.get("prepTime")) or 0
-        recipe_json['cookTime'] = get_minutes(scrape.schema.data.get("cookTime")) or 0
     except AttributeError:
         description = ''
-        recipe_json['prepTime'] = 0
-        recipe_json['cookTime'] = 0
 
     recipe_json['description'] = normalize_string(description)
 
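
Moving the time fields out of this try block also decouples them from description: before, an AttributeError raised while converting prepTime or cookTime jumped to the except branch and overwrote an already-extracted description with ''. A toy reproduction of that coupling (fake objects, not the real scraper):

    class FakeSchema:
        data = {"description": "a perfectly good description"}

    class FakeScrape:
        schema = FakeSchema()

    scrape = FakeScrape()
    try:
        description = scrape.schema.data.get("description") or ''
        minutes = scrape.schema.data.get("prepTime").total_seconds()  # None.total_seconds() -> AttributeError
    except AttributeError:
        description = ''  # the value extracted two lines up is silently discarded
    assert description == ''
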
@@ -105,9 +30,11 @@ def get_from_scraper(scrape, space):
         servings = 1
     recipe_json['servings'] = servings
 
+    recipe_json['prepTime'] = get_minutes(scrape.schema.data.get("prepTime")) or 0
+    recipe_json['cookTime'] = get_minutes(scrape.schema.data.get("cookTime")) or 0
     if recipe_json['cookTime'] + recipe_json['prepTime'] == 0:
         try:
-            recipe_json['prepTime'] = scrape.total_time()
+            recipe_json['prepTime'] = get_minutes(scrape.total_time()) or 0
         except AttributeError:
             pass
 
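
This hunk is the commit message in action: every path that writes prepTime or cookTime now goes through get_minutes(...) or 0, so the stored values are always numbers and the cookTime + prepTime == 0 check cannot trip over a None or an ISO 8601 string returned by a scraper. A self-contained illustration of the pattern (coerce_minutes is a simplified stand-in for recipe_scrapers' get_minutes, written here only to show the coercion):

    import re

    def coerce_minutes(value):
        # Simplified stand-in: accept an int, a '45 min' style string,
        # or an ISO 8601 duration such as 'PT1H30M'; otherwise None.
        if value is None:
            return None
        if isinstance(value, (int, float)):
            return int(value)
        match = re.search(r'(?:PT)?(?:(\d+)H)?(\d+)(?:M|\s*min)?', str(value), re.IGNORECASE)
        if not match:
            return None
        return int(match.group(1) or 0) * 60 + int(match.group(2))

    # the commit's pattern: whatever a scraper returns, store a number
    for raw in (None, "PT1H30M", "45 min", 20):
        prep_time = coerce_minutes(raw) or 0
        assert isinstance(prep_time, int)
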
@@ -1,4 +1,5 @@
 from bs4 import BeautifulSoup
+from json import JSONDecodeError
 from recipe_scrapers import SCRAPERS, get_domain, _exception_handling
 from recipe_scrapers._factory import SchemaScraperFactory
 from recipe_scrapers._schemaorg import SchemaOrg
@@ -8,9 +9,9 @@ from .cooksillustrated import CooksIllustrated
 CUSTOM_SCRAPERS = {
     CooksIllustrated.host(): CooksIllustrated,
 }
+SCRAPERS.update(CUSTOM_SCRAPERS)
+
 
-SCRAPERS = SCRAPERS.update(CUSTOM_SCRAPERS)
-#%%
 def text_scraper(text, url=None):
     domain = None
     if url:
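
The deleted reassignment was a real bug, not a style fix: dict.update() mutates in place and returns None, so SCRAPERS = SCRAPERS.update(CUSTOM_SCRAPERS) left the registry bound to None, and any later domain in SCRAPERS lookup in text_scraper would raise TypeError. Two lines demonstrate it:

    registry = {'example.com': object()}
    registry = registry.update({'other.example': object()})  # update() returns None
    assert registry is None  # the whole registry is gone
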
@@ -31,11 +32,10 @@ def text_scraper(text, url=None):
             self.meta_http_equiv = False
             self.soup = BeautifulSoup(page_data, "html.parser")
             self.url = url
+            self.recipe = None
             try:
                 self.schema = SchemaOrg(page_data)
             except JSONDecodeError:
                 pass
 
     return TextScraper(text, url)
-
-# %%
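
Setting self.recipe = None gives the scraper a predictable attribute even when SchemaOrg(page_data) raises JSONDecodeError and the except: pass leaves construction half-finished. Note that self.schema is still unset on that path, so callers should probe for it; a hedged usage sketch (the getattr guard is an assumption about how to consume this, not code from the commit):

    from cookbook.helper.scrapers.scrapers import text_scraper

    scraper = text_scraper("<html><body>no ld+json payload here</body></html>")
    # schema is never assigned when SchemaOrg raised JSONDecodeError,
    # so probe for it instead of assuming the attribute exists.
    schema = getattr(scraper, 'schema', None)
    if schema is not None:
        print(schema.data)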