diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py index 3091023a..7c94a43e 100644 --- a/cookbook/helper/recipe_url_import.py +++ b/cookbook/helper/recipe_url_import.py @@ -138,7 +138,7 @@ def get_from_scraper(scrape, request): recipe_json['steps'] = [] try: - for i in parse_instructions(scrape.instructions()): + for i in parse_instructions(scrape.instructions_list()): recipe_json['steps'].append({'instruction': i, 'ingredients': [], }) except Exception: pass @@ -248,6 +248,22 @@ def clean_instruction_string(instruction): normalized_string = normalize_string(instruction) normalized_string = normalized_string.replace('\n', ' \n') normalized_string = normalized_string.replace(' \n \n', '\n\n') + + # handle unsupported, special UTF-8 characters in Thermomix-specific instructions, + # that happen in nearly every recipe on Cookidoo, Zaubertopf Club, Rezeptwelt + # and in Thermomix-specific recipes on many other sites + return normalized_string \ + .replace("", "**") \ + .replace("", "**") \ + .replace("", _('Linkslauf')) \ + .replace("", _('Kochlöffel')) \ + .replace("", _('Kneten')) \ + .replace("Andicken ", _('Andicken')) \ + .replace("Erwärmen ", _('Erwärmen')) \ + .replace("Fermentieren ", _('Fermentieren')) \ + .replace("Rühraufsatz einsetzen", "**Rühraufsatz einsetzen**") \ + .replace("Rühraufsatz entfernen", "**Rühraufsatz entfernen**") + return normalized_string diff --git a/cookbook/helper/scrapers/cookidoo.py b/cookbook/helper/scrapers/cookidoo.py deleted file mode 100644 index 99f1ea08..00000000 --- a/cookbook/helper/scrapers/cookidoo.py +++ /dev/null @@ -1,62 +0,0 @@ -from recipe_scrapers._abstract import AbstractScraper -from gettext import gettext as _ - - -class Cookidoo(AbstractScraper): - - def normalize_instruction(self, instruction): - if instruction is None: - return "" - # handle Thermomix-specific instructions that happen in nearly every receipe on Cookidoo - return instruction \ - .replace("", "**") \ - 
.replace("", "**") \ - .replace("", _('Linkslauf')) \ - .replace("", _('Kochlöffel')) \ - .replace("", _('Kneten')) \ - .replace("Andicken ", _('Andicken')) \ - .replace("Erwärmen ", _('Erwärmen')) \ - .replace("Fermentieren ", _('Fermentieren')) \ - .replace("Rühraufsatz einsetzen", "**Rühraufsatz einsetzen**") \ - .replace("Rühraufsatz entfernen", "**Rühraufsatz entfernen**") - - def instructions(self): - instructions = self.schema.data.get("recipeInstructions") or "" - - if isinstance(instructions, list): - instructions_gist = [] - for schema_instruction_item in instructions: - # combine lists of instructions per section into a flat list - instructions_gist += self.extract_instructions_text(schema_instruction_item, "",) - - steps = [] - for instruction in instructions_gist: - steps.append(self.normalize_instruction(instruction)) - - return steps - - return instructions - - def extract_instructions_text(self, schema_item, prefix): - instructions_gist = [] - if type(schema_item) is str: - instructions_gist.append(prefix + schema_item) - elif schema_item.get("@type") == "HowToStep": - # steps make up simple recipes or a section of a more complex recipe - if schema_item.get("name", False): - # name may be the text in full or truncated - if not schema_item.get("text").startswith( - schema_item.get("name").rstrip(".") - ): - instructions_gist.append(schema_item.get("name")) - instructions_gist.append(schema_item.get("text")) - elif schema_item.get("@type") == "HowToSection": - # complex recipes are made up of named sections that are made up of steps - section_name = schema_item.get("name") or schema_item.get("Name") or _("Instructions") - instructions_gist.append("**" + section_name + "**") - for item in schema_item.get("itemListElement"): - instructions_gist += self.extract_instructions_text(item, "#" + prefix) - return instructions_gist - - def ingredients(self): - return self.schema.ingredients() diff --git a/cookbook/helper/scrapers/scrapers.py 
b/cookbook/helper/scrapers/scrapers.py index 01dfa374..7d6c08b1 100644 --- a/cookbook/helper/scrapers/scrapers.py +++ b/cookbook/helper/scrapers/scrapers.py @@ -6,15 +6,11 @@ from recipe_scrapers._factory import SchemaScraperFactory from recipe_scrapers._schemaorg import SchemaOrg from .cooksillustrated import CooksIllustrated -from .cookidoo import Cookidoo CUSTOM_SCRAPERS = { CooksIllustrated.host(site="cooksillustrated"): CooksIllustrated, CooksIllustrated.host(site="americastestkitchen"): CooksIllustrated, CooksIllustrated.host(site="cookscountry"): CooksIllustrated, - "cookidoo.de": Cookidoo, - "cookidoo.at": Cookidoo, - "cookidoo.ch": Cookidoo, } SCRAPERS.update(CUSTOM_SCRAPERS)