#1552 use instructions_list() instead of instructions() and move Thermomix-Handling to clean_instruction_string()
This commit is contained in:
parent
5a0f07a6b2
commit
54d0b70f01
@ -138,7 +138,7 @@ def get_from_scraper(scrape, request):
|
|||||||
|
|
||||||
recipe_json['steps'] = []
|
recipe_json['steps'] = []
|
||||||
try:
|
try:
|
||||||
for i in parse_instructions(scrape.instructions()):
|
for i in parse_instructions(scrape.instructions_list()):
|
||||||
recipe_json['steps'].append({'instruction': i, 'ingredients': [], })
|
recipe_json['steps'].append({'instruction': i, 'ingredients': [], })
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
@ -248,6 +248,22 @@ def clean_instruction_string(instruction):
|
|||||||
normalized_string = normalize_string(instruction)
|
normalized_string = normalize_string(instruction)
|
||||||
normalized_string = normalized_string.replace('\n', ' \n')
|
normalized_string = normalized_string.replace('\n', ' \n')
|
||||||
normalized_string = normalized_string.replace(' \n \n', '\n\n')
|
normalized_string = normalized_string.replace(' \n \n', '\n\n')
|
||||||
|
|
||||||
|
# handle unsupported, special UTF-8 characters in Thermomix-specific instructions,
|
||||||
|
# that happen in nearly every recipe on Cookidoo, Zaubertopf Club, Rezeptwelt
|
||||||
|
# and in Thermomix-specific recipes on many other sites
|
||||||
|
return normalized_string \
|
||||||
|
.replace("<nobr>", "**") \
|
||||||
|
.replace("</nobr>", "**") \
|
||||||
|
.replace("", _('Linkslauf')) \
|
||||||
|
.replace("", _('Kochlöffel')) \
|
||||||
|
.replace("", _('Kneten')) \
|
||||||
|
.replace("Andicken ", _('Andicken')) \
|
||||||
|
.replace("Erwärmen ", _('Erwärmen')) \
|
||||||
|
.replace("Fermentieren ", _('Fermentieren')) \
|
||||||
|
.replace("Rühraufsatz einsetzen", "**Rühraufsatz einsetzen**") \
|
||||||
|
.replace("Rühraufsatz entfernen", "**Rühraufsatz entfernen**")
|
||||||
|
|
||||||
return normalized_string
|
return normalized_string
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,62 +0,0 @@
|
|||||||
from recipe_scrapers._abstract import AbstractScraper
|
|
||||||
from gettext import gettext as _
|
|
||||||
|
|
||||||
|
|
||||||
class Cookidoo(AbstractScraper):
|
|
||||||
|
|
||||||
def normalize_instruction(self, instruction):
|
|
||||||
if instruction is None:
|
|
||||||
return ""
|
|
||||||
# handle Thermomix-specific instructions that happen in nearly every recipe on Cookidoo
|
|
||||||
return instruction \
|
|
||||||
.replace("<nobr>", "**") \
|
|
||||||
.replace("</nobr>", "**") \
|
|
||||||
.replace("", _('Linkslauf')) \
|
|
||||||
.replace("", _('Kochlöffel')) \
|
|
||||||
.replace("", _('Kneten')) \
|
|
||||||
.replace("Andicken ", _('Andicken')) \
|
|
||||||
.replace("Erwärmen ", _('Erwärmen')) \
|
|
||||||
.replace("Fermentieren ", _('Fermentieren')) \
|
|
||||||
.replace("Rühraufsatz einsetzen", "**Rühraufsatz einsetzen**") \
|
|
||||||
.replace("Rühraufsatz entfernen", "**Rühraufsatz entfernen**")
|
|
||||||
|
|
||||||
def instructions(self):
|
|
||||||
instructions = self.schema.data.get("recipeInstructions") or ""
|
|
||||||
|
|
||||||
if isinstance(instructions, list):
|
|
||||||
instructions_gist = []
|
|
||||||
for schema_instruction_item in instructions:
|
|
||||||
# combine lists of instructions per section into a flat list
|
|
||||||
instructions_gist += self.extract_instructions_text(schema_instruction_item, "",)
|
|
||||||
|
|
||||||
steps = []
|
|
||||||
for instruction in instructions_gist:
|
|
||||||
steps.append(self.normalize_instruction(instruction))
|
|
||||||
|
|
||||||
return steps
|
|
||||||
|
|
||||||
return instructions
|
|
||||||
|
|
||||||
def extract_instructions_text(self, schema_item, prefix):
|
|
||||||
instructions_gist = []
|
|
||||||
if type(schema_item) is str:
|
|
||||||
instructions_gist.append(prefix + schema_item)
|
|
||||||
elif schema_item.get("@type") == "HowToStep":
|
|
||||||
# steps make up simple recipes or a section of a more complex recipe
|
|
||||||
if schema_item.get("name", False):
|
|
||||||
# name may be the text in full or truncated
|
|
||||||
if not schema_item.get("text").startswith(
|
|
||||||
schema_item.get("name").rstrip(".")
|
|
||||||
):
|
|
||||||
instructions_gist.append(schema_item.get("name"))
|
|
||||||
instructions_gist.append(schema_item.get("text"))
|
|
||||||
elif schema_item.get("@type") == "HowToSection":
|
|
||||||
# complex recipes are made up of named sections that are made up of steps
|
|
||||||
section_name = schema_item.get("name") or schema_item.get("Name") or _("Instructions")
|
|
||||||
instructions_gist.append("**" + section_name + "**")
|
|
||||||
for item in schema_item.get("itemListElement"):
|
|
||||||
instructions_gist += self.extract_instructions_text(item, "#" + prefix)
|
|
||||||
return instructions_gist
|
|
||||||
|
|
||||||
def ingredients(self):
|
|
||||||
return self.schema.ingredients()
|
|
@ -6,15 +6,11 @@ from recipe_scrapers._factory import SchemaScraperFactory
|
|||||||
from recipe_scrapers._schemaorg import SchemaOrg
|
from recipe_scrapers._schemaorg import SchemaOrg
|
||||||
|
|
||||||
from .cooksillustrated import CooksIllustrated
|
from .cooksillustrated import CooksIllustrated
|
||||||
from .cookidoo import Cookidoo
|
|
||||||
|
|
||||||
CUSTOM_SCRAPERS = {
|
CUSTOM_SCRAPERS = {
|
||||||
CooksIllustrated.host(site="cooksillustrated"): CooksIllustrated,
|
CooksIllustrated.host(site="cooksillustrated"): CooksIllustrated,
|
||||||
CooksIllustrated.host(site="americastestkitchen"): CooksIllustrated,
|
CooksIllustrated.host(site="americastestkitchen"): CooksIllustrated,
|
||||||
CooksIllustrated.host(site="cookscountry"): CooksIllustrated,
|
CooksIllustrated.host(site="cookscountry"): CooksIllustrated,
|
||||||
"cookidoo.de": Cookidoo,
|
|
||||||
"cookidoo.at": Cookidoo,
|
|
||||||
"cookidoo.ch": Cookidoo,
|
|
||||||
}
|
}
|
||||||
SCRAPERS.update(CUSTOM_SCRAPERS)
|
SCRAPERS.update(CUSTOM_SCRAPERS)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user