From a531d135b5964e673d8c4af70564faa209f8498e Mon Sep 17 00:00:00 2001 From: smilerz Date: Wed, 14 Apr 2021 10:42:31 -0500 Subject: [PATCH] remove html tags from description --- cookbook/helper/recipe_url_import.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py index de1fb624..cd9e33a7 100644 --- a/cookbook/helper/recipe_url_import.py +++ b/cookbook/helper/recipe_url_import.py @@ -21,7 +21,7 @@ def get_from_scraper(scrape, space): except AttributeError: description = '' - recipe_json['description'] = normalize_string(description) + recipe_json['description'] = parse_description(description) try: servings = scrape.yields() @@ -181,6 +181,14 @@ def parse_ingredients(ingredients): return ingredients +def parse_description(description): + description = re.sub(r'\n\s*\n', '\n\n', description) + description = re.sub(' +', ' ', description) + description = re.sub('</p>', '\n', description) + description = re.sub('<[^<]+?>', '', description) + return normalize_string(description) + + def parse_instructions(instructions): instruction_text = ''