Merge pull request #1564 from smilerz/retain_original_ingredient_text

retain original text when parsing ingredients
2022-02-25 16:17:44 +01:00
parent 425a38f030 e52054e732
commit fc6268b7ff
27 changed files with 294 additions and 313 deletions
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@@ -6,13 +6,12 @@ from unicodedata import decomposition
 from django.utils.dateparse import parse_duration
 from isodate import parse_duration as iso_parse_duration
 from isodate.isoerror import ISO8601Error
+from recipe_scrapers._utils import get_minutes

 from cookbook.helper import recipe_url_import as helper
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.models import Keyword

-# from recipe_scrapers._utils import get_minutes  ## temporary until/unless upstream incorporates get_minutes() PR
-

 def get_from_scraper(scrape, request):
    # converting the scrape_me object to the existing json format based on ld+json
@@ -118,7 +117,7 @@ def get_from_scraper(scrape, request):
                            'id': random.randrange(10000, 99999)
                        },
                        'note': note,
-                        'original': x
+                        'original_text': x
                    }
                )
            except Exception:
@@ -134,7 +133,7 @@ def get_from_scraper(scrape, request):
                            'id': random.randrange(10000, 99999)
                        },
                        'note': '',
-                        'original': x
+                        'original_text': x
                    }
                )
        recipe_json['recipeIngredient'] = ingredients
@@ -200,7 +199,7 @@ def parse_ingredients(ingredients):
                                'id': random.randrange(10000, 99999)
                            },
                            'note': note,
-                            'original': x
+                            'original_text': x
                        }
                    )
            except Exception:
@@ -216,7 +215,7 @@ def parse_ingredients(ingredients):
                            'id': random.randrange(10000, 99999)
                        },
                        'note': '',
-                        'original': x
+                        'original_text': x
                    }
                )

@@ -367,55 +366,6 @@ def normalize_string(string):
    unescaped_string = unescaped_string.replace("\xa0", " ").replace("\t", " ").strip()
    return unescaped_string

-# TODO deprecate when merged into recipe_scapers
-
-
-def get_minutes(time_text):
-    if time_text is None:
-        return 0
-    TIME_REGEX = re.compile(
-        r"(\D*(?P<hours>\d*.?(\s\d)?\/?\d+)\s*(hours|hrs|hr|h|óra))?(\D*(?P<minutes>\d+)\s*(minutes|mins|min|m|perc))?",
-        re.IGNORECASE,
-    )
-    try:
-        return int(time_text)
-    except Exception:
-        pass
-
-    if time_text.startswith("P") and "T" in time_text:
-        time_text = time_text.split("T", 2)[1]
-    if "-" in time_text:
-        time_text = time_text.split("-", 2)[
-            1
-        ]  # sometimes formats are like this: '12-15 minutes'
-    if " to " in time_text:
-        time_text = time_text.split("to", 2)[
-            1
-        ]  # sometimes formats are like this: '12 to 15 minutes'
-
-    empty = ''
-    for x in time_text:
-        if 'fraction' in decomposition(x):
-            f = decomposition(x[-1:]).split()
-            empty += f" {f[1].replace('003', '')}/{f[3].replace('003', '')}"
-        else:
-            empty += x
-    time_text = empty
-    matched = TIME_REGEX.search(time_text)
-
-    minutes = int(matched.groupdict().get("minutes") or 0)
-
-    if "/" in (hours := matched.groupdict().get("hours") or ''):
-        number = hours.split(" ")
-        if len(number) == 2:
-            minutes += 60*int(number[0])
-        fraction = number[-1:][0].split("/")
-        minutes += 60 * float(int(fraction[0])/int(fraction[1]))
-    else:
-        minutes += 60 * float(hours)
-
-    return int(minutes)
-

 def iso_duration_to_minutes(string):
    match = re.match(