uk_bin_collection 0.152.0__py3-none-any.whl → 0.152.2__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in the public registry, and is provided for informational purposes only.
Files changed (36)
  1. uk_bin_collection/tests/input.json +92 -58
  2. uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py +69 -24
  3. uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +24 -47
  4. uk_bin_collection/uk_bin_collection/councils/CharnwoodBoroughCouncil.py +14 -3
  5. uk_bin_collection/uk_bin_collection/councils/CheltenhamBoroughCouncil.py +12 -12
  6. uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +24 -3
  7. uk_bin_collection/uk_bin_collection/councils/ChorleyCouncil.py +4 -0
  8. uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +114 -261
  9. uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py +13 -0
  10. uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +17 -2
  11. uk_bin_collection/uk_bin_collection/councils/EastDevonDC.py +14 -1
  12. uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py +76 -0
  13. uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py +59 -45
  14. uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +2 -0
  15. uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py +47 -15
  16. uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +13 -1
  17. uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py +2 -3
  18. uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py +13 -2
  19. uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py +18 -4
  20. uk_bin_collection/uk_bin_collection/councils/LewesDistrictCouncil.py +76 -0
  21. uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py +16 -4
  22. uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py +42 -47
  23. uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py +13 -6
  24. uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py +2 -1
  25. uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py +14 -9
  26. uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +2 -2
  27. uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +50 -14
  28. uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py +115 -65
  29. uk_bin_collection/uk_bin_collection/councils/StokeOnTrentCityCouncil.py +10 -5
  30. uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py +1 -3
  31. uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +3 -0
  32. {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/METADATA +179 -1
  33. {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/RECORD +36 -34
  34. {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/LICENSE +0 -0
  35. {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/WHEEL +0 -0
  36. {uk_bin_collection-0.152.0.dist-info → uk_bin_collection-0.152.2.dist-info}/entry_points.txt +0 -0
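Several of the council modules changed below (Glasgow, Herefordshire, Huntingdon, Lewes, Liverpool, Newham) adopt the same identifier handling: read the UPRN from kwargs, validate it, build the council-specific URL from it, and fall back to a URL stored under the old configuration style. A minimal sketch of that pattern, using a hypothetical council endpoint and a stand-in for the project's check_uprn helper, looks like this:

import requests
from bs4 import BeautifulSoup


def check_uprn(uprn):
    # Stand-in for the library's check_uprn helper: reject missing or non-numeric UPRNs.
    if not uprn or not str(uprn).strip().isdigit():
        raise ValueError(f"Invalid UPRN: {uprn!r}")


def fetch_bin_page(**kwargs):
    # Same shape as the new parse_data preambles: UPRN first, stored URL as fallback.
    # As in the changed modules, check_uprn runs before the stored-URL fallback.
    try:
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        url = f"https://example-council.gov.uk/bins?uprn={user_uprn}"  # hypothetical endpoint
        if not user_uprn:
            # Fallback for callers that stored a full URL in the old system.
            url = kwargs.get("url")
    except Exception as e:
        raise ValueError(f"Error getting identifier: {str(e)}")

    page = requests.get(url)
    return BeautifulSoup(page.text, "html.parser")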
uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py
@@ -1,4 +1,8 @@
 from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select, WebDriverWait
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -12,59 +16,69 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        uprn = kwargs.get("uprn")
-        # Check the UPRN is valid
-        check_uprn(uprn)
+        try:
+            uprn = kwargs.get("uprn")
+            # Check the UPRN is valid
+            check_uprn(uprn)
+            headless = kwargs.get("headless")
+            web_driver = kwargs.get("web_driver")
+            url = f"https://www.eastleigh.gov.uk/waste-bins-and-recycling/collection-dates/your-waste-bin-and-recycling-collections?uprn={uprn}"
+            driver = create_webdriver(web_driver, headless, None, __name__)
+            driver.get(url)
 
-        # Request URL
-        url = f"https://www.eastleigh.gov.uk/waste-bins-and-recycling/collection-dates/your-waste-bin-and-recycling-collections?uprn={uprn}"
+            wait = WebDriverWait(driver, 10)
+            bin_content = wait.until(
+                EC.presence_of_element_located((By.CLASS_NAME, "dl-horizontal"))
+            )
 
-        # Make Request
-        requests.packages.urllib3.disable_warnings()
-        page = requests.get(url)
+            # Make a BS4 object from driver's page source
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
 
-        # Make a BS4 object
-        soup = BeautifulSoup(page.text, features="html.parser")
-        soup.prettify()
+            # Data to return
+            data = {"bins": []}
 
-        # Data to return
-        data = {"bins": []}
+            # Valid bin types
+            binTypes = [
+                "Household Waste Bin",
+                "Recycling Bin",
+                "Food Waste Bin",
+                "Glass Box and Batteries",
+                "Garden Waste Bin",
+            ]
 
-        # Valid bin types
-        binTypes = [
-            "Household Waste Bin",
-            "Recycling Bin",
-            "Food Waste Bin",
-            "Glass Box and Batteries",
-            "Garden Waste Bin",
-        ]
+            # Value to create dict for DL values
+            keys, values = [], []
 
-        # Value to create dict for DL values
-        keys, values = [], []
+            # Loop though DT and DD for DL containing bins
+            dl = soup.find("dl", {"class": "dl-horizontal"})
+            for dt in dl.find_all("dt"):
+                keys.append(dt.text.strip())
+            for dd in dl.find_all("dd"):
+                values.append(dd.text.strip())
 
-        # Loop though DT and DD for DL containing bins
-        dl = soup.find("dl", {"class": "dl-horizontal"})
-        for dt in dl.find_all("dt"):
-            keys.append(dt.text.strip())
-        for dd in dl.find_all("dd"):
-            values.append(dd.text.strip())
+            # Create dict for bin name and string dates
+            binDict = dict(zip(keys, values))
 
-        # Create dict for bin name and string dates
-        binDict = dict(zip(keys, values))
+            # Process dict for valid bin types
+            for bin in list(binDict):
+                if bin in binTypes:
+                    if not binDict[bin].startswith("You haven't yet signed up for"):
+                        # Convert date
+                        date = datetime.strptime(binDict[bin], "%a, %d %b %Y")
 
-        # Process dict for valid bin types
-        for bin in list(binDict):
-            if bin in binTypes:
-                if not binDict[bin].startswith("You haven't yet signed up for"):
-                    # Convert date
-                    date = datetime.strptime(binDict[bin], "%a, %d %b %Y")
+                        # Set bin data
+                        dict_data = {
+                            "type": bin,
+                            "collectionDate": date.strftime(date_format),
+                        }
+                        data["bins"].append(dict_data)
 
-                    # Set bin data
-                    dict_data = {
-                        "type": bin,
-                        "collectionDate": date.strftime(date_format),
-                    }
-                    data["bins"].append(dict_data)
+            # Return bin data
+            return data
 
-        # Return bin data
-        return data
+        except Exception as e:
+            print(f"Error fetching/parsing data: {str(e)}")
+            return {"bins": [{"type": "Error", "collectionDate": "2024-01-01"}]}
+        finally:
+            if "driver" in locals():
+                driver.quit()
uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py
@@ -1,3 +1,5 @@
+# Legacy script. Copied to Lewes and Eastbourne.
+
 from bs4 import BeautifulSoup
 
 from uk_bin_collection.uk_bin_collection.common import *
uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py
@@ -1,13 +1,15 @@
+from datetime import datetime
+
 from bs4 import BeautifulSoup
-from uk_bin_collection.uk_bin_collection.common import *
-from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 from selenium import webdriver
-from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
-from datetime import datetime
+from selenium.webdriver.support.ui import WebDriverWait
+
+from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.common import date_format
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
 class CouncilClass(AbstractGetBinDataClass):
@@ -15,27 +17,57 @@ class CouncilClass(AbstractGetBinDataClass):
         postcode = kwargs.get("postcode", "")
         web_driver = kwargs.get("web_driver")
         headless = kwargs.get("headless")
-
-        options = webdriver.ChromeOptions()
-        if headless:
-            options.add_argument("--headless")
-        driver = create_webdriver(web_driver, headless)
+        data = {"bins": []}
 
         try:
-            driver.get(
-                f"https://eppingforestdc.maps.arcgis.com/apps/instant/lookup/index.html?appid=bfca32b46e2a47cd9c0a84f2d8cdde17&find={postcode}"
+            # Initialize webdriver with logging
+            print(f"Initializing webdriver with: {web_driver}, headless: {headless}")
+            driver = create_webdriver(web_driver, headless, None, __name__)
+
+            # Format and load URL
+            page_url = f"https://eppingforestdc.maps.arcgis.com/apps/instant/lookup/index.html?appid=bfca32b46e2a47cd9c0a84f2d8cdde17&find={postcode}"
+            print(f"Accessing URL: {page_url}")
+            driver.get(page_url)
+
+            # Wait for initial page load
+            wait = WebDriverWait(driver, 20)  # Reduced timeout to fail faster if issues
+
+            # First wait for any loading indicators to disappear
+            try:
+                print("Waiting for loading spinner to disappear...")
+                wait.until(
+                    EC.invisibility_of_element_located(
+                        (By.CSS_SELECTOR, ".esri-widget--loader-container")
+                    )
+                )
+            except Exception as e:
+                print(f"Loading spinner wait failed (may be normal): {str(e)}")
+
+            # Then wait for the content container
+            print("Waiting for content container...")
+            wait.until(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, ".esri-feature-content")
+                )
             )
-            wait = WebDriverWait(driver, 10)
-            WebDriverWait(driver, 10).until(
+
+            # Finally wait for actual content
+            print("Waiting for content to be visible...")
+            content = wait.until(
                 EC.visibility_of_element_located(
                     (By.CSS_SELECTOR, ".esri-feature-content")
                 )
            )
+
+            # Check if content is actually present
+            if not content:
+                raise ValueError("Content element found but empty")
+
+            print("Content found, getting page source...")
             html_content = driver.page_source
 
             soup = BeautifulSoup(html_content, "html.parser")
             bin_info_divs = soup.select(".esri-feature-content p")
-            data = {"bins": []}
             for div in bin_info_divs:
                 if "collection day is" in div.text:
                     bin_type, date_str = div.text.split(" collection day is ")
uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py
@@ -14,7 +14,19 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        # Parse the page
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://onlineservices.glasgow.gov.uk/forms/RefuseAndRecyclingWebApplication/CollectionsCalendar.aspx?UPRN={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url, verify=False)
         soup = BeautifulSoup(page.text, features="html.parser")
         soup.prettify()
 
uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py
@@ -27,9 +27,8 @@ class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
         try:
-            uprn = kwargs.get("uprn")
             postcode = kwargs.get("postcode")
-            full_address = kwargs.get("paon")
+            house_number = kwargs.get("paon")
 
             url = "https://my.guildford.gov.uk/customers/s/view-bin-collections"
 
@@ -60,7 +59,7 @@ class CouncilClass(AbstractGetBinDataClass):
                 EC.presence_of_element_located(
                     (
                         By.XPATH,
-                        f"//lightning-base-formatted-text[contains(text(), '{full_address}')]",
+                        f"//lightning-base-formatted-text[contains(text(), '{house_number}')]",
                     )
                 )
             )
uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py
@@ -15,9 +15,20 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://www.herefordshire.gov.uk/rubbish-recycling/check-bin-collection-day?blpu_uprn={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
         # Make a BS4 object
-        soup = BeautifulSoup(page.text, features="html.parser")
-        soup.prettify()
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify
 
         data = {"bins": []}
 
uk_bin_collection/uk_bin_collection/councils/HuntingdonDistrictCouncil.py
@@ -2,10 +2,12 @@
 
 # This script pulls (in one hit) the data from
 # Huntingdon District Council District Council Bins Data
+from datetime import datetime
+
 from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-from uk_bin_collection.uk_bin_collection.common import date_format
-from datetime import datetime
 
 
 # import the wonderful Beautiful Soup and the URL grabber
@@ -17,9 +19,21 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page, **kwargs) -> None:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"http://www.huntingdonshire.gov.uk/refuse-calendar/{user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
         # Make a BS4 object
-        soup = BeautifulSoup(page.text, features="html.parser")
-        soup.prettify()
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify
 
         data = {"bins": []}
 
uk_bin_collection/uk_bin_collection/councils/LewesDistrictCouncil.py
@@ -0,0 +1,76 @@
+# Eastbourne uses the same script.
+
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://environmentfirst.co.uk/house.php?uprn={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, features="html.parser")
+        soup.prettify()
+
+        # Get the paragraph lines from the page
+        data = {"bins": []}
+        page_text = soup.find("div", {"class": "collect"}).find_all("p")
+
+        # Parse the correct lines (find them, remove the ordinal indicator and make them the correct format date) and
+        # then add them to the dictionary
+        rubbish_day = datetime.strptime(
+            remove_ordinal_indicator_from_date_string(
+                page_text[2].find_next("strong").text
+            ),
+            "%d %B %Y",
+        ).strftime(date_format)
+        dict_data = {
+            "type": "Rubbish",
+            "collectionDate": rubbish_day,
+        }
+        data["bins"].append(dict_data)
+        recycling_day = datetime.strptime(
+            remove_ordinal_indicator_from_date_string(
+                page_text[4].find_next("strong").text
+            ),
+            "%d %B %Y",
+        ).strftime(date_format)
+        dict_data = {
+            "type": "Recycling",
+            "collectionDate": recycling_day,
+        }
+        data["bins"].append(dict_data)
+
+        if len(page_text) > 5:
+            garden_day = datetime.strptime(
+                remove_ordinal_indicator_from_date_string(
+                    page_text[6].find_next("strong").text
+                ),
+                "%d %B %Y",
+            ).strftime(date_format)
+            dict_data = {
+                "type": "Garden",
+                "collectionDate": garden_day,
+            }
+            data["bins"].append(dict_data)
+
+        return data
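The new module above keeps the parse_data interface used elsewhere in the package, so a direct invocation with a placeholder UPRN would look roughly like the sketch below; normally the class is driven through the library's own entry point rather than called by hand, and the UPRN shown is purely illustrative.

council = CouncilClass()
result = council.parse_data("", uprn="100060000000")  # placeholder UPRN; the page argument is unused here
for entry in result["bins"]:
    print(entry["type"], entry["collectionDate"])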
uk_bin_collection/uk_bin_collection/councils/LiverpoolCityCouncil.py
@@ -1,7 +1,8 @@
 from bs4 import BeautifulSoup
+from dateutil.relativedelta import relativedelta
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
-from dateutil.relativedelta import relativedelta
 
 
 # import the wonderful Beautiful Soup and the URL grabber
@@ -18,9 +19,20 @@ class CouncilClass(AbstractGetBinDataClass):
         collections = []
         curr_date = datetime.today()
 
-        # Parse the page
-        soup = BeautifulSoup(page.text, features="html.parser")
-        soup.prettify()
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://liverpool.gov.uk/Bins/BinDatesTable?UPRN={user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
+
+        # Make a BS4 object
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify
 
         # Get all table rows on the page - enumerate gives us an index, which is handy for to keep a row count.
         # In this case, the first (0th) row is headings, so we can skip it, then parse the other data.
uk_bin_collection/uk_bin_collection/councils/MaidstoneBoroughCouncil.py
@@ -3,6 +3,7 @@ from datetime import datetime
 
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver.support.wait import WebDriverWait
@@ -10,8 +11,6 @@ from selenium.webdriver.support.wait import WebDriverWait
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-# import the wonderful Beautiful Soup and the URL grabber
-
 
 class CouncilClass(AbstractGetBinDataClass):
     """
@@ -30,7 +29,7 @@ class CouncilClass(AbstractGetBinDataClass):
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
             check_postcode(user_postcode)
-
+
             # Create Selenium webdriver
             driver = create_webdriver(web_driver, headless, None, __name__)
             driver.get(page)
@@ -41,68 +40,64 @@ class CouncilClass(AbstractGetBinDataClass):
             driver.switch_to.frame(iframe_presense)
 
             wait = WebDriverWait(driver, 60)
-
+
             # Postal code input
             inputElement_postcodesearch = wait.until(
                 EC.element_to_be_clickable((By.NAME, "postcode"))
             )
             inputElement_postcodesearch.send_keys(user_postcode)
-
-            # Wait for the 'Select address' dropdown to be updated
-            dropdown_select = wait.until(
-                EC.presence_of_element_located((By.XPATH, "//span[contains(text(), 'Select...')]"))
-            )
-            dropdown_select.click()
-
+
+            time.sleep(5)
+
+            inputElement_postcodesearch.send_keys(Keys.TAB + Keys.DOWN)
+
             dropdown = wait.until(
-                EC.element_to_be_clickable((By.XPATH, f"//div[contains(text(), ' {user_paon}')]"))
+                EC.element_to_be_clickable(
+                    (By.XPATH, f"//div[contains(text(), ' {user_paon}')]")
+                )
             )
             dropdown.click()
 
-            # Wait for 'Searching for...' to be added to page
-            WebDriverWait(driver, timeout=15).until(
+            # This website is horrible!
+            WebDriverWait(driver, 20).until(
                 EC.text_to_be_present_in_element(
-                    (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
-                )
-            )
-
-            # Wait for 'Searching for...' to be removed from page
-            WebDriverWait(driver, timeout=15).until(
-                EC.none_of(
-                    EC.text_to_be_present_in_element(
-                        (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
-                    )
+                    (By.CSS_SELECTOR, "div.col-collection-panel"), "Next collection"
                 )
             )
 
             # Even then it can still be adding data to the page...
             time.sleep(5)
 
-            soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-            # This is ugly but there is literally no consistency to the HTML
-            def is_a_collection_date(t):
-                return any("Next collection" in c for c in t.children)
-
-            for next_collection in soup.find_all(is_a_collection_date):
-                bin_info = list(
-                    next_collection.parent.select_one("div:nth-child(1)").children
-                )
-                if not bin_info:
-                    continue
-                bin = bin_info[0].get_text()
-                date = next_collection.select_one("strong").get_text(strip=True)
-                bin_date = datetime.strptime(date, "%d %b %Y")
-                dict_data = {
-                    "type": bin,
-                    "collectionDate": bin_date.strftime(date_format),
-                }
-                bin_data["bins"].append(dict_data)
-
-            bin_data["bins"].sort(
-                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+            # Scraping via Selenium rather than BeautifulSoup, to ensure eveything's loaded
+            collection_panels = driver.find_elements(
+                By.CSS_SELECTOR, "div.col-collection-panel"
            )
 
+            for panel in collection_panels:
+                try:
+                    # Get bin type (e.g., General waste, Food waste)
+                    bin_type = panel.find_element(
+                        By.CSS_SELECTOR, "h3.collectionDataHeader"
+                    ).text.strip()
+                    # Get next collection date
+                    lines = panel.find_elements(By.CSS_SELECTOR, "ul li")
+                    for line in lines:
+                        if "Next collection" in line.text:
+                            date_str = (
+                                line.text.split("Next collection")[1]
+                                .strip(": ")
+                                .strip()
+                            )
+                            bin_date = datetime.strptime(date_str, "%d/%m/%Y")
+                            bin_data["bins"].append(
+                                {
+                                    "type": bin_type,
+                                    "collectionDate": bin_date.strftime(date_format),
+                                }
+                            )
+                except Exception as inner_e:
+                    print(f"Skipping one panel due to error: {inner_e}")
+
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
uk_bin_collection/uk_bin_collection/councils/NewhamCouncil.py
@@ -7,14 +7,21 @@ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
-        # get the page data
-        http = urllib3.PoolManager()
-        response = http.request("GET", kwargs["url"])
-        page_data = response.data
+
+        try:
+            user_uprn = kwargs.get("uprn")
+            check_uprn(user_uprn)
+            url = f"https://bincollection.newham.gov.uk/Details/Index/{user_uprn}"
+            if not user_uprn:
+                # This is a fallback for if the user stored a URL in old system. Ensures backwards compatibility.
+                url = kwargs.get("url")
+        except Exception as e:
+            raise ValueError(f"Error getting identifier: {str(e)}")
 
         # Make a BS4 object
-        soup = BeautifulSoup(page_data, features="html.parser")
-        soup.prettify()
+        page = requests.get(url)
+        soup = BeautifulSoup(page.text, "html.parser")
+        soup.prettify
 
         # Form a JSON wrapper
         data = {"bins": []}
uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py
@@ -46,7 +46,8 @@ class CouncilClass(AbstractGetBinDataClass):
                 "type": bin_type,
                 "collectionDate": get_next_occurrence_from_day_month(
                     datetime.strptime(
-                        c["BinCollectionDate"].replace(" (*)", "").strip()
+                        remove_ordinal_indicator_from_date_string(
+                            c["BinCollectionDate"].replace(" (*)", "").strip())
                         + " "
                         + datetime.now().strftime("%Y"),
                         "%A %d %B %Y",
uk_bin_collection/uk_bin_collection/councils/NorthSomersetCouncil.py
@@ -43,9 +43,13 @@ class CouncilClass(AbstractGetBinDataClass):
             collectionDate = (
                 cells[1].get_text(strip=True) + " " + datetime.now().strftime("%Y")
             )
-            nextCollectionDate = (
-                cells[2].get_text(strip=True) + " " + datetime.now().strftime("%Y")
-            )
+
+            if len(cells) > 2:
+                nextCollectionDate = (
+                    cells[2].get_text(strip=True) + " " + datetime.now().strftime("%Y")
+                )
+            else:
+                nextCollectionDate = ""
 
             # Make each Bin element in the JSON
             dict_data = {
@@ -59,12 +63,13 @@ class CouncilClass(AbstractGetBinDataClass):
             data["bins"].append(dict_data)
 
             # Make each next Bin element in the JSON
-            dict_data = {
-                "type": binType,
-                "collectionDate": get_next_occurrence_from_day_month(
-                    datetime.strptime(nextCollectionDate, "%A %d %B %Y")
-                ).strftime(date_format),
-            }
+            if nextCollectionDate != "":
+                dict_data = {
+                    "type": binType,
+                    "collectionDate": get_next_occurrence_from_day_month(
+                        datetime.strptime(nextCollectionDate, "%A %d %B %Y")
+                    ).strftime(date_format),
+                }
 
             # Add data to the main JSON Wrapper
             data["bins"].append(dict_data)
uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py
@@ -81,9 +81,9 @@ class CouncilClass(AbstractGetBinDataClass):
         # The regular calendar only shows until end of March 2026, work out how many weeks that is
         weeks_total = math.floor((datetime(2026, 4, 1) - datetime.now()).days / 7)
 
-        # The garden calendar only shows until end of November 2024, work out how many weeks that is
+        # The garden calendar only shows until end of November 2025, work out how many weeks that is
         garden_weeks_total = math.floor(
-            (datetime(2024, 12, 1) - datetime.now()).days / 7
+            (datetime(2025, 12, 1) - datetime.now()).days / 7
        )
 
         regular_collections, garden_collections, special_collections = [], [], []
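The NorthTynesideCouncil change above only moves the garden-waste cut-off from the end of November 2024 to the end of November 2025; both horizons are still turned into whole-week counts the same way, as in this small sketch of the arithmetic:

import math
from datetime import datetime

# Whole weeks remaining on each calendar, as computed in the module.
weeks_total = math.floor((datetime(2026, 4, 1) - datetime.now()).days / 7)
garden_weeks_total = math.floor((datetime(2025, 12, 1) - datetime.now()).days / 7)
print(weeks_total, garden_weeks_total)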