uk_bin_collection 0.86.0__py3-none-any.whl → 0.86.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -427,6 +427,14 @@
427
427
  "wiki_name": "Glasgow City Council",
428
428
  "wiki_note": "Replace XXXXXXXX with UPRN."
429
429
  },
430
+ "GloucesterCityCouncil": {
431
+ "house_number": "111",
432
+ "postcode": "GL2 0RR",
433
+ "uprn": "100120479507",
434
+ "skip_get_url": true,
435
+ "url": "https://gloucester-self.achieveservice.com/service/Bins___Check_your_bin_day",
436
+ "wiki_name": "Gloucester City Council"
437
+ },
430
438
  "GuildfordCouncil": {
431
439
  "house_number": "THE LODGE, PUTTENHAM HILL HOUSE, PUTTENHAM HILL, PUTTENHAM, GUILDFORD, GU3 1AH",
432
440
  "postcode": "GU3 1AH",
@@ -0,0 +1,128 @@
1
+ import time
2
+ from datetime import datetime
3
+
4
+ from bs4 import BeautifulSoup
5
+ from selenium.webdriver.common.by import By
6
+ from selenium.webdriver.support import expected_conditions as EC
7
+ from selenium.webdriver.support.ui import Select
8
+ from selenium.webdriver.support.wait import WebDriverWait
9
+
10
+ from uk_bin_collection.uk_bin_collection.common import *
11
+ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
12
+
13
+ # import the wonderful Beautiful Soup and the URL grabber
14
+
15
+
16
class CouncilClass(AbstractGetBinDataClass):
    """
    Scraper for the Gloucester City Council "Check your bin day" form.

    The form is rendered client-side inside an iframe, so the page is driven
    with Selenium and the resulting HTML is parsed with Beautiful Soup.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look up the next bin collections for a property.

        Args:
            page: Ignored; the council form URL is fixed and set below.
            **kwargs: Reads ``uprn`` and ``postcode`` (both passed to the
                ``check_uprn`` / ``check_postcode`` helpers), plus
                ``web_driver`` and ``headless`` which are forwarded to
                ``create_webdriver``.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` with the
            entries sorted by collection date. Dates are rendered with
            ``date_format``.

        Raises:
            Exception: Any error raised while driving the browser or parsing
                the page is printed and then re-raised unchanged.
        """
        driver = None
        bin_data = {"bins": []}
        try:
            page = "https://gloucester-self.achieveservice.com/service/Bins___Check_your_bin_day"

            user_uprn = kwargs.get("uprn")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_uprn(user_uprn)
            check_postcode(user_postcode)

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get(page)

            # Wait until the buttons can actually be clicked; an element that
            # is merely present in the DOM may not be interactable yet.
            cookies_button = WebDriverWait(driver, timeout=15).until(
                EC.element_to_be_clickable((By.ID, "close-cookie-message"))
            )
            cookies_button.click()

            without_login_button = WebDriverWait(driver, timeout=15).until(
                EC.element_to_be_clickable(
                    (By.LINK_TEXT, "or, Continue with no account")
                )
            )
            without_login_button.click()

            # The address form is hosted inside an iframe
            form_frame = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.ID, "fillform-frame-1"))
            )
            driver.switch_to.frame(form_frame)

            wait = WebDriverWait(driver, 60)
            postcode_input = wait.until(
                EC.element_to_be_clickable((By.NAME, "find_postcode"))
            )
            postcode_input.send_keys(user_postcode)

            # Wait for the 'Select address' dropdown to be updated
            time.sleep(2)

            dropdown = wait.until(
                EC.element_to_be_clickable((By.NAME, "chooseAddress"))
            )
            # Pick the address whose option value is the requested UPRN
            Select(dropdown).select_by_value(str(user_uprn))

            searching_text = EC.text_to_be_present_in_element(
                (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
            )

            # Wait for 'Searching for...' to be added to page
            WebDriverWait(driver, timeout=15).until(searching_text)

            # Wait for 'Searching for...' to be removed from page
            WebDriverWait(driver, timeout=15).until(EC.none_of(searching_text))

            # Even then it can still be adding data to the page...
            time.sleep(5)

            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # This is ugly but there is literally no consistency to the HTML
            def is_a_collection_date(t):
                return any("Next collection" in c for c in t.children)

            for next_collection in soup.find_all(is_a_collection_date):
                container = next_collection.parent
                name_holder = (
                    container.select_one("div:nth-child(1)")
                    if container is not None
                    else None
                )
                date_tag = next_collection.select_one("strong")
                # The markup is inconsistent, so skip entries that lack
                # either the bin name or the date instead of crashing on None.
                if name_holder is None or date_tag is None:
                    continue

                bin_info = list(name_holder.children)
                if not bin_info:
                    continue

                bin_type = bin_info[0].get_text()
                collection_date = datetime.strptime(
                    date_tag.get_text(strip=True), "%d %b %Y"
                )
                bin_data["bins"].append(
                    {
                        "type": bin_type,
                        "collectionDate": collection_date.strftime(date_format),
                    }
                )

            # Parse with the same format the dates were written with
            bin_data["bins"].sort(
                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
            )

        except Exception as e:
            # Report the failure, then let it propagate to the caller
            print(f"An error occurred: {e}")
            raise
        finally:
            # Always shut the browser down, whether or not an error occurred
            if driver:
                driver.quit()
        return bin_data
@@ -1,15 +1,11 @@
1
1
  from bs4 import BeautifulSoup
2
-
3
2
  from uk_bin_collection.uk_bin_collection.common import *
4
3
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
4
+ from datetime import datetime
5
5
 
6
-
7
- # import the wonderful Beautiful Soup and the URL grabber
8
6
  class CouncilClass(AbstractGetBinDataClass):
9
7
  """
10
- Concrete classes have to implement all abstract operations of the base
11
- class. They can also override some operations with a default
12
- implementation.
8
+ Concrete class to scrape bin collection data.
13
9
  """
14
10
 
15
11
  def parse_data(self, page: str, **kwargs) -> dict:
@@ -27,65 +23,71 @@ class CouncilClass(AbstractGetBinDataClass):
27
23
  soup.prettify()
28
24
 
29
25
  data = {"bins": []}
26
+ # Locate the section with bin collection data
30
27
  sections = soup.find_all("div", {"class": "wil_c-content-section_heading"})
28
+
31
29
  for s in sections:
32
30
  if s.get_text(strip=True).lower() == "bin collections":
33
31
  rows = s.find_next_sibling(
34
32
  "div", {"class": "c-content-section_body"}
35
- ).find_all("div", {"class": "u-mb-8"})
33
+ ).find_all("div", class_="tablet:l-col-fb-4 u-mt-10")
34
+
36
35
  for row in rows:
37
- title = row.find("div", {"class": "u-mb-4"})
38
- collections = row.find_all("div", {"class": "u-mb-2"})
39
- if title and collections:
36
+ title_elem = row.find("div", class_="u-mb-4")
37
+ if title_elem:
38
+ title = title_elem.get_text(strip=True).capitalize()
39
+
40
+ # Find all collection info in the same section
41
+ collections = row.find_all("div", class_="u-mb-2")
40
42
  for c in collections:
41
- if (
42
- c.get_text(strip=True)
43
- .lower()
44
- .startswith("next collection")
45
- ):
46
- # add next collection
47
- next_collection_date = datetime.strptime(
48
- c.get_text(strip=True).replace(
49
- "Next collection - ", ""
50
- ),
51
- "%A, %d %B %Y",
52
- ).strftime(date_format)
53
- dict_data = {
54
- "type": title.get_text(strip=True).capitalize(),
55
- "collectionDate": next_collection_date,
56
- }
57
- data["bins"].append(dict_data)
58
- # add future collections without duplicating next collection
59
- future_collections = row.find(
60
- "ul", {"class": "u-mt-4"}
61
- ).find_all("li")
62
- for c in future_collections:
43
+ text = c.get_text(strip=True).lower()
44
+
45
+ if "next collection" in text:
46
+ date_text = text.replace("next collection - ", "")
47
+ try:
48
+ next_collection_date = datetime.strptime(
49
+ date_text, "%A, %d %B %Y"
50
+ ).strftime(date_format)
51
+
52
+ dict_data = {
53
+ "type": title,
54
+ "collectionDate": next_collection_date
55
+ }
56
+ data["bins"].append(dict_data)
57
+ except ValueError:
58
+ # Skip if the date isn't a valid date
59
+ print(f"Skipping invalid date: {date_text}")
60
+
61
+ # Get future collections
62
+ future_collections_section = row.find("ul", class_="u-mt-4")
63
+ if future_collections_section:
64
+ future_collections = future_collections_section.find_all("li")
65
+ for future_collection in future_collections:
66
+ future_date_text = future_collection.get_text(strip=True)
67
+ try:
63
68
  future_collection_date = datetime.strptime(
64
- c.get_text(strip=True),
65
- "%A, %d %B %Y",
69
+ future_date_text, "%A, %d %B %Y"
66
70
  ).strftime(date_format)
67
- if (
68
- future_collection_date
69
- != next_collection_date
70
- ):
71
+
72
+ # Avoid duplicates of next collection date
73
+ if future_collection_date != next_collection_date:
71
74
  dict_data = {
72
- "type": title.get_text(
73
- strip=True
74
- ).capitalize(),
75
- "collectionDate": future_collection_date,
75
+ "type": title,
76
+ "collectionDate": future_collection_date
76
77
  }
77
78
  data["bins"].append(dict_data)
79
+ except ValueError:
80
+ # Skip if the future collection date isn't valid
81
+ print(f"Skipping invalid future date: {future_date_text}")
78
82
 
83
+ # Sort the collections by date
79
84
  data["bins"].sort(
80
85
  key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
81
86
  )
82
87
  except Exception as e:
83
- # Here you can log the exception if needed
84
88
  print(f"An error occurred: {e}")
85
- # Optionally, re-raise the exception if you want it to propagate
86
89
  raise
87
90
  finally:
88
- # This block ensures that the driver is closed regardless of an exception
89
91
  if driver:
90
92
  driver.quit()
91
93
  return data
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: uk_bin_collection
3
- Version: 0.86.0
3
+ Version: 0.86.2
4
4
  Summary: Python Lib to collect UK Bin Data
5
5
  Author: Robert Bradley
6
6
  Author-email: robbrad182@gmail.com
@@ -2,7 +2,7 @@ uk_bin_collection/README.rst,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
2
2
  uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-cutwz5RoYYWZRLYx2tr6zIs_9Rc,3843
3
3
  uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
4
4
  uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
5
- uk_bin_collection/tests/input.json,sha256=w1JMksNwhHC4EojFuBhlvn1NrPgrR1XojBqneb3hJyQ,59938
5
+ uk_bin_collection/tests/input.json,sha256=WJ6GsmG_nIzmOA0ItYnBRc7DwWD8jsytgA6XO5WstMo,60242
6
6
  uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
7
7
  uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
8
8
  uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=LrOSt_loA1Mw3vTqaO2LpaDMu7rYJy6k5Kr-EOBln7s,3424
@@ -71,6 +71,7 @@ uk_bin_collection/uk_bin_collection/councils/ForestOfDeanDistrictCouncil.py,sha2
71
71
  uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py,sha256=Ecq4kMbtAHnQrnxjhC7CG3oEZQ3D1aAk5qXVZk-ouxc,4601
72
72
  uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py,sha256=IssL5CJSdcGPkJCB0q2kieUSEjfoS6nDKfeT7-9eKsQ,2183
73
73
  uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py,sha256=IOgM8Wl-LpO1T-F9uU1FlVfPaEObpvsdP7S2h03Mycc,2528
74
+ uk_bin_collection/uk_bin_collection/councils/GloucesterCityCouncil.py,sha256=8Wjvmdvg5blHVrREaEnhhWZaWhYVP4v_KdDVPLIUxaU,4889
74
75
  uk_bin_collection/uk_bin_collection/councils/GuildfordCouncil.py,sha256=9pVrmQhZcK2AD8gX8mNvP--L4L9KaY6L3B822VX6fec,5695
75
76
  uk_bin_collection/uk_bin_collection/councils/HaltonBoroughCouncil.py,sha256=r8cmtWhMJg-XG63ZHxidKKW7i4yQNrZSSMSCkBwrqjI,5837
76
77
  uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py,sha256=t_6AkAu4wrv8Q0WlDhWh_82I0djl5tk531Pzs-SjWzg,2647
@@ -166,7 +167,7 @@ uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py,sha256=6
166
167
  uk_bin_collection/uk_bin_collection/councils/UttlesfordDistrictCouncil.py,sha256=8CvO-WgdKdvyaOf3TYc4XwME8ogAXojgB40oyGRL8Dw,4129
167
168
  uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py,sha256=Phgb_ECiUOOkqOx6OsfsTHMCW5VQfRmOC2zgYIQhuZA,5044
168
169
  uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py,sha256=5nZLbU5YVKNsJ2X_wuybrNLFAzjVAxkazu-bYP4IGXw,4292
169
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py,sha256=9zW4C0WKbsDLCKvFyVYFpyvp20JbrWc4G0HzadqF8WQ,4466
170
+ uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py,sha256=-xqJOzHTrT4jOB3rHPXFYeqLaHyK9XmCPi92whaYBhw,4671
170
171
  uk_bin_collection/uk_bin_collection/councils/WalthamForest.py,sha256=P7MMw0EhpRmDbbnHb25tY5_yvYuZUFwJ1br4TOv24sY,4997
171
172
  uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py,sha256=3WQrAxzYzKoV4LyOqNTp9xINVsNi1xW9t8etducGeag,1146
172
173
  uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py,sha256=tp9l7vdgSGRzNNG0pDfnNuFj4D2bpRJUJmAiTJ6bM0g,4662
@@ -187,8 +188,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
187
188
  uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=4s9ODGPAwPqwXc8SrTX5Wlfmizs3_58iXUtHc4Ir86o,1162
188
189
  uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
189
190
  uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
190
- uk_bin_collection-0.86.0.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
191
- uk_bin_collection-0.86.0.dist-info/METADATA,sha256=W9Zw3dC1eK4xUSPeYD_HDSR96SdLdci0-6ULiuop1gg,16231
192
- uk_bin_collection-0.86.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
193
- uk_bin_collection-0.86.0.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
194
- uk_bin_collection-0.86.0.dist-info/RECORD,,
191
+ uk_bin_collection-0.86.2.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
192
+ uk_bin_collection-0.86.2.dist-info/METADATA,sha256=LRsdDRZdbzUoFma7eHGhI7jA8ZtZrf7y4xaO_3q6Lcw,16231
193
+ uk_bin_collection-0.86.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
194
+ uk_bin_collection-0.86.2.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
195
+ uk_bin_collection-0.86.2.dist-info/RECORD,,