uk_bin_collection 0.148.4__py3-none-any.whl → 0.148.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2341,6 +2341,7 @@
2341
2341
  "ThanetDistrictCouncil": {
2342
2342
  "uprn": "100061111858",
2343
2343
  "url": "https://www.thanet.gov.uk",
2344
+ "web_driver": "http://selenium:4444",
2344
2345
  "wiki_name": "Thanet",
2345
2346
  "wiki_note": "Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN.",
2346
2347
  "LAD24CD": "E07000114"
@@ -2375,7 +2376,8 @@
2375
2376
  },
2376
2377
  "TorbayCouncil": {
2377
2378
  "skip_get_url": true,
2378
- "uprn": "10024000295",
2379
+ "uprn": "10000016984",
2380
+ "postcode": "TQ1 1AG",
2379
2381
  "url": "https://www.torbay.gov.uk/recycling/bin-collections/",
2380
2382
  "wiki_name": "Torbay",
2381
2383
  "wiki_note": "Provide your UPRN. Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find it.",
@@ -1,12 +1,16 @@
1
+ import json
1
2
  import time
3
+ from datetime import datetime
2
4
 
3
- import requests
5
+ from bs4 import BeautifulSoup
6
+ from selenium.webdriver.common.by import By
7
+ from selenium.webdriver.support import expected_conditions as EC
8
+ from selenium.webdriver.support.ui import WebDriverWait
4
9
 
5
10
  from uk_bin_collection.uk_bin_collection.common import *
6
11
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
7
12
 
8
13
 
9
- # import the wonderful Beautiful Soup and the URL grabber
10
14
  class CouncilClass(AbstractGetBinDataClass):
11
15
  """
12
16
  Concrete classes have to implement all abstract operations of the
@@ -15,37 +19,60 @@ class CouncilClass(AbstractGetBinDataClass):
15
19
  """
16
20
 
17
21
  def parse_data(self, page: str, **kwargs) -> dict:
18
-
19
22
  user_uprn = kwargs.get("uprn")
20
23
  check_uprn(user_uprn)
21
24
  bindata = {"bins": []}
22
25
 
23
- URI = f"https://www.thanet.gov.uk/wp-content/mu-plugins/collection-day/incl/mu-collection-day-calls.php?pAddress={user_uprn}"
26
+ url = f"https://www.thanet.gov.uk/wp-content/mu-plugins/collection-day/incl/mu-collection-day-calls.php?pAddress={user_uprn}"
27
+ web_driver = kwargs.get("web_driver")
28
+ headless = kwargs.get("headless")
29
+
30
+ # Create the Selenium WebDriver
31
+ driver = create_webdriver(web_driver, headless, None, __name__)
32
+
33
+ try:
34
+ print(f"Navigating to URL: {url}")
35
+ driver.get(url)
36
+
37
+ # Wait for Cloudflare to complete its check
38
+ WebDriverWait(driver, 30).until(
39
+ lambda d: d.execute_script("return document.readyState") == "complete"
40
+ )
41
+ print("Page loaded successfully.")
42
+
43
+ # Parse the page source with BeautifulSoup
44
+ soup = BeautifulSoup(driver.page_source, "html.parser")
24
45
 
25
- headers = {
26
- "x-requested-with": "XMLHttpRequest",
27
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
28
- }
46
+ # Extract the JSON data from the page
47
+ print("Extracting bin collection data...")
48
+ body_content = soup.find("body").text
49
+ if not body_content:
50
+ raise ValueError("Expected JSON data not found in the <body> tag.")
29
51
 
30
- # Make the GET request
31
- response = requests.get(URI, headers=headers)
52
+ bin_collection = json.loads(body_content)
32
53
 
33
- # Parse the JSON response
34
- bin_collection = response.json()
54
+ # Process the bin collection data
55
+ for collection in bin_collection:
56
+ bin_type = collection["type"]
57
+ collection_date = collection["nextDate"].split(" ")[0]
35
58
 
36
- # Loop through each collection in bin_collection
37
- for collection in bin_collection:
38
- bin_type = collection["type"]
39
- collection_date = collection["nextDate"].split(" ")[0]
59
+ dict_data = {
60
+ "type": bin_type,
61
+ "collectionDate": collection_date,
62
+ }
63
+ bindata["bins"].append(dict_data)
40
64
 
41
- dict_data = {
42
- "type": bin_type,
43
- "collectionDate": collection_date,
44
- }
45
- bindata["bins"].append(dict_data)
65
+ # Sort the bins by collection date
66
+ bindata["bins"].sort(
67
+ key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
68
+ )
69
+ print(bindata)
46
70
 
47
- bindata["bins"].sort(
48
- key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
49
- )
71
+ except Exception as e:
72
+ print(f"An error occurred: {e}")
73
+ raise
74
+ finally:
75
+ print("Cleaning up WebDriver...")
76
+ driver.quit()
50
77
 
51
78
  return bindata
@@ -1,51 +1,231 @@
1
+ # This script pulls bin collection data from Barking and Dagenham Council
2
+ # Example URL: https://www.lbbd.gov.uk/rubbish-recycling/household-bin-collection/check-your-bin-collection-days
3
+ import time
4
+
5
+ from bs4 import BeautifulSoup
6
+ from dateutil.parser import parse
7
+ from selenium.common.exceptions import NoSuchElementException, TimeoutException
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.common.keys import Keys
10
+ from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.webdriver.support.ui import Select
12
+ from selenium.webdriver.support.wait import WebDriverWait
13
+
1
14
  from uk_bin_collection.uk_bin_collection.common import *
2
15
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
3
16
 
4
17
 
5
18
  class CouncilClass(AbstractGetBinDataClass):
6
- """
7
- Concrete classes have to implement all abstract operations of the
8
- base class. They can also override some operations with a default
9
- implementation.
10
- """
11
19
 
12
20
  def parse_data(self, page: str, **kwargs) -> dict:
13
- uprn = kwargs.get("uprn")
14
- check_uprn(uprn)
15
-
16
- headers = {
17
- "Accept": "*/*",
18
- "Accept-Encoding": "gzip, deflate, br",
19
- "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8",
20
- "Connection": "keep-alive",
21
- "Host": "online.torbay.gov.uk",
22
- "Origin": "https://www.torbay.gov.uk",
23
- "Referer": "https://www.torbay.gov.uk/",
24
- "sec-ch-ua": '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"',
25
- "sec-ch-ua-mobile": "?0",
26
- "sec-ch-ua-platform": '"Windows"',
27
- "Sec-Fetch-Dest": "empty",
28
- "Sec-Fetch-Mode": "cors",
29
- "Sec-Fetch-Site": "same-site",
30
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
31
- }
32
- requests.packages.urllib3.disable_warnings()
33
- response = requests.get(
34
- f"https://online.torbay.gov.uk/services.bartec/collections?uprn={uprn}",
35
- headers=headers,
36
- )
37
- if response.status_code != 200:
38
- raise ValueError("No bin data found for provided UPRN.")
39
- json_data = json.loads(response.text)
40
-
41
- data = {"bins": []}
42
- for c in json_data:
43
- dict_data = {
44
- "type": c["Service"].replace("Empty ", "").strip(),
45
- "collectionDate": datetime.strptime(
46
- c["NextCollection"].strip(), "%d %B %Y"
47
- ).strftime(date_format),
48
- }
49
- data["bins"].append(dict_data)
21
+ driver = None
22
+ try:
23
+ data = {"bins": []}
24
+
25
+ user_uprn = kwargs.get("uprn")
26
+ user_postcode = kwargs.get("postcode")
27
+ web_driver = kwargs.get("web_driver")
28
+ headless = kwargs.get("headless")
29
+ url = kwargs.get("url")
30
+
31
+ check_postcode(user_postcode)
32
+
33
+ print(
34
+ f"Starting parse_data with parameters: postcode={user_postcode}, uprn={user_uprn}"
35
+ )
36
+ print(
37
+ f"Creating webdriver with: web_driver={web_driver}, headless={headless}"
38
+ )
39
+
40
+ driver = create_webdriver(web_driver, headless, None, __name__)
41
+ print(f"Navigating to URL: {url}")
42
+ driver.get(url)
43
+ print("Successfully loaded the page")
44
+
45
+ driver.maximize_window()
46
+
47
+ WebDriverWait(driver, 10).until(
48
+ lambda d: d.execute_script("return document.readyState") == "complete"
49
+ )
50
+
51
+ # Handle cookie banner if present
52
+ wait = WebDriverWait(driver, 60)
53
+ try:
54
+ cookie_button = wait.until(
55
+ EC.element_to_be_clickable(
56
+ (
57
+ By.XPATH,
58
+ "/html/body/div[1]/div/div[2]/button[1]",
59
+ )
60
+ ),
61
+ message="Cookie banner not found",
62
+ )
63
+ cookie_button.click()
64
+ print("Cookie banner clicked.")
65
+ time.sleep(1) # Brief pause to let banner disappear
66
+ except (TimeoutException, NoSuchElementException):
67
+ print("No cookie banner appeared or selector failed.")
68
+
69
+ bin_collection_button = wait.until(
70
+ EC.element_to_be_clickable(
71
+ (
72
+ By.XPATH,
73
+ "/html/body/main/div[4]/div/div[1]/div/div/div/div/div[2]/div/div/div/p/a",
74
+ )
75
+ ),
76
+ )
77
+ bin_collection_button.click()
78
+
79
+ # Save the original window
80
+ original_window = driver.current_window_handle
81
+
82
+ # Wait for the new window or tab
83
+ WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
84
+
85
+ # Loop through until we find a new window handle
86
+ for window_handle in driver.window_handles:
87
+ if window_handle != original_window:
88
+ driver.switch_to.window(window_handle)
89
+ break
90
+ # Now you're in the new tab and can interact with the postcode input
91
+ # Enter postcode
92
+ print("Looking for postcode input...")
93
+ wait.until(EC.presence_of_element_located((By.ID, "FF1168-text")))
94
+ post_code_input = wait.until(
95
+ EC.element_to_be_clickable((By.ID, "FF1168-text")),
96
+ message="Postcode input not clickable",
97
+ )
98
+ post_code_input.clear()
99
+ post_code_input.send_keys(user_postcode)
100
+ print(f"Entered postcode: {user_postcode}")
101
+
102
+ post_code_input.send_keys(Keys.TAB + Keys.ENTER)
103
+ # driver.switch_to.active_element.send_keys(Keys.TAB + Keys.ENTER)
104
+ print("Pressed ENTER on Search button")
105
+
106
+ # Wait for the dropdown to be clickable
107
+ address_select = wait.until(
108
+ EC.element_to_be_clickable((By.ID, "FF1168-list")),
109
+ message="Address dropdown not found",
110
+ )
111
+
112
+ # Click to focus the dropdown
113
+ address_select.click()
114
+ time.sleep(0.5) # Brief pause to let the dropdown open
115
+
116
+ # Get all options
117
+ options = address_select.find_elements(By.TAG_NAME, "option")
118
+ print(f"Found {len(options)} options in dropdown")
119
+
120
+ # Print all options first for debugging
121
+ print("\nAvailable options:")
122
+ for opt in options:
123
+ value = opt.get_attribute("value")
124
+ text = opt.text
125
+ print(f"Value: '{value}', Text: '{text}'")
126
+
127
+ # Try to find our specific UPRN
128
+ target_uprn = f"U{user_uprn}|"
129
+ print(f"\nLooking for UPRN pattern: {target_uprn}")
130
+
131
+ found = False
132
+ for option in options:
133
+ value = option.get_attribute("value")
134
+ if value and target_uprn in value:
135
+ print(f"Found matching address with value: {value}")
136
+ option.click()
137
+ found = True
138
+ break
139
+
140
+ if not found:
141
+ print(f"No matching address found for UPRN: {user_uprn}")
142
+ return data
143
+
144
+ print("Address selected successfully")
145
+ time.sleep(1) # Give time for the selection to take effect
146
+
147
+ # Wait for the address selection confirmation to appear
148
+ print("Waiting for address selection confirmation...")
149
+ WebDriverWait(driver, 10).until(
150
+ EC.presence_of_element_located((By.CLASS_NAME, "esbAddressSelected"))
151
+ )
152
+ print("Address selection confirmed")
153
+
154
+ # Click the Submit button
155
+ print("Clicking Submit button...")
156
+ submit_button = WebDriverWait(driver, 10).until(
157
+ EC.element_to_be_clickable((By.ID, "submit-button"))
158
+ )
159
+ submit_button.click()
160
+ time.sleep(1) # Brief pause to let the navigation start
161
+
162
+ # Wait for the collection details to appear
163
+ print("Waiting for collection details to load...")
164
+ try:
165
+ schedule_list = WebDriverWait(driver, 20).until(
166
+ EC.presence_of_element_located((By.ID, "resiCollectionDetails"))
167
+ )
168
+ print("Collection details loaded successfully")
169
+ except TimeoutException:
170
+ print(
171
+ "Timeout waiting for collection details - checking if page needs refresh"
172
+ )
173
+ driver.refresh()
174
+ time.sleep(2)
175
+ schedule_list = WebDriverWait(driver, 20).until(
176
+ EC.presence_of_element_located((By.ID, "resiCollectionDetails"))
177
+ )
178
+ print("Collection details loaded after refresh")
179
+
180
+ # Make a BS4 object
181
+ print("Parsing page with BeautifulSoup...")
182
+ soup = BeautifulSoup(driver.page_source, features="html.parser")
183
+
184
+ # Process collection details
185
+ print("Looking for collection details in the page...")
186
+
187
+ # Find all collection rows
188
+ collection_rows = soup.select("#resiCollectionDetails .row.fs-4")
189
+ print(f"\nProcessing {len(collection_rows)} collection rows...")
190
+
191
+ for row in collection_rows:
192
+ try:
193
+ # Get the collection service type (e.g., "Domestic Collection Service")
194
+ service_type = row.select_one("div.col:nth-child(3)").text.strip()
195
+
196
+ # Get the date from the second column
197
+ date_text = row.select_one("div[style*='width:360px']").text.strip()
198
+
199
+ # Parse the date
200
+ parsed_date = parse(date_text, fuzzy=True)
201
+ bin_date = parsed_date.strftime("%d/%m/%Y")
202
+
203
+ # Extract just the service type without " Collection Service"
204
+ bin_type = service_type.replace(" Collection Service", "")
205
+
206
+ # Add to data
207
+ if bin_type and bin_date:
208
+ dict_data = {
209
+ "type": bin_type,
210
+ "collectionDate": bin_date,
211
+ }
212
+ data["bins"].append(dict_data)
213
+ print(f"Successfully added collection: {dict_data}")
214
+
215
+ except Exception as e:
216
+ print(f"Error processing collection row: {e}")
217
+ continue
218
+
219
+ # Debug: Print the complete dict_data
220
+ print("\nFinal bin collection data:")
221
+ print(data)
222
+
223
+ except Exception as e:
224
+ print(f"An error occurred: {e}")
225
+ raise
226
+ finally:
227
+ print("Cleaning up webdriver...")
228
+ if driver:
229
+ driver.quit()
50
230
 
51
231
  return data
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: uk_bin_collection
3
- Version: 0.148.4
3
+ Version: 0.148.6
4
4
  Summary: Python Lib to collect UK Bin Data
5
5
  Author: Robert Bradley
6
6
  Author-email: robbrad182@gmail.com
@@ -7,7 +7,7 @@ uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-c
7
7
  uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
8
8
  uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
9
9
  uk_bin_collection/tests/generate_map_test_results.py,sha256=CKnGK2ZgiSXomRGkomX90DitgMP-X7wkHhyKORDcL2E,1144
10
- uk_bin_collection/tests/input.json,sha256=FGS-k2Bw2apOrAXgS3DS75bNSb97-CB2s7RwLufUHYI,131163
10
+ uk_bin_collection/tests/input.json,sha256=WD2BCIhsrpbU_53tvTPnHW7MQXiOYowTzkKN4UtuI9E,131240
11
11
  uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
12
12
  uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
13
13
  uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=VZ0a81sioJULD7syAYHjvK_-nT_Rd36tUyzPetSA0gk,3475
@@ -291,11 +291,11 @@ uk_bin_collection/uk_bin_collection/councils/TeignbridgeCouncil.py,sha256=-NowMN
291
291
  uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py,sha256=p1ZS5R4EGxbEWlRBrkGXgKwE_lkyBT-R60yKFFhVObc,1844
292
292
  uk_bin_collection/uk_bin_collection/councils/TendringDistrictCouncil.py,sha256=1_CkpWPTfRUEP5YJ9R4_dJRLtb-O9i83hfWJc1shw_c,4283
293
293
  uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py,sha256=Dtfkyrwt795W7gqFJxVGRR8t3R5WMNQZwTWJckLpZWE,8480
294
- uk_bin_collection/uk_bin_collection/councils/ThanetDistrictCouncil.py,sha256=-opmZG9GzjB_NvmWpN6nFZ7rlkSoaRrQICU5E8T0DEQ,1659
294
+ uk_bin_collection/uk_bin_collection/councils/ThanetDistrictCouncil.py,sha256=Cxrf0tUryDL-wFclPH5yovVt8i7Sc7g-ZFrU9_wg6KY,2717
295
295
  uk_bin_collection/uk_bin_collection/councils/ThreeRiversDistrictCouncil.py,sha256=RHt3e9oeKzwxjjY-M8aC0nk-ZXhHIoyC81JzxkPVxsE,5531
296
296
  uk_bin_collection/uk_bin_collection/councils/ThurrockCouncil.py,sha256=vAZMm6mcsdEcOkP15xwxWy9gdXpmLYQFH7qRifurNoY,2935
297
297
  uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py,sha256=UlgnHDoi8ecav2H5-HqKNDpqW1J3RN-c___5c08_Q7I,4859
298
- uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py,sha256=JW_BS7wkfxFsmx6taQtPAQWdBp1AfLrxs0XRQ2XZcSw,2029
298
+ uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py,sha256=fpyr5f5x8JSsusdMaEH248HINPl6yQv14McD1-xLnqk,9269
299
299
  uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py,sha256=qyxzMFaQp4ymDsnvoySqmJonQ_MLo4jYBbKmREECur4,4777
300
300
  uk_bin_collection/uk_bin_collection/councils/TunbridgeWellsCouncil.py,sha256=s8Nm9Ef-4561mEXPa0ylYHrXyYIulgCcNV2uAnrXyZk,2846
301
301
  uk_bin_collection/uk_bin_collection/councils/UttlesfordDistrictCouncil.py,sha256=GSELWbSn5jtznv6FSLIMxK6CyQ27MW9FoY_m5jhTEBA,4175
@@ -337,8 +337,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
337
337
  uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=QD4v4xpsEE0QheR_fGaNOIRMc2FatcUfKkkhAhseyVU,1159
338
338
  uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
339
339
  uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
340
- uk_bin_collection-0.148.4.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
341
- uk_bin_collection-0.148.4.dist-info/METADATA,sha256=x1eLHMk3GomATBirPas1gd_m7XGlTwIYw9pD_tiSMhM,20914
342
- uk_bin_collection-0.148.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
343
- uk_bin_collection-0.148.4.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
344
- uk_bin_collection-0.148.4.dist-info/RECORD,,
340
+ uk_bin_collection-0.148.6.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
341
+ uk_bin_collection-0.148.6.dist-info/METADATA,sha256=w-ll8-C6yznvAJKkIyof53UFTDki9OGprblylgnCglc,20914
342
+ uk_bin_collection-0.148.6.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
343
+ uk_bin_collection-0.148.6.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
344
+ uk_bin_collection-0.148.6.dist-info/RECORD,,