uk_bin_collection 0.152.2__py3-none-any.whl → 0.152.4__py3-none-any.whl

This diff covers publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
@@ -1693,9 +1693,10 @@
         "LAD24CD": "E06000012"
     },
     "NorthHertfordshireDistrictCouncil": {
-        "house_number": "2",
+        "house_number": "22",
         "postcode": "SG6 4BJ",
         "url": "https://www.north-herts.gov.uk",
+        "web_driver": "http://selenium:4444",
         "wiki_name": "North Hertfordshire",
         "wiki_note": "Pass the house number and postcode in their respective parameters.",
         "LAD24CD": "E07000099"
@@ -2179,9 +2180,10 @@
         "LAD24CD": "E07000179"
     },
     "SouthRibbleCouncil": {
-        "uprn": "010013246384",
-        "url": "https://www.southribble.gov.uk",
-        "wiki_command_url_override": "https://www.southribble.gov.uk",
+        "uprn": "10013243496",
+        "postcode": "PR26 7RZ",
+        "url": "https://forms.chorleysouthribble.gov.uk/xfp/form/70",
+        "wiki_command_url_override": "https://forms.chorleysouthribble.gov.uk/xfp/form/70",
         "wiki_name": "South Ribble",
         "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN.",
         "LAD24CD": "E07000126"
@@ -37,38 +37,55 @@ class CouncilClass(AbstractGetBinDataClass):
                 paragraphs = bin_row.find_all("p")
 
                 for p in paragraphs:
-                    if p.get_text() and "Next collection:" in p.get_text():
-                        date_str = p.get_text().replace("Next collection:", "").strip()
-                        # Extract day number from date string (e.g. "2" from "Friday 2nd May")
-                        day_number = int("".join(filter(str.isdigit, date_str)))
-                        # Replace ordinal in date string with plain number
-                        date_str = date_str.replace(
-                            get_date_with_ordinal(day_number), str(day_number)
+                    # Check for both singular and plural "Next collection(s):"
+                    if p.get_text() and (
+                        "Next collection:" in p.get_text()
+                        or "Next collections:" in p.get_text()
+                    ):
+                        # Extract collection dates
+                        date_text = (
+                            p.get_text()
+                            .replace("Next collection:", "")
+                            .replace("Next collections:", "")
+                            .strip()
                         )
 
-                        try:
-                            # Parse date with full format
-                            bin_date = datetime.strptime(date_str, "%A %d %B")
-
-                            # Add current year since it's not in the date string
-                            current_year = datetime.now().year
-                            bin_date = bin_date.replace(year=current_year)
-
-                            # If the date is in the past, it's probably for next year
-                            if bin_date < datetime.now():
-                                bin_date = bin_date.replace(year=current_year + 1)
-
-                            collections.append((bin_type, bin_date))
-                            print(
-                                f"Successfully parsed date for {bin_type}: {bin_date}"
-                            )
-                            break
-
-                        except ValueError as e:
-                            print(
-                                f"Failed to parse date '{date_str}' for {bin_type}: {e}"
-                            )
-                            continue
+                        # Split multiple dates if comma-separated
+                        date_strings = [date.strip() for date in date_text.split(",")]
+
+                        for date_str in date_strings:
+                            try:
+                                # Extract day number from date string (e.g. "2" from "Tuesday 27th May")
+                                day_number = int("".join(filter(str.isdigit, date_str)))
+                                # Replace ordinal in date string with plain number
+                                date_str = date_str.replace(
+                                    get_date_with_ordinal(day_number), str(day_number)
+                                )
+
+                                # Parse date with full format
+                                bin_date = datetime.strptime(date_str, "%A %d %B")
+
+                                # Add current year since it's not in the date string
+                                current_year = datetime.now().year
+                                bin_date = bin_date.replace(year=current_year)
+
+                                # If the date is in the past, it's probably for next year
+                                if bin_date < datetime.now():
+                                    bin_date = bin_date.replace(year=current_year + 1)
+
+                                collections.append((bin_type, bin_date))
+                                print(
+                                    f"Successfully parsed date for {bin_type}: {bin_date}"
+                                )
+
+                            except ValueError as e:
+                                print(
+                                    f"Failed to parse date '{date_str}' for {bin_type}: {e}"
+                                )
+                                continue
+
+                        # Found and processed the collection dates, so break the loop
+                        break
 
         except Exception as e:
             print(f"Error processing bin row: {e}")
@@ -130,4 +130,22 @@ class CouncilClass(AbstractGetBinDataClass):
             key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
         )
 
+        data["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+
+        # Deduplicate the bins based on type and collection date
+        # Feels a bit hacky, but fixes
+        # https://github.com/robbrad/UKBinCollectionData/issues/1436
+        unique_bins = []
+        seen = set()
+        for bin_item in data["bins"]:
+            # Create a unique identifier for each bin entry
+            bin_key = (bin_item["type"], bin_item["collectionDate"])
+            if bin_key not in seen:
+                seen.add(bin_key)
+                unique_bins.append(bin_item)
+
+        data["bins"] = unique_bins
+
         return data
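The new block deduplicates on the `(type, collectionDate)` pair, keeping first occurrences in order. A compact alternative sketch of the same idea: dict keys are unique and preserve insertion order on Python 3.7+, so a comprehension can replace the seen-set loop (it keeps the last duplicate's value rather than the first, which is equivalent only when duplicate rows are identical, as they are in the linked issue):

```python
# Order-preserving dedup via dict keys; equivalent when duplicates are identical.
bins = [
    {"type": "Recycling", "collectionDate": "30/05/2025"},
    {"type": "Recycling", "collectionDate": "30/05/2025"},  # duplicate row
    {"type": "Refuse", "collectionDate": "06/06/2025"},
]
unique = list({(b["type"], b["collectionDate"]): b for b in bins}.values())
assert len(unique) == 2
```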
@@ -77,8 +77,13 @@ class CouncilClass(AbstractGetBinDataClass):
             return data
 
         except Exception as e:
-            print(f"Error fetching/parsing data: {str(e)}")
-            return {"bins": [{"type": "Error", "collectionDate": "2024-01-01"}]}
+            import traceback
+
+            error_message = f"Error fetching/parsing data for Eastleigh: {str(e)}\n{traceback.format_exc()}"
+            print(error_message)
+            # Use the correct date format for the error fallback
+            today = datetime.now().strftime("%d/%m/%Y")
+            return {"bins": [{"type": "Error", "collectionDate": today}]}
         finally:
             if "driver" in locals():
                 driver.quit()
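The Eastleigh fallback previously returned a hard-coded ISO date that did not match the %d/%m/%Y format used elsewhere in the scraper, which could break downstream date sorting. A minimal sketch of the new error path, with the council scrape simulated by a raised exception:

```python
# Capture the full traceback and fall back to today's date in %d/%m/%Y format.
import traceback
from datetime import datetime

def safe_parse() -> dict:
    try:
        raise RuntimeError("simulated scrape failure")
    except Exception as e:
        print(f"Error fetching/parsing data: {e}\n{traceback.format_exc()}")
        today = datetime.now().strftime("%d/%m/%Y")
        return {"bins": [{"type": "Error", "collectionDate": today}]}

print(safe_parse())
```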
@@ -1,93 +1,287 @@
-import requests
+# direct URL works, but includes a token, so I'm using Selenium
+# https://waste.nc.north-herts.gov.uk/w/webpage/find-bin-collection-day-show-details?webpage_token=c7c7c3cbc2f0478735fc746ca985b8f4221dea31c24dde99e39fb1c556b07788&auth=YTc5YTAwZmUyMGQ3&id=1421457
+
+import re
+import time
+from datetime import datetime
+
 from bs4 import BeautifulSoup
+from dateutil.parser import parse
+from selenium.common.exceptions import NoSuchElementException, TimeoutException
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support.wait import WebDriverWait
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
-    """
-    Concrete classes have to implement all abstract operations of the
-    base class. They can also override some operations with a default
-    implementation.
-    """
 
     def parse_data(self, page: str, **kwargs) -> dict:
+        driver = None
+        try:
+            data = {"bins": []}
+
+            user_paon = kwargs.get("paon")
+            postcode = kwargs.get("postcode")
+            web_driver = kwargs.get("web_driver")
+            headless = kwargs.get("headless")
+            url = "https://waste.nc.north-herts.gov.uk/w/webpage/find-bin-collection-day-input-address"
+
+            driver = create_webdriver(web_driver, headless, None, __name__)
+            driver.get(url)
+
+            WebDriverWait(driver, 10).until(
+                lambda d: d.execute_script("return document.readyState") == "complete"
+            )
+
+            # Define the wait variable
+            wait = WebDriverWait(
+                driver, 20
+            )  # Create the wait object with a 20-second timeout
+
+            # Enter postcode - try different approaches for reliability
+            # print("Looking for postcode input...")
+
+            postcode_input = wait.until(
+                EC.element_to_be_clickable(
+                    (
+                        By.CSS_SELECTOR,
+                        "input.relation_path_type_ahead_search.form-control",
+                    )
+                ),
+                message="Postcode input not found by class",
+            )
+            postcode_input.clear()
+            postcode_input.send_keys(postcode)
+            # print(f"Entered postcode: {postcode}")
+
+            # Wait for the dropdown to load
+            # print("Waiting for address list to populate...")
+            try:
+                # Wait for the results to appear
+                wait.until(
+                    EC.presence_of_element_located(
+                        (By.CSS_SELECTOR, ".relation_path_type_ahead_results_holder")
+                    ),
+                    message="Address results container not found",
+                )
+
+                # Wait for list items to appear
+                wait.until(
+                    EC.presence_of_all_elements_located(
+                        (By.CSS_SELECTOR, ".relation_path_type_ahead_results_holder li")
+                    ),
+                    message="No address items found in the list",
+                )
+                # print("Address list populated successfully")
+
+                # Search for user_paon in the address list using aria-label attribute
+                try:
+                    # Use XPath to look for aria-label containing user_paon
+                    address_xpath = (
+                        f"//li[@aria-label and contains(@aria-label, '{user_paon}')]"
+                    )
+                    matching_address = wait.until(
+                        EC.element_to_be_clickable((By.XPATH, address_xpath)),
+                        message=f"No address containing '{user_paon}' found in aria-label attributes",
+                    )
+                    # print(f"Found matching address: {matching_address.get_attribute('aria-label')}")
+                    matching_address.click()
+                    # print("Clicked on matching address")
+
+                    # Allow time for the selection to take effect
+                    time.sleep(2)
+
+                    # Find and click the "Select address and continue" button
+                    continue_button = wait.until(
+                        EC.element_to_be_clickable(
+                            (
+                                By.CSS_SELECTOR,
+                                "input.btn.bg-green[value='Select address and continue']",
+                            )
+                        ),
+                        message="Could not find 'Select address and continue' button",
+                    )
+                    # print("Found 'Select address and continue' button, clicking it...")
+                    continue_button.click()
+                    # print("Clicked on 'Select address and continue' button")
+
+                    # Allow time for the page to load after clicking the button
+                    time.sleep(3)
+                except TimeoutException as e:
+                    # print(f"Error finding address: {e}")
+                    raise
+            except TimeoutException as e:
+                # print(f"Error loading address list: {e}")
+                raise
+
+            # After pressing Next button and waiting for page to load
+            # print("Looking for schedule list...")
+
+            # Wait for the page to load - giving it extra time
+            time.sleep(5)
+
+            # Use only the selector that we know works
+            # print("Looking for bin type elements...")
+            try:
+                bin_type_selector = (
+                    By.CSS_SELECTOR,
+                    "div.formatting_bold.formatting_size_bigger.formatting span.value-as-text",
+                )
+                WebDriverWait(driver, 15).until(
+                    EC.presence_of_element_located(bin_type_selector)
+                )
+                # print(f"Found bin type elements with selector: {bin_type_selector}")
+            except TimeoutException:
+                # print("Could not find bin type elements. Taking screenshot for debugging...")
+                screenshot_path = f"bin_type_error_{int(time.time())}.png"
+                driver.save_screenshot(screenshot_path)
+                # print(f"Screenshot saved to {screenshot_path}")
+
+            # Create BS4 object from driver's page source
+            # print("Parsing page with BeautifulSoup...")
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+
+            # Initialize data dictionary
+            data = {"bins": []}
+
+            # Looking for bin types in the exact HTML structure
+            bin_type_elements = soup.select(
+                "div.formatting_bold.formatting_size_bigger.formatting span.value-as-text"
+            )
+            # print(f"Found {len(bin_type_elements)} bin type elements")
+
+            # Look specifically for date elements with the exact structure
+            date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
+            hidden_dates = soup.select(
+                "div.col-sm-12.font-xs-3xl input[type='hidden'][value*='/']"
+            )
+
+            # print(f"Found {len(bin_type_elements)} bin types and {len(date_elements)} date elements")
+
+            # We need a smarter way to match bin types with their dates
+            bin_count = 0
+
+            # Map of bin types to their collection dates
+            bin_date_map = {}
+
+            # Extract all date strings that look like actual dates
+            date_texts = []
+            date_pattern = re.compile(
+                r"(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\s+\d+(?:st|nd|rd|th)?\s+\w+\s+\d{4}",
+                re.IGNORECASE,
+            )
 
-        user_postcode = kwargs.get("postcode")
-        user_paon = kwargs.get("paon")
-        check_postcode(user_postcode)
-        check_paon(user_paon)
-        bindata = {"bins": []}
+            for element in date_elements:
+                text = element.get_text(strip=True)
+                if date_pattern.search(text):
+                    date_texts.append(text)
+                    # print(f"Found valid date text: {text}")
 
-        URI = "https://uhtn-wrp.whitespacews.com/"
+            # Find hidden date inputs with values in DD/MM/YYYY format
+            hidden_date_values = []
+            for hidden in hidden_dates:
+                value = hidden.get("value", "")
+                if re.match(r"\d{1,2}/\d{1,2}/\d{4}", value):
+                    hidden_date_values.append(value)
+                    # print(f"Found hidden date value: {value}")
 
-        session = requests.Session()
+            # When filtering date elements
+            date_elements = soup.select("div.col-sm-12.font-xs-3xl span.value-as-text")
+            valid_date_elements = []
 
-        # get link from first page as has some kind of unique hash
-        r = session.get(
-            URI,
-        )
-        r.raise_for_status()
-        soup = BeautifulSoup(r.text, features="html.parser")
+            for element in date_elements:
+                text = element.get_text(strip=True)
+                if contains_date(text):
+                    valid_date_elements.append(element)
+                    # print(f"Found valid date element: {text}")
+                else:
+                    pass
+                    # print(f"Skipping non-date element: {text}")
 
-        alink = soup.find("a", text="Find my bin collection day")
+            # print(f"Found {len(bin_type_elements)} bin types and {len(valid_date_elements)} valid date elements")
 
-        if alink is None:
-            raise Exception("Initial page did not load correctly")
+            # When processing each bin type
+            for i, bin_type_elem in enumerate(bin_type_elements):
+                bin_type = bin_type_elem.get_text(strip=True)
 
-        # greplace 'seq' query string to skip next step
-        nextpageurl = alink["href"].replace("seq=1", "seq=2")
+                # Try to find a date for this bin type
+                date_text = None
 
-        data = {
-            "address_name_number": user_paon,
-            "address_postcode": user_postcode,
-        }
+                # Look for a valid date element
+                if i < len(valid_date_elements):
+                    date_elem = valid_date_elements[i]
+                    date_text = date_elem.get_text(strip=True)
 
-        # get list of addresses
-        r = session.post(nextpageurl, data)
-        r.raise_for_status()
+                # If we don't have a valid date yet, try using the hidden input
+                if not date_text or not contains_date(date_text):
+                    if i < len(hidden_dates):
+                        date_value = hidden_dates[i].get("value")
+                        if contains_date(date_value):
+                            date_text = date_value
 
-        soup = BeautifulSoup(r.text, features="html.parser")
+                # Skip if we don't have a valid date
+                if not date_text or not contains_date(date_text):
+                    # print(f"No valid date found for bin type: {bin_type}")
+                    continue
 
-        # get first address (if you don't enter enough argument values this won't find the right address)
-        alink = soup.find("div", id="property_list").find("a")
+                # print(f"Found bin type: {bin_type} with date: {date_text}")
 
-        if alink is None:
-            raise Exception("Address not found")
+                try:
+                    # Clean up the date text
+                    date_text = remove_ordinal_indicator_from_date_string(date_text)
 
-        nextpageurl = URI + alink["href"]
+                    # Try to parse the date
+                    try:
+                        collection_date = datetime.strptime(
+                            date_text, "%A %d %B %Y"
+                        ).date()
+                    except ValueError:
+                        try:
+                            collection_date = datetime.strptime(
+                                date_text, "%d/%m/%Y"
+                            ).date()
+                        except ValueError:
+                            # Last resort
+                            collection_date = parse(date_text).date()
 
-        # get collection page
-        r = session.get(
-            nextpageurl,
-        )
-        r.raise_for_status()
-        soup = BeautifulSoup(r.text, features="html.parser")
+                    # Create bin entry
+                    bin_entry = {
+                        "type": bin_type,
+                        "collectionDate": collection_date.strftime(date_format),
+                    }
 
-        if soup.find("span", id="waste-hint"):
-            raise Exception("No scheduled services at this address")
+                    # Add to data
+                    data["bins"].append(bin_entry)
+                    bin_count += 1
+                    # print(f"Added bin entry: {bin_entry}")
 
-        u1s = soup.find("section", id="scheduled-collections").find_all("u1")
+                except Exception as e:
+                    pass
+                    # print(f"Error parsing date '{date_text}': {str(e)}")
 
-        for u1 in u1s:
-            lis = u1.find_all("li", recursive=False)
+            # print(f"Successfully parsed {bin_count} bin collections")
 
-            date = lis[1].text.replace("\n", "")
-            bin_type = lis[2].text.replace("\n", "")
+            if not data["bins"]:
+                # print("No bin data found. Saving page for debugging...")
+                with open(f"debug_page_{int(time.time())}.html", "w") as f:
+                    f.write(driver.page_source)
+                driver.save_screenshot(f"final_error_screenshot_{int(time.time())}.png")
+                raise ValueError(
+                    "No bin collection data could be extracted from the page"
+                )
 
-            dict_data = {
-                "type": bin_type,
-                "collectionDate": datetime.strptime(
-                    date,
-                    "%d/%m/%Y",
-                ).strftime(date_format),
-            }
-            bindata["bins"].append(dict_data)
+            # Sort the bin collections by date
+            data["bins"].sort(
+                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+            )
 
-        bindata["bins"].sort(
-            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
-        )
+            return data
 
-        return bindata
+        except Exception as e:
+            # print(f"Error parsing bin collection data: {e}")
+            raise
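This rewrite replaces the old whitespacews.com requests flow with Selenium against the council's new waste portal. The pattern it leans on throughout is the explicit wait: block on a readiness condition rather than a fixed sleep wherever a condition exists. A minimal sketch with a plain Chrome driver (the real code obtains its driver via the package's `create_webdriver` helper and the `web_driver`/`headless` kwargs; the postcode value is the one from the test fixture):

```python
# Sketch of the explicit-wait pattern used by the rewrite (standard Selenium API).
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

driver = webdriver.Chrome()
try:
    driver.get(
        "https://waste.nc.north-herts.gov.uk/w/webpage/find-bin-collection-day-input-address"
    )
    wait = WebDriverWait(driver, 20)
    # Block until the postcode box is actually clickable, not just present
    postcode_input = wait.until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "input.relation_path_type_ahead_search.form-control")
        )
    )
    postcode_input.send_keys("SG6 4BJ")
finally:
    driver.quit()
```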
@@ -1,29 +1,11 @@
-from typing import Dict, List, Any, Optional
-from bs4 import BeautifulSoup
-from dateutil.relativedelta import relativedelta
 import requests
-import logging
-import re
-from datetime import datetime
-from uk_bin_collection.uk_bin_collection.common import *
-from dateutil.parser import parse
+from bs4 import BeautifulSoup
 
-from uk_bin_collection.uk_bin_collection.common import check_uprn, check_postcode
+from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
-def get_token(page) -> str:
-    """
-    Get a __token to include in the form data
-    :param page: Page html
-    :return: Form __token
-    """
-    soup = BeautifulSoup(page.text, features="html.parser")
-    soup.prettify()
-    token = soup.find("input", {"name": "__token"}).get("value")
-    return token
-
-
+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -31,69 +13,38 @@ class CouncilClass(AbstractGetBinDataClass):
     implementation.
     """
 
-    def get_data(self, url: str) -> str:
-        """This method makes the request to the council
-
-        Keyword arguments:
-        url -- the url to get the data from
-        """
-        # Set a user agent so we look like a browser ;-)
-        user_agent = (
-            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
-            "Chrome/108.0.0.0 Safari/537.36"
-        )
-        headers = {"User-Agent": user_agent}
-        requests.packages.urllib3.disable_warnings()
+    def parse_data(self, page: str, **kwargs) -> dict:
 
-        # Make the Request - change the URL - find out your property number
-        try:
-            session = requests.Session()
-            session.headers.update(headers)
-            full_page = session.get(url)
-            return full_page
-        except requests.exceptions.HTTPError as errh:
-            logging.error(f"Http Error: {errh}")
-            raise
-        except requests.exceptions.ConnectionError as errc:
-            logging.error(f"Error Connecting: {errc}")
-            raise
-        except requests.exceptions.Timeout as errt:
-            logging.error(f"Timeout Error: {errt}")
-            raise
-        except requests.exceptions.RequestException as err:
-            logging.error(f"Oops: Something Else {err}")
-            raise
+        user_uprn = kwargs.get("uprn")
+        user_postcode = kwargs.get("postcode")
+        check_uprn(user_uprn)
+        check_postcode(user_postcode)
+        bindata = {"bins": []}
 
-    def parse_data(self, page: str, **kwargs: Any) -> Dict[str, List[Dict[str, str]]]:
-        uprn: Optional[str] = kwargs.get("uprn")
-        postcode: Optional[str] = kwargs.get("postcode")
+        session_uri = "https://forms.chorleysouthribble.gov.uk/xfp/form/70"
+        URI = "https://forms.chorleysouthribble.gov.uk/xfp/form/70#qc576c657112a8277ba6f954ebc0490c946168363_0"
 
-        if uprn is None:
-            raise ValueError("UPRN is required and must be a non-empty string.")
-        if postcode is None:
-            raise ValueError("Postcode is required and must be a non-empty string.")
+        session = requests.Session()
+        token_response = session.get(session_uri)
+        soup = BeautifulSoup(token_response.text, "html.parser")
+        token = soup.find("input", {"name": "__token"}).attrs["value"]
 
-        check_uprn(uprn)
-        check_postcode(postcode)
-
-        values = {
-            "__token": get_token(page),
-            "page": "491",
+        form_data = {
+            "__token": token,
+            "page": "196",
             "locale": "en_GB",
-            "q1f8ccce1d1e2f58649b4069712be6879a839233f_0_0": postcode,
-            "q1f8ccce1d1e2f58649b4069712be6879a839233f_1_0": uprn,
+            "qc576c657112a8277ba6f954ebc0490c946168363_0_0": user_postcode,
+            "qc576c657112a8277ba6f954ebc0490c946168363_1_0": user_uprn,
            "next": "Next",
        }
-        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"}
-        requests.packages.urllib3.disable_warnings()
-        response = requests.request(
-            "POST",
-            "https://forms.chorleysouthribble.gov.uk/xfp/form/70",
-            headers=headers,
-            data=values,
-        )
 
-        soup = BeautifulSoup(response.text, features="html.parser")
+        collection_response = session.post(URI, data=form_data)
+
+        #collection_soup = BeautifulSoup(collection_response.text, "html.parser")
+
+
+        soup = BeautifulSoup(collection_response.text, "html.parser")
+        #print(soup)
 
        rows = soup.find("table").find_all("tr")
 
@@ -103,31 +54,23 @@ class CouncilClass(AbstractGetBinDataClass):
        # Loops the Rows
        for row in rows:
            cells = row.find_all("td")
+
            if cells:
                bin_type = cells[0].get_text(strip=True)
                collection_next = cells[1].get_text(strip=True)
 
-                collection_date = re.findall(r"\(.*?\)", collection_next)
-
-                if len(collection_date) != 1:
-                    continue
-
-                collection_date_obj = parse(
-                    re.sub(r"[()]", "", collection_date[0])
-                ).date()
-
-                # since we only have the next collection day, if the parsed date is in the past,
-                # assume the day is instead next month
-                if collection_date_obj < datetime.now().date():
-                    collection_date_obj += relativedelta(months=1)
-
-                # Make each Bin element in the JSON
-                dict_data = {
-                    "type": bin_type,
-                    "collectionDate": collection_date_obj.strftime(date_format),
-                }
-
-                # Add data to the main JSON Wrapper
-                data["bins"].append(dict_data)
-
+                if len(collection_next) != 1:
+                    collection_date_obj = datetime.strptime(collection_next, "%d/%m/%y").date()
+                    # since we only have the next collection day, if the parsed date is in the past,
+                    # assume the day is instead next month
+                    if collection_date_obj < datetime.now().date():
+                        collection_date_obj += relativedelta(months=1)
+                    # Make each Bin element in the JSON
+                    dict_data = {
+                        "type": bin_type,
+                        "collectionDate": collection_date_obj.strftime("%d/%m/%Y"),
+                    }
+                    # Add data to the main JSON Wrapper
+                    data["bins"].append(dict_data)
+                    continue
        return data
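The South Ribble rewrite drops the standalone `get_token` helper and the one-shot `requests.request` POST in favour of a single `requests.Session`, so the anti-forgery `__token` and the session cookie come from the same exchange. The round-trip in miniature, with the field values taken from the hunk above (treat this as a sketch, not a tested client):

```python
# GET the form once to obtain the hidden __token, then POST it back on the
# same Session so the token and the cookie jar stay paired.
import requests
from bs4 import BeautifulSoup

session = requests.Session()
form_url = "https://forms.chorleysouthribble.gov.uk/xfp/form/70"

token_page = session.get(form_url)
soup = BeautifulSoup(token_page.text, "html.parser")
token = soup.find("input", {"name": "__token"}).attrs["value"]

response = session.post(
    form_url,
    data={
        "__token": token,
        "page": "196",
        "locale": "en_GB",
        # postcode/uprn field names are the form-specific hashes from the hunk
        "qc576c657112a8277ba6f954ebc0490c946168363_0_0": "PR26 7RZ",
        "qc576c657112a8277ba6f954ebc0490c946168363_1_0": "10013243496",
        "next": "Next",
    },
)
print(response.status_code)
```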
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: uk_bin_collection
-Version: 0.152.2
+Version: 0.152.4
 Summary: Python Lib to collect UK Bin Data
 Author: Robert Bradley
 Author-email: robbrad182@gmail.com
@@ -7,7 +7,7 @@ uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-c
 uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
 uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
 uk_bin_collection/tests/generate_map_test_results.py,sha256=CKnGK2ZgiSXomRGkomX90DitgMP-X7wkHhyKORDcL2E,1144
-uk_bin_collection/tests/input.json,sha256=f8Jq_9vzVy3LFAZJ1nRZSwmJ34lOhqG53ee0_LJu5HI,132470
+uk_bin_collection/tests/input.json,sha256=hy8tlgmuKQWiq3PxNE16kaYHk6XxesuFkBvtpk-9oIw,132590
 uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
 uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
 uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=VZ0a81sioJULD7syAYHjvK_-nT_Rd36tUyzPetSA0gk,3475
@@ -19,7 +19,7 @@ uk_bin_collection/uk_bin_collection/collect_data.py,sha256=dB7wWXsJX4fm5bIf84lex
 uk_bin_collection/uk_bin_collection/common.py,sha256=izotgwavB08pUWisNL3wqcBrE9E1-bdrq-v6YKyriDE,11034
 uk_bin_collection/uk_bin_collection/councils/AberdeenCityCouncil.py,sha256=Je8VwVLK9KnYl9vqf2gWJ7ZYDgUq3A7caDiIzk5Xof8,4194
 uk_bin_collection/uk_bin_collection/councils/AberdeenshireCouncil.py,sha256=aO1CSdyqa8oAD0fB79y1Q9bikAWCP_JFa7CsyTa2j9s,1655
-uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py,sha256=mLbfq8a2jilFfnNs0ujj49V8to2bNERgJp_4Ugbro-g,3418
+uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py,sha256=N0BcdTxRlCiCh6SPluPK3wMlNDli8_wJgUOOwe4hgSE,4250
 uk_bin_collection/uk_bin_collection/councils/AmberValleyBoroughCouncil.py,sha256=mTeluIIEcuxLxhfDQ95A1fp8RM6AkJT5tRGZPUbYGdk,1853
 uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py,sha256=YlhAnxkRAAvrwbUvleNKUuLROcwMTps2eMHElpuctm4,5894
 uk_bin_collection/uk_bin_collection/councils/AntrimAndNewtonabbeyCouncil.py,sha256=Hp5pteaC5RjL5ZqPZ564S9WQ6ZTKLMO6Dl_fxip2TUc,1653
@@ -50,7 +50,7 @@ uk_bin_collection/uk_bin_collection/councils/BolsoverCouncil.py,sha256=_NZuSvSbl
 uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py,sha256=WI68r8jB0IHPUT4CgmZMtng899AAMFTxkyTdPg9yLF8,4117
 uk_bin_collection/uk_bin_collection/councils/BostonBoroughCouncil.py,sha256=8xv6FMNj8Qgwn5K0nMdB5X8hkcNFzhcJ48DMordflJY,5631
 uk_bin_collection/uk_bin_collection/councils/BracknellForestCouncil.py,sha256=Llo1rULaAZ8rChVYZqXFFLo7CN6vbT0ULUJD6ActouY,9015
-uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py,sha256=BEWS2c62cOsf26jqn1AkNUvVmc5AlUADYLaQuPn9RY4,5456
+uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py,sha256=qtCGHIwKDJQw0SNvQr0EZub21PFPDsdcxABOPv_MC6s,6114
 uk_bin_collection/uk_bin_collection/councils/BraintreeDistrictCouncil.py,sha256=2vYHilpI8mSwC2Ykdr1gxYAN3excDWqF6AwtGbkwbTw,2441
 uk_bin_collection/uk_bin_collection/councils/BrecklandCouncil.py,sha256=PX6A_pDvaN109aSNWmEhm88GFKfkClIkmbwGURWvsks,1744
 uk_bin_collection/uk_bin_collection/councils/BrentCouncil.py,sha256=BsP7V0vezteX0WAxcxqMf3g6ro-J78W6hubefALRMyg,5222
@@ -114,7 +114,7 @@ uk_bin_collection/uk_bin_collection/councils/EastRidingCouncil.py,sha256=oL-Nqri
 uk_bin_collection/uk_bin_collection/councils/EastStaffordshireBoroughCouncil.py,sha256=s13zlAN9Rac-RVHNFLIjIY0X8C6sPTNS37EL2t6vXw8,3692
 uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py,sha256=qQ0oOfGd0sWcczse_B22YoeL9uj3og8v3UJLt_Sx29c,4353
 uk_bin_collection/uk_bin_collection/councils/EastbourneBoroughCouncil.py,sha256=t2-Ri58feN4BHZ-yZx83QjmWuxlCkF7iu9UvXJ2rVp8,2669
-uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py,sha256=45oT8K8OoTx47FVHYqSl5sw8b-NQj8PJ11ugZTgf4Bg,3171
+uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py,sha256=ymHYdRVlTNNYIhZigvnwsEZUpJIecjxV0HrZm7lEdpY,3397
 uk_bin_collection/uk_bin_collection/councils/EdinburghCityCouncil.py,sha256=YRjNgevnCxfaAIU8BV9dkqG17NiT6S-hp7l-1rdLVgQ,3150
 uk_bin_collection/uk_bin_collection/councils/ElmbridgeBoroughCouncil.py,sha256=TgBOaReHWBbm0avV7HqRf0x7cxDe9cacTUcP9TFFprs,3005
 uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py,sha256=2yR5p-kdApOm1gHiynNECP0jQDvaYHOiT6MAQJAvunE,6144
@@ -213,7 +213,7 @@ uk_bin_collection/uk_bin_collection/councils/NorthAyrshireCouncil.py,sha256=o8zv
 uk_bin_collection/uk_bin_collection/councils/NorthDevonCountyCouncil.py,sha256=tgJKIvu7nnCAHu_HImfG5SQABD6ygKFqrZU-ZoC6ObY,6260
 uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py,sha256=BfNpYcjG3z0Yz8OYN6NkfzvZ5k1FI-80D-rv211kPPU,5449
 uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py,sha256=fYf438VZIaOaqPSwdTTWVjFTdrI0jGfFsxVzOc-QdkA,1817
-uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py,sha256=dFgvZqVKEVEP0zSPeh2s9xIWSCGbhYHpXn2U6Nk0HXM,2847
+uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py,sha256=Zps52bZroibL1h0h7WKD1aKJzQD7-OLwd9tHXDQkDFg,12263
 uk_bin_collection/uk_bin_collection/councils/NorthKestevenDistrictCouncil.py,sha256=vYOCerJXr9LTP6F2wm4vpYNYbQaWNZ6yfHEQ33N_hTw,1681
 uk_bin_collection/uk_bin_collection/councils/NorthLanarkshireCouncil.py,sha256=npK1V8D3SLNTSSKkfEpEPvVgXDFyhH_tAsuGogsVKQY,1763
 uk_bin_collection/uk_bin_collection/councils/NorthLincolnshireCouncil.py,sha256=MPzrfdo9YQFVlqBUOM-jDQkacz2DXnygLILQ_ojZeJo,2543
@@ -271,7 +271,7 @@ uk_bin_collection/uk_bin_collection/councils/SouthKestevenDistrictCouncil.py,sha
 uk_bin_collection/uk_bin_collection/councils/SouthLanarkshireCouncil.py,sha256=fj-eZI0yrvQVCv8GvhcovZ3b9bV6Xv_ws3IunWjnv4U,3126
 uk_bin_collection/uk_bin_collection/councils/SouthNorfolkCouncil.py,sha256=C2qIZjjbl9JnuukX9OH2RbfP0hSdp3uX76APGY33qKs,4622
 uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py,sha256=f9d2YDGv5hnN7Ul-u_I63h_BbpBU7CJFdgv-lOviRGc,4031
-uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py,sha256=QujJYORfiQmfBBUPt3Vnb0ryJy1XUiJtli-jLlUt5fs,4695
+uk_bin_collection/uk_bin_collection/councils/SouthRibbleCouncil.py,sha256=5HSTmrPc-gN3ZzLq6n2VDs_NhtCgXhnGqJkEejDmSHI,2900
 uk_bin_collection/uk_bin_collection/councils/SouthStaffordshireDistrictCouncil.py,sha256=ACQMHWyamnj1ag3gNF-8Jhp-DKUok1GhFdnzH4nCzwU,3201
 uk_bin_collection/uk_bin_collection/councils/SouthTynesideCouncil.py,sha256=dxXGrJfg_fn2IPTBgq6Duwy0WY8GYLafMuisaCjOnbs,3426
 uk_bin_collection/uk_bin_collection/councils/SouthamptonCityCouncil.py,sha256=exNoI-Vun_C5FowCYhZ_600MBUe_OPR7MdGZEMNLL0I,1542
@@ -346,8 +346,8 @@ uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bId
 uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=QD4v4xpsEE0QheR_fGaNOIRMc2FatcUfKkkhAhseyVU,1159
 uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
 uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
-uk_bin_collection-0.152.2.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
-uk_bin_collection-0.152.2.dist-info/METADATA,sha256=TkzzP87OZ_9Mqtlf7CjxJSstnyQJveMZzCV_yXX7zHM,26688
-uk_bin_collection-0.152.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-uk_bin_collection-0.152.2.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
-uk_bin_collection-0.152.2.dist-info/RECORD,,
+uk_bin_collection-0.152.4.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
+uk_bin_collection-0.152.4.dist-info/METADATA,sha256=wK9bVaoTAyW9e1hieARhWnIUnlDdpL6b_h0VNvGP4zw,26688
+uk_bin_collection-0.152.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+uk_bin_collection-0.152.4.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
+uk_bin_collection-0.152.4.dist-info/RECORD,,
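For reference when reading the RECORD hunks above: each entry is `path,sha256=<digest>,size`, where the digest is the urlsafe-base64 SHA-256 of the file with the trailing `=` padding stripped (PEP 376/427). A small sketch to verify an entry locally:

```python
# Recompute a wheel RECORD digest for a file on disk.
import base64
import hashlib

def record_hash(path: str) -> str:
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

# e.g. record_hash("uk_bin_collection/tests/input.json") should print
# "sha256=hy8tlgmuKQWiq3PxNE16kaYHk6XxesuFkBvtpk-9oIw" for the 0.152.4 file
```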