uk_bin_collection 0.134.3__py3-none-any.whl → 0.135.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/check_selenium_url_in_input.json.py +209 -0
- uk_bin_collection/tests/input.json +49 -4
- uk_bin_collection/uk_bin_collection/councils/AmberValleyBoroughCouncil.py +60 -0
- uk_bin_collection/uk_bin_collection/councils/BolsoverCouncil.py +298 -0
- uk_bin_collection/uk_bin_collection/councils/CherwellDistrictCouncil.py +75 -0
- uk_bin_collection/uk_bin_collection/councils/ConwyCountyBorough.py +11 -3
- uk_bin_collection/uk_bin_collection/councils/CotswoldDistrictCouncil.py +3 -5
- uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py +54 -50
- uk_bin_collection/uk_bin_collection/councils/EpsomandEwellBoroughCouncil.py +86 -0
- uk_bin_collection/uk_bin_collection/councils/GloucesterCityCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/MiddlesbroughCouncil.py +100 -0
- uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py +2 -0
- uk_bin_collection/uk_bin_collection/councils/RedcarandClevelandCouncil.py +108 -0
- uk_bin_collection/uk_bin_collection/councils/RunnymedeBoroughCouncil.py +54 -0
- uk_bin_collection/uk_bin_collection/councils/SunderlandCityCouncil.py +21 -15
- uk_bin_collection/uk_bin_collection/councils/TendringDistrictCouncil.py +1 -1
- uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py +1 -35
- {uk_bin_collection-0.134.3.dist-info → uk_bin_collection-0.135.1.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.134.3.dist-info → uk_bin_collection-0.135.1.dist-info}/RECORD +23 -15
- {uk_bin_collection-0.134.3.dist-info → uk_bin_collection-0.135.1.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.134.3.dist-info → uk_bin_collection-0.135.1.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.134.3.dist-info → uk_bin_collection-0.135.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,75 @@
|
|
1
|
+
from datetime import datetime, timedelta
|
2
|
+
|
3
|
+
import requests
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
11
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Cherwell District Council.

    Fetches the council's bin collection search page for a UPRN and parses
    the collection tasks listed on it.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Return the collections listed for the property identified by ``uprn``.

        Args:
            page: Unused; the results URL is built from the UPRN.
            **kwargs: Must contain ``uprn``.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` sorted
            by date, with dates rendered using the project's ``date_format``.

        Raises:
            ValueError: If the results container is not present in the page.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        bindata = {"bins": []}

        URI = f"https://www.cherwell.gov.uk/homepage/129/bin-collection-search?uprn={user_uprn}"

        # Make the GET request
        response = requests.get(URI)
        soup = BeautifulSoup(response.text, "html.parser")

        def get_full_date(date_str):
            """Attach a year to a "<day> <month>" string and format it with date_format."""
            today = datetime.today().date()
            date_str = remove_ordinal_indicator_from_date_string(date_str)

            # Assume the current year first
            date_obj = datetime.strptime(f"{date_str} {today.year}", "%d %B %Y")

            # Compare calendar dates only: comparing against datetime.today()
            # (which carries the time of day) would push a collection that is
            # due *today* into next year.
            if date_obj.date() < today:
                date_obj = datetime.strptime(
                    f"{date_str} {today.year + 1}", "%d %B %Y"
                )

            return date_obj.strftime(date_format)

        results = soup.find("div", class_="bin-collection-results__tasks")
        if results is None:
            # find() returns None when nothing matches; fail with a clear message
            raise ValueError(
                "Bin collection results not found in the page for the supplied UPRN."
            )

        for item in results.find_all("li", class_="list__item"):
            # The date is mandatory: without it there is nothing to report,
            # and a placeholder string cannot be parsed as a date.
            date_tag = item.find("p", class_="bin-collection-tasks__date")
            if date_tag is None:
                continue

            # Bin type is the heading's own text, excluding nested elements
            bin_type_tag = item.find("h3", class_="bin-collection-tasks__heading")
            bin_type = (
                "".join(bin_type_tag.find_all(text=True, recursive=False)).strip()
                if bin_type_tag
                else "Unknown Bin"
            )

            bindata["bins"].append(
                {
                    "type": bin_type,
                    "collectionDate": get_full_date(date_tag.text.strip()),
                }
            )

        bindata["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
        )

        return bindata
|
@@ -1,12 +1,20 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
|
3
|
+
import requests
|
1
4
|
from bs4 import BeautifulSoup
|
2
|
-
|
5
|
+
|
3
6
|
from uk_bin_collection.uk_bin_collection.common import *
|
4
|
-
from
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
5
8
|
|
6
9
|
|
7
10
|
class CouncilClass(AbstractGetBinDataClass):
|
8
11
|
def parse_data(self, page: str, **kwargs) -> dict:
|
9
|
-
|
12
|
+
user_uprn = kwargs.get("uprn")
|
13
|
+
check_uprn(user_uprn)
|
14
|
+
uri = f"https://www.conwy.gov.uk/Contensis-Forms/erf/collection-result-soap-xmas2025.asp?ilangid=1&uprn={user_uprn}"
|
15
|
+
|
16
|
+
response = requests.get(uri)
|
17
|
+
soup = BeautifulSoup(response.content, features="html.parser")
|
10
18
|
data = {"bins": []}
|
11
19
|
|
12
20
|
for bin_section in soup.select('div[class*="containererf"]'):
|
@@ -41,15 +41,13 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
41
41
|
# If you bang in the house number (or property name) and postcode in the box it should find your property
|
42
42
|
wait = WebDriverWait(driver, 60)
|
43
43
|
address_entry_field = wait.until(
|
44
|
-
EC.
|
45
|
-
(By.XPATH, '//*[@id="combobox-input-20"]')
|
46
|
-
)
|
44
|
+
EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-22"]'))
|
47
45
|
)
|
48
46
|
|
49
47
|
address_entry_field.send_keys(str(full_address))
|
50
48
|
|
51
49
|
address_entry_field = wait.until(
|
52
|
-
EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-
|
50
|
+
EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-22"]'))
|
53
51
|
)
|
54
52
|
address_entry_field.click()
|
55
53
|
address_entry_field.send_keys(Keys.BACKSPACE)
|
@@ -57,7 +55,7 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
57
55
|
|
58
56
|
first_found_address = wait.until(
|
59
57
|
EC.element_to_be_clickable(
|
60
|
-
(By.XPATH, '//*[@id="dropdown-element-
|
58
|
+
(By.XPATH, '//*[@id="dropdown-element-22"]/ul')
|
61
59
|
)
|
62
60
|
)
|
63
61
|
|
@@ -1,8 +1,5 @@
|
|
1
|
+
import requests
|
1
2
|
from bs4 import BeautifulSoup
|
2
|
-
from selenium.webdriver.common.by import By
|
3
|
-
from selenium.webdriver.support import expected_conditions as EC
|
4
|
-
from selenium.webdriver.support.ui import Select
|
5
|
-
from selenium.webdriver.support.wait import WebDriverWait
|
6
3
|
|
7
4
|
from uk_bin_collection.uk_bin_collection.common import *
|
8
5
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
@@ -19,63 +16,70 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
19
16
|
def parse_data(self, page: str, **kwargs) -> dict:
|
20
17
|
driver = None
|
21
18
|
try:
|
22
|
-
|
19
|
+
uri = "https://selfserve.derbyshiredales.gov.uk/renderform.aspx?t=103&k=9644C066D2168A4C21BCDA351DA2642526359DFF"
|
23
20
|
|
24
|
-
|
21
|
+
bindata = {"bins": []}
|
25
22
|
|
26
23
|
user_uprn = kwargs.get("uprn")
|
27
24
|
user_postcode = kwargs.get("postcode")
|
28
|
-
web_driver = kwargs.get("web_driver")
|
29
|
-
headless = kwargs.get("headless")
|
30
25
|
check_uprn(user_uprn)
|
31
26
|
check_postcode(user_postcode)
|
32
27
|
|
33
|
-
#
|
34
|
-
|
35
|
-
driver.get(page)
|
28
|
+
# Start a session
|
29
|
+
session = requests.Session()
|
36
30
|
|
37
|
-
|
38
|
-
inputElement_postcode = driver.find_element(
|
39
|
-
By.ID,
|
40
|
-
"ctl00_ContentPlaceHolder1_FF2924TB",
|
41
|
-
)
|
42
|
-
inputElement_postcode.send_keys(user_postcode)
|
43
|
-
|
44
|
-
# Click search button
|
45
|
-
driver.find_element(
|
46
|
-
By.ID,
|
47
|
-
"ctl00_ContentPlaceHolder1_FF2924BTN",
|
48
|
-
).click()
|
49
|
-
|
50
|
-
# Wait for the 'Select address' dropdown to appear and select option matching UPRN
|
51
|
-
dropdown = WebDriverWait(driver, 10).until(
|
52
|
-
EC.presence_of_element_located(
|
53
|
-
(By.ID, "ctl00_ContentPlaceHolder1_FF2924DDL")
|
54
|
-
)
|
55
|
-
)
|
56
|
-
# Create a 'Select' for it, then select the matching URPN option
|
57
|
-
dropdownSelect = Select(dropdown)
|
58
|
-
dropdownSelect.select_by_value("U" + user_uprn)
|
59
|
-
|
60
|
-
# Wait for the submit button to appear, then click it to get the collection dates
|
61
|
-
submit = WebDriverWait(driver, 10).until(
|
62
|
-
EC.presence_of_element_located(
|
63
|
-
(By.ID, "ctl00_ContentPlaceHolder1_btnSubmit")
|
64
|
-
)
|
65
|
-
)
|
66
|
-
submit.click()
|
31
|
+
response = session.get(uri)
|
67
32
|
|
68
|
-
soup = BeautifulSoup(
|
33
|
+
soup = BeautifulSoup(response.content, features="html.parser")
|
69
34
|
|
70
|
-
|
71
|
-
|
72
|
-
.find("
|
73
|
-
|
74
|
-
|
35
|
+
# Function to extract hidden input values
|
36
|
+
def get_hidden_value(soup, name):
|
37
|
+
element = soup.find("input", {"name": name})
|
38
|
+
return element["value"] if element else None
|
39
|
+
|
40
|
+
# Extract the required values
|
41
|
+
data = {
|
42
|
+
"__RequestVerificationToken": get_hidden_value(
|
43
|
+
soup, "__RequestVerificationToken"
|
44
|
+
),
|
45
|
+
"FormGuid": get_hidden_value(soup, "FormGuid"),
|
46
|
+
"ObjectTemplateID": get_hidden_value(soup, "ObjectTemplateID"),
|
47
|
+
"Trigger": "submit",
|
48
|
+
"CurrentSectionID": get_hidden_value(soup, "CurrentSectionID"),
|
49
|
+
"TriggerCtl": "",
|
50
|
+
"FF2924": "U" + user_uprn,
|
51
|
+
"FF2924lbltxt": "Collection address",
|
52
|
+
"FF2924-text": user_postcode,
|
53
|
+
}
|
54
|
+
|
55
|
+
# Print extracted data
|
56
|
+
# print("Extracted Data:", data)
|
57
|
+
|
58
|
+
# Step 2: Submit the extracted data via a POST request
|
59
|
+
headers = {
|
60
|
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
61
|
+
"Referer": uri,
|
62
|
+
"Content-Type": "application/x-www-form-urlencoded",
|
63
|
+
}
|
64
|
+
|
65
|
+
URI = "https://selfserve.derbyshiredales.gov.uk/renderform/Form"
|
66
|
+
|
67
|
+
# Make the POST request
|
68
|
+
post_response = session.post(URI, data=data, headers=headers)
|
69
|
+
|
70
|
+
soup = BeautifulSoup(post_response.content, features="html.parser")
|
71
|
+
|
72
|
+
# print(soup)
|
73
|
+
|
74
|
+
bin_rows = soup.find("div", {"class": "ss_confPanel"})
|
75
|
+
|
76
|
+
bin_rows = bin_rows.find_all("div", {"class": "row"})
|
75
77
|
if bin_rows:
|
76
78
|
for bin_row in bin_rows:
|
77
79
|
bin_data = bin_row.find_all("div")
|
78
80
|
if bin_data and bin_data[0] and bin_data[1]:
|
81
|
+
if bin_data[0].get_text(strip=True) == "Your Collections":
|
82
|
+
continue
|
79
83
|
collection_date = datetime.strptime(
|
80
84
|
bin_data[0].get_text(strip=True), "%A%d %B, %Y"
|
81
85
|
)
|
@@ -83,9 +87,9 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
83
87
|
"type": bin_data[1].get_text(strip=True),
|
84
88
|
"collectionDate": collection_date.strftime(date_format),
|
85
89
|
}
|
86
|
-
|
90
|
+
bindata["bins"].append(dict_data)
|
87
91
|
|
88
|
-
|
92
|
+
bindata["bins"].sort(
|
89
93
|
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
|
90
94
|
)
|
91
95
|
except Exception as e:
|
@@ -97,4 +101,4 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
97
101
|
# This block ensures that the driver is closed regardless of an exception
|
98
102
|
if driver:
|
99
103
|
driver.quit()
|
100
|
-
return
|
104
|
+
return bindata
|
@@ -0,0 +1,86 @@
|
|
1
|
+
from datetime import datetime, timedelta
|
2
|
+
|
3
|
+
import requests
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
11
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Epsom and Ewell Borough Council.

    Requests the council's "my EEBC" page for a UPRN and reads the bin
    panels from the second matching content section.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Return the collections listed for the property identified by ``uprn``.

        Args:
            page: Unused; the results URL is built from the UPRN.
            **kwargs: Must contain ``uprn``.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` sorted
            by date, with dates rendered using the project's ``date_format``.

        Raises:
            ValueError: If the expected content section is not in the page.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        bindata = {"bins": []}

        URI = f"https://maps.epsom-ewell.gov.uk/myeebc.aspx?action=SetAddress&UniqueId={user_uprn}"

        # Make the GET request
        response = requests.get(URI)
        soup = BeautifulSoup(response.text, "html.parser")

        sections = soup.find_all("div", class_="atPanelContent atAlt1 atLast")
        if len(sections) < 2:
            # The bin panels are read from the second matching section
            raise ValueError(
                "Bin collection section not found in the page for the supplied UPRN."
            )
        panels = sections[1].find_all("div", class_="atPanelData")

        def get_full_date(date_str):
            """Attach a year to a "<weekday> <day> <month>" string and format it with date_format."""
            today = datetime.today().date()

            # Assume the current year first
            date_obj = datetime.strptime(f"{date_str} {today.year}", "%A %d %B %Y")

            # Compare calendar dates only: comparing against datetime.today()
            # (which carries the time of day) would push a collection that is
            # due *today* into next year.
            if date_obj.date() < today:
                date_obj = datetime.strptime(
                    f"{date_str} {today.year + 1}", "%A %d %B %Y"
                )

            return date_obj.strftime(date_format)

        for panel in panels:
            bin_type_tag = panel.find("h4")  # bin type heading
            cells = panel.find_all("td")

            # The collection date is read from the second cell; skip panels
            # that lack the heading or that cell instead of raising IndexError.
            if bin_type_tag is None or len(cells) < 2:
                continue

            # The date follows the first colon in the cell text
            parts = cells[1].text.strip().split(":")
            if len(parts) < 2:
                continue

            # Collapse line breaks and repeated whitespace into single spaces
            bin_type = " ".join(bin_type_tag.text.split())
            collection_date = " ".join(parts[1].split())

            bindata["bins"].append(
                {
                    "type": bin_type,
                    "collectionDate": get_full_date(collection_date),
                }
            )

        # Parse with the same format the dates were written with
        bindata["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
        )

        return bindata
|
@@ -113,7 +113,7 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
113
113
|
bin_data["bins"].append(dict_data)
|
114
114
|
|
115
115
|
bin_data["bins"].sort(
|
116
|
-
key=lambda x: datetime.strptime(x.get("collectionDate"),
|
116
|
+
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
|
117
117
|
)
|
118
118
|
|
119
119
|
except Exception as e:
|
@@ -104,7 +104,8 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
104
104
|
bin_types = soup.find_all("ul", class_="binCollectionTimesList")
|
105
105
|
|
106
106
|
for bin_collection_dates in bin_types:
|
107
|
-
|
107
|
+
|
108
|
+
bin_collection_list = bin_collection_dates.find_all("li")
|
108
109
|
|
109
110
|
if bin_collection_list:
|
110
111
|
collection_dates = [
|
@@ -0,0 +1,100 @@
|
|
1
|
+
import time
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
from selenium import webdriver
|
6
|
+
from selenium.webdriver.common.by import By
|
7
|
+
from selenium.webdriver.support import expected_conditions as EC
|
8
|
+
from selenium.webdriver.support.ui import Select, WebDriverWait
|
9
|
+
|
10
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
11
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
12
|
+
|
13
|
+
import re
|
14
|
+
|
15
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Middlesbrough Council.

    Drives a browser through the council's bin collection page, searches for
    the supplied address and parses the calendar list inside the embedded
    frame.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Return the collections shown in the calendar list for an address.

        Args:
            page: Ignored; the council page URL is fixed inside this method.
            **kwargs: ``paon`` (text typed into the address box),
                ``web_driver`` and ``headless`` (passed to ``create_webdriver``).

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` with
            dates rendered using the project's ``date_format``.

        Raises:
            ValueError: If the calendar section is missing or a date heading
                cannot be parsed.
        """
        user_paon = kwargs.get("paon")
        check_paon(user_paon)

        # Bound before the try block so the finally clause can always test it,
        # even when create_webdriver itself fails.
        driver = None
        try:
            data = {"bins": []}

            headless = kwargs.get("headless")
            web_driver = kwargs.get("web_driver")
            driver = create_webdriver(web_driver, headless, None, __name__)

            page = "https://www.middlesbrough.gov.uk/recycling-and-rubbish/bin-collection-dates/"
            driver.get(page)

            address_box = WebDriverWait(driver, timeout=15).until(
                EC.presence_of_element_located((By.ID, "row-input-0"))
            )
            address_box.click()
            address_box.send_keys(user_paon)

            search_button = WebDriverWait(driver, timeout=15).until(
                EC.presence_of_element_located((By.ID, "rCbtn-search"))
            )
            search_button.click()

            # The results are rendered inside an iframe
            results_frame = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.ID, "recollect-frame"))
            )
            driver.switch_to.frame(results_frame)

            # Block until the calendar page exists before reading the source
            WebDriverWait(driver, timeout=15).until(
                EC.presence_of_element_located((By.ID, "rCpage-place_calendar"))
            )

            soup = BeautifulSoup(driver.page_source, "html.parser")

            calendar_section = soup.find("section", {"id": "alt-calendar-list"})
            if not calendar_section:
                raise ValueError("Calendar section not found in the HTML.")

            date_headers = calendar_section.find_all("h3")
            collection_lists = calendar_section.find_all("ul")

            # Take month and year from one timestamp so they cannot disagree
            now = datetime.now()
            current_month = now.month
            current_year = now.year

            for date_header, collection_list in zip(date_headers, collection_lists):
                raw_date = date_header.text.strip()

                # Headings look like "Wednesday, February 19" (no year)
                header_match = re.match(
                    r"([A-Za-z]+), ([A-Za-z]+) (\d{1,2})", raw_date
                )

                if header_match:
                    day_name, month_name, day_number = header_match.groups()
                    extracted_month = datetime.strptime(month_name, "%B").month

                    # Handle Dec-Jan rollover: a month earlier than the current
                    # one is assumed to belong to next year
                    inferred_year = (
                        current_year + 1
                        if extracted_month < current_month
                        else current_year
                    )

                    raw_date = f"{day_name}, {month_name} {day_number}, {inferred_year}"

                try:
                    parsed_date = datetime.strptime(raw_date, "%A, %B %d, %Y")
                except ValueError as err:
                    raise ValueError(
                        f"Date format error after inference: {raw_date}"
                    ) from err
                formatted_date = parsed_date.strftime(date_format)

                for li in collection_list.find_all("li"):
                    # Keep only the text before the first full stop
                    bin_type = li.get_text(strip=True).split(".")[0]
                    data["bins"].append(
                        {"type": bin_type, "collectionDate": formatted_date}
                    )

        except Exception as e:
            print(f"An error occurred: {e}")
            raise
        finally:
            # Close the browser regardless of success or failure
            if driver:
                driver.quit()

        return data
|
@@ -100,6 +100,8 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
100
100
|
{"id": "contentInner"},
|
101
101
|
)
|
102
102
|
|
103
|
+
soup = soup.find("div", class_="umb-block-grid__layout-item")
|
104
|
+
|
103
105
|
# Get the dates
|
104
106
|
for date in soup.find_all("h2"):
|
105
107
|
if date.get_text(strip=True) != "Bank Holidays":
|
@@ -0,0 +1,108 @@
|
|
1
|
+
import time
|
2
|
+
|
3
|
+
import requests
|
4
|
+
|
5
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
6
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
7
|
+
|
8
|
+
|
9
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
10
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection lookup for Redcar and Cleveland Council.

    Uses the ReCollect API: an address-suggest call resolves the postcode and
    house name/number to a place id, then the events call lists collections.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Return the collections for the next 30 days for an address.

        Args:
            page: Unused; the API endpoints are fixed.
            **kwargs: Must contain ``postcode`` and ``paon``.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` sorted
            by date, with dates rendered using the project's ``date_format``.

        Raises:
            ValueError: If no suggested address starts with the supplied paon.
        """
        # Imported locally because this file's own imports do not include
        # datetime; the names would otherwise only arrive via a wildcard import.
        from datetime import datetime, timedelta

        user_postcode = kwargs.get("postcode")
        user_paon = kwargs.get("paon")
        check_postcode(user_postcode)
        check_paon(user_paon)
        bindata = {"bins": []}

        suggest_uri = "https://api.eu.recollect.net/api/areas/RedcarandClevelandUK/services/50006/address-suggest"
        suggest_params = {
            "q": user_postcode,
            "locale": "en-GB",
            "_": str(int(time.time() * 1000)),  # millisecond timestamp
        }

        addresses = requests.get(suggest_uri, params=suggest_params).json()

        paon = str(user_paon)

        def is_whole_paon_match(name):
            """True when name is the paon, or the paon followed by a non-alphanumeric character."""
            if not name.startswith(paon):
                return False
            remainder = name[len(paon):]
            return not remainder or not remainder[0].isalnum()

        # Prefer a whole-token match so that "1" does not select "10 ...";
        # fall back to the plain prefix match for anything else.
        place_id = next(
            (
                item["place_id"]
                for item in addresses
                if is_whole_paon_match(item.get("name") or "")
            ),
            None,
        )
        if place_id is None:
            place_id = next(
                (
                    item["place_id"]
                    for item in addresses
                    if (item.get("name") or "").startswith(paon)
                ),
                None,
            )
        if place_id is None:
            # Without this check the events URL would be built with "None"
            raise ValueError(
                f"No address starting with '{paon}' found for postcode {user_postcode}."
            )

        events_uri = (
            f"https://api.eu.recollect.net/api/places/{place_id}/services/50006/events"
        )

        # Query window: today to 30 days ahead
        window_start = datetime.today()
        window_end = window_start + timedelta(days=30)

        events_params = {
            "nomerge": 1,
            "hide": "reminder_only",
            "after": window_start.strftime("%Y-%m-%d"),
            "before": window_end.strftime("%Y-%m-%d"),
            "locale": "en-GB",
            "include_message": "email",
            "_": str(int(time.time() * 1000)),
        }

        payload = requests.get(events_uri, params=events_params).json()

        # One entry per flag on each event: the event's "end_day" paired with
        # the flag's "name"
        for event in payload["events"]:
            for flag in event.get("flags", []):
                collection_date = datetime.strptime(event["end_day"], "%Y-%m-%d")
                bindata["bins"].append(
                    {
                        "type": flag["name"],
                        "collectionDate": collection_date.strftime(date_format),
                    }
                )

        bindata["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
        )

        return bindata
|
@@ -0,0 +1,54 @@
|
|
1
|
+
from datetime import datetime, timedelta
|
2
|
+
|
3
|
+
import requests
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
|
6
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
7
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
8
|
+
|
9
|
+
|
10
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
11
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Runnymede Borough Council.

    Fetches the council's "check your bin collection day" page for a UPRN and
    reads the bin type and date from each row of the results table.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Return the collections listed for the property identified by ``uprn``.

        Args:
            page: Unused; the results URL is built from the UPRN.
            **kwargs: Must contain ``uprn``.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` sorted
            by date, with dates rendered using the project's ``date_format``.

        Raises:
            ValueError: If the results table is not present in the page.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        bindata = {"bins": []}

        URI = f"https://www.runnymede.gov.uk/homepage/150/check-your-bin-collection-day?address={user_uprn}"

        # Make the GET request
        response = requests.get(URI)
        soup = BeautifulSoup(response.text, "html.parser")

        # find() returns None when nothing matches, so walk down step by step
        # and fail with a clear message rather than an AttributeError.
        widget = soup.find("div", class_="widget-bin-collection")
        table = widget.find("table") if widget else None
        tbody = table.find("tbody") if table else None
        if tbody is None:
            raise ValueError(
                "Bin collection table not found in the page for the supplied UPRN."
            )

        for row in tbody.find_all("tr"):
            cells = row.find_all("td")
            # Skip rows that do not carry both a bin type and a date cell
            if len(cells) < 2:
                continue

            bin_type = cells[0].text.strip()
            date_text = cells[1].text.strip()

            # Dates are parsed as e.g. "Monday, 03 March 2025"
            collection_date = datetime.strptime(date_text, "%A, %d %B %Y")

            bindata["bins"].append(
                {
                    "type": bin_type,
                    "collectionDate": collection_date.strftime(date_format),
                }
            )

        bindata["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
        )

        return bindata
|