uk_bin_collection 0.114.6__py3-none-any.whl → 0.116.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- uk_bin_collection/tests/input.json +84 -21
- uk_bin_collection/uk_bin_collection/councils/AntrimAndNewtonabbeyCouncil.py +53 -0
- uk_bin_collection/uk_bin_collection/councils/ArgyllandButeCouncil.py +67 -0
- uk_bin_collection/uk_bin_collection/councils/AshfieldDistrictCouncil.py +105 -0
- uk_bin_collection/uk_bin_collection/councils/BaberghDistrictCouncil.py +132 -0
- uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py +36 -6
- uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py +71 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py +95 -113
- uk_bin_collection/uk_bin_collection/councils/DerbyCityCouncil.py +55 -0
- uk_bin_collection/uk_bin_collection/councils/GraveshamBoroughCouncil.py +122 -0
- uk_bin_collection/uk_bin_collection/councils/HertsmereBoroughCouncil.py +161 -0
- uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py +132 -0
- uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py +60 -41
- uk_bin_collection/uk_bin_collection/councils/WarringtonBoroughCouncil.py +50 -0
- uk_bin_collection/uk_bin_collection/councils/WestLancashireBoroughCouncil.py +114 -0
- {uk_bin_collection-0.114.6.dist-info → uk_bin_collection-0.116.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.114.6.dist-info → uk_bin_collection-0.116.0.dist-info}/RECORD +20 -9
- {uk_bin_collection-0.114.6.dist-info → uk_bin_collection-0.116.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.114.6.dist-info → uk_bin_collection-0.116.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.114.6.dist-info → uk_bin_collection-0.116.0.dist-info}/entry_points.txt +0 -0

uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py
@@ -0,0 +1,71 @@
+from datetime import datetime
+
+import requests
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        user_uprn = kwargs.get("uprn")
+        user_postcode = kwargs.get("postcode")
+        check_uprn(user_uprn)
+        check_postcode(user_postcode)
+        bindata = {"bins": []}
+
+        API_URL = "https://www.broxbourne.gov.uk/xfp/form/205"
+
+        post_data = {
+            "page": "490",
+            "locale": "en_GB",
+            "qacf7e570cf99fae4cb3a2e14d5a75fd0d6561058_0_0": user_postcode,
+            "qacf7e570cf99fae4cb3a2e14d5a75fd0d6561058_1_0": user_uprn,
+            "next": "Next",
+        }
+
+        r = requests.post(API_URL, data=post_data)
+        r.raise_for_status()
+
+        soup = BeautifulSoup(r.content, features="html.parser")
+        soup.prettify()
+
+        form__instructions = soup.find(attrs={"class": "form__instructions"})
+        table = form__instructions.find("table")
+
+        rows = table.find_all("tr")
+
+        current_year = datetime.now().year
+
+        # Process each row into a list of dictionaries
+        for row in rows[1:]:  # Skip the header row
+            columns = row.find_all("td")
+            collection_date = (
+                columns[0].get_text(separator=" ").replace("\xa0", " ").strip()
+            )
+            service = columns[1].get_text(separator=" ").replace("\xa0", " ").strip()
+
+            collection_date = datetime.strptime(collection_date, "%a %d %b")
+
+            if collection_date.month == 1:
+                collection_date = collection_date.replace(year=current_year + 1)
+            else:
+                collection_date = collection_date.replace(year=current_year)
+
+            dict_data = {
+                "type": service,
+                "collectionDate": (collection_date).strftime(date_format),
+            }
+            bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+        return bindata
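
Note: the Broxbourne results table gives only a day string such as "Wed 01 Jan" with no year, so the parser above has to guess one: January dates are pushed into next year, everything else gets the current year. A minimal sketch of that rollover heuristic (the infer_year helper and the sample strings are illustrative, not part of the package):

from datetime import datetime

def infer_year(day_text, today=None):
    # Attach a year to a "%a %d %b" date string ("Wed 01 Jan" carries no year).
    # Heuristic mirrored from the parser above: a January date is assumed to
    # belong to next year, since collections are only published weeks ahead.
    today = today or datetime.now()
    parsed = datetime.strptime(day_text, "%a %d %b")
    year = today.year + 1 if parsed.month == 1 else today.year
    return parsed.replace(year=year)

print(infer_year("Wed 01 Jan", datetime(2024, 12, 30)).date())  # 2025-01-01

One caveat: the heuristic moves every January date into the following year, so a lookup run during January itself would land a year ahead; additionally guarding on the current month being December would avoid that.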

uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py
@@ -1,123 +1,105 @@
-import logging
 import time
 
-
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.ui import Select
-from selenium.webdriver.support.wait import WebDriverWait
+import requests
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-# Set up logging
-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
-)
 
+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
-
-
-
-
-
-        user_uprn = kwargs.get("uprn")
-        user_paon = kwargs.get("paon")
-        user_postcode = kwargs.get("postcode")
-        web_driver = kwargs.get("web_driver")
-        headless = kwargs.get("headless")
-        check_paon(user_paon)
-        check_postcode(user_postcode)
-
-        # Create Selenium webdriver
-        driver = create_webdriver(web_driver, headless, None, __name__)
-        if headless:
-            driver.set_window_size(1920, 1080)
-
-        driver.get(
-            "https://www.cheshirewestandchester.gov.uk/residents/waste-and-recycling/your-bin-collection/collection-day"
-        )
-        wait = WebDriverWait(driver, 60)
-
-        def click_element(by, value):
-            element = wait.until(EC.element_to_be_clickable((by, value)))
-            driver.execute_script("arguments[0].scrollIntoView();", element)
-            element.click()
-
-        logging.info("Accepting cookies")
-        click_element(By.ID, "ccc-close")
-
-        logging.info("Finding collection day")
-        click_element(By.LINK_TEXT, "Find your collection day")
-
-        logging.info("Switching to iframe")
-        iframe_presence = wait.until(
-            EC.presence_of_element_located((By.ID, "fillform-frame-1"))
-        )
-        driver.switch_to.frame(iframe_presence)
-
-        logging.info("Entering postcode")
-        input_element_postcode = wait.until(
-            EC.presence_of_element_located(
-                (By.XPATH, '//input[@id="postcode_search"]')
-            )
-        )
-        input_element_postcode.send_keys(user_postcode)
-
-        pcsearch_btn = wait.until(
-            EC.element_to_be_clickable((By.XPATH, "//input[@id='postcode_search']"))
-        )
-        click_element(By.XPATH, "//input[@id='postcode_search']")
-
-        logging.info("Selecting address")
-        dropdown = wait.until(EC.element_to_be_clickable((By.ID, "Choose_Address")))
-        dropdown_options = wait.until(
-            EC.presence_of_element_located((By.CLASS_NAME, "lookup-option"))
-        )
-        drop_down_values = Select(dropdown)
-        option_element = wait.until(
-            EC.presence_of_element_located(
-                (By.CSS_SELECTOR, f'option.lookup-option[value="{str(user_uprn)}"]')
-            )
-        )
-        driver.execute_script("arguments[0].scrollIntoView();", option_element)
-        drop_down_values.select_by_value(str(user_uprn))
-
-        logging.info("Waiting for bin schedule")
-        wait.until(
-            EC.presence_of_element_located(
-                (By.CLASS_NAME, "bin-schedule-content-bin-card")
-            )
-        )
-
-        logging.info("Extracting bin collection data")
-        soup = BeautifulSoup(driver.page_source, features="html.parser")
-        bin_cards = soup.find_all("div", {"class": "bin-schedule-content-bin-card"})
-        collections = []
-
-        for card in bin_cards:
-            bin_info = card.find("div", {"class": "bin-schedule-content-info"})
-            bin_name = bin_info.find_all("p")[0].text.strip() + " bin"
-            bin_date_str = bin_info.find_all("p")[1].text.split(":")[1].strip()
-            bin_date = datetime.strptime(bin_date_str, "%A, %B %d, %Y")
-            collections.append((bin_name, bin_date))
-
-        ordered_data = sorted(collections, key=lambda x: x[1])
-
-        for item in ordered_data:
-            dict_data = {
-                "type": item[0].capitalize(),
-                "collectionDate": item[1].strftime(date_format),
-            }
-            data["bins"].append(dict_data)
-
-        logging.info("Data extraction complete")
-        return data
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
 
-
-        logging.error(f"An error occurred: {e}")
-        raise
+    def parse_data(self, page: str, **kwargs) -> dict:
 
-
-
-
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        SESSION_URL = "https://my.cheshirewestandchester.gov.uk/authapi/isauthenticated?uri=https://my.cheshirewestandchester.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=&hostname=my.cheshirewestandchester.gov.uk&withCredentials=true"
+
+        API_URL = "https://my.cheshirewestandchester.gov.uk/apibroker/runLookup"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "User-Agent": "Mozilla/5.0",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": "https://mycouncil.milton-keynes.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=",
+        }
+        s = requests.session()
+        r = s.get(SESSION_URL)
+        r.raise_for_status()
+        session_data = r.json()
+        sid = session_data["auth-session"]
+        params = {
+            "id": "609b918c7dd6d",
+            "repeat_against": "",
+            "noRetry": "false",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AchieveForms",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        r = s.post(API_URL, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"]["0"]
+        AuthenticateResponse = rows_data["AuthenticateResponse"]
+
+        params = {
+            "id": "6101d23110243",
+            "repeat_against": "",
+            "noRetry": "false",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AchieveForms",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        data = {
+            "formValues": {
+                "Section 1": {
+                    "UPRN": {
+                        "value": user_uprn,
+                    },
+                    "AuthenticateResponse": {
+                        "value": AuthenticateResponse,
+                    },
+                }
+            },
+        }
+
+        r = s.post(API_URL, json=data, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"]
+        if not isinstance(rows_data, dict):
+            raise ValueError("Invalid data returned from API")
+
+        # Extract each service's relevant details for the bin schedule
+        for item in rows_data.values():
+            dict_data = {
+                "type": item["serviceType"],
+                "collectionDate": datetime.strptime(
+                    item["collectionDateTime"], "%Y-%m-%dT%H:%M:%S"
+                ).strftime(date_format),
+            }
+            bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+
+        return bindata
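
Note: the rewritten Cheshire West scraper replaces a hundred-line Selenium walk with two requests against the AchieveForms API broker behind the council's form: a GET to authapi/isauthenticated yields an anonymous session id (sid), and each apibroker/runLookup call then runs a named server-side lookup under that sid. A condensed sketch of the round trip, with the run_lookup helper and its uri parameter as illustrative assumptions; the lookup ids (e.g. "609b918c7dd6d") are form-specific values taken from the diff above:

import time

import requests

def run_lookup(base, lookup_id, form_values=None):
    # Sketch of the AchieveForms flow used above, not a packaged helper.
    s = requests.Session()
    # 1. Anonymous session: the response carries an "auth-session" token (sid).
    r = s.get(f"{base}/authapi/isauthenticated", params={"uri": base})
    r.raise_for_status()
    sid = r.json()["auth-session"]
    # 2. Run the named lookup; form inputs travel in a "formValues" envelope.
    params = {"id": lookup_id, "sid": sid, "_": str(int(time.time() * 1000))}
    body = {"formValues": form_values} if form_values else None
    r = s.post(f"{base}/apibroker/runLookup", params=params, json=body)
    r.raise_for_status()
    return r.json()["integration"]["transformed"]["rows_data"]

In the diff, the first lookup mints an AuthenticateResponse token and the second exchanges it, plus the UPRN, for the collection rows. The Referer header still pointing at mycouncil.milton-keynes.gov.uk appears to be carried over from the Milton Keynes scraper that uses the same flow.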

uk_bin_collection/uk_bin_collection/councils/DerbyCityCouncil.py
@@ -0,0 +1,55 @@
+import requests
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        URI = f"https://secure.derby.gov.uk/binday/Binday?search.PremisesId={user_uprn}"
+
+        # Make the GET request
+        session = requests.Session()
+        response = session.get(URI)
+
+        soup = BeautifulSoup(response.content, "html.parser")
+
+        # Find all divs with class "binresult" which contain the bin collection information
+        bin_results = soup.find_all("div", class_="binresult")
+
+        # Loop through each bin result to extract date and bin type
+        for result in bin_results:
+            # Find the collection date
+            date_text = result.find("p").strong.get_text(strip=True)
+
+            # Find the bin type by looking at the 'alt' attribute of the img tag
+            bin_type = result.find("img")["alt"]
+
+            if bin_type != "No bins":
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": datetime.strptime(
+                        date_text,
+                        "%A, %d %B %Y:",
+                    ).strftime(date_format),
+                }
+                bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+
+        return bindata
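
Note: one quirk in the Derby parser is the trailing colon in the "%A, %d %B %Y:" format string, which lets strptime consume the colon the page appends to each date heading as a literal character. A one-line check (the sample page text is an assumption inferred from that format string):

from datetime import datetime

# The page appears to render dates like "Friday, 01 November 2024:"; the
# literal ":" in the format string must match, or strptime raises ValueError.
parsed = datetime.strptime("Friday, 01 November 2024:", "%A, %d %B %Y:")
print(parsed.strftime("%d/%m/%Y"))  # 01/11/2024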

uk_bin_collection/uk_bin_collection/councils/GraveshamBoroughCouncil.py
@@ -0,0 +1,122 @@
+import time
+
+import requests
+from dateutil.relativedelta import relativedelta
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        SESSION_URL = "https://my.gravesham.gov.uk/authapi/isauthenticated?uri=https%253A%252F%252Fmy.gravesham.gov.uk%252Fen%252FAchieveForms%252F%253Fform_uri%253Dsandbox-publish%253A%252F%252FAF-Process-22218d5c-c6d6-492f-b627-c713771126be%252FAF-Stage-905e87c1-144b-4a72-8932-5518ddd3e618%252Fdefinition.json%2526redirectlink%253D%25252Fen%2526cancelRedirectLink%253D%25252Fen%2526consentMessage%253Dyes&hostname=my.gravesham.gov.uk&withCredentials=true"
+
+        API_URL = "https://my.gravesham.gov.uk/apibroker/runLookup"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "User-Agent": "Mozilla/5.0",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": "https://my.gravesham.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=",
+        }
+        s = requests.session()
+        r = s.get(SESSION_URL)
+        r.raise_for_status()
+        session_data = r.json()
+        sid = session_data["auth-session"]
+        params = {
+            "id": "5ee8854759297",
+            "repeat_against": "",
+            "noRetry": "false",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AF-Renderer::Self",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+        r = s.post(API_URL, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"]["0"]
+        tokenString = rows_data["tokenString"]
+
+        # Get the current date and time
+        current_datetime = datetime.now()
+        future_datetime = current_datetime + relativedelta(months=1)
+
+        # Format it using strftime
+        current_datetime = current_datetime.strftime("%Y-%m-%dT%H:%M:%S")
+        future_datetime = future_datetime.strftime("%Y-%m-%dT%H:%M:%S")
+
+        data = {
+            "formValues": {
+                "Check your bin day": {
+                    "tokenString": {
+                        "value": tokenString,
+                    },
+                    "UPRNForAPI": {
+                        "value": user_uprn,
+                    },
+                    "formatDateToday": {
+                        "value": current_datetime,
+                    },
+                    "formatDateTo": {
+                        "value": future_datetime,
+                    },
+                }
+            },
+        }
+
+        params = {
+            "id": "5c8f869376376",
+            "repeat_against": "",
+            "noRetry": "false",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AF-Renderer::Self",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+        r = s.post(API_URL, json=data, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+
+        rows_data = data["integration"]["transformed"]["rows_data"]
+        if not isinstance(rows_data, dict):
+            raise ValueError("Invalid data returned from API")
+
+        # Extract each service's relevant details for the bin schedule
+        for item in rows_data.values():
+            if item["Name"]:
+                Bin_Types = item["Name"].split("Empty Bin ")
+                for Bin_Type in Bin_Types:
+                    if Bin_Type:
+                        dict_data = {
+                            "type": Bin_Type.strip(),
+                            "collectionDate": datetime.strptime(
+                                item["Date"], "%Y-%m-%dT%H:%M:%S"
+                            ).strftime(date_format),
+                        }
+                        bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+        return bindata
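
Note: Gravesham's lookup returns a single Name field that concatenates every service collected on a date, which the loop above splits on the literal "Empty Bin " prefix. A self-contained illustration (the sample string is assumed from the split logic, not taken from a live response):

# One event name covering several services, split into one entry per service.
# str.split leaves an empty first fragment from the leading prefix, which the
# truthiness check above (and the filter here) discards.
name = "Empty Bin REFUSE Empty Bin FOOD WASTE"
services = [part.strip() for part in name.split("Empty Bin ") if part]
print(services)  # ['REFUSE', 'FOOD WASTE']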

uk_bin_collection/uk_bin_collection/councils/HertsmereBoroughCouncil.py
@@ -0,0 +1,161 @@
+import re
+import time
+
+import requests
+from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support.wait import WebDriverWait
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        user_paon = kwargs.get("paon")
+        user_postcode = kwargs.get("postcode")
+        web_driver = kwargs.get("web_driver")
+        headless = kwargs.get("headless")
+        check_paon(user_paon)
+        check_postcode(user_postcode)
+        bindata = {"bins": []}
+
+        URI_1 = "https://www.hertsmere.gov.uk/Environment-Refuse-and-Recycling/Recycling--Waste/Bin-collections/Collections-and-calendar.aspx"
+        URI_2 = "https://hertsmere-services.onmats.com/w/webpage/round-search"
+
+        # Create Selenium webdriver
+        driver = create_webdriver(web_driver, headless, None, __name__)
+        driver.get(URI_1)
+
+        soup = BeautifulSoup(driver.page_source, "html.parser")
+
+        current_week = (soup.find("li", class_="current")).text.strip()
+
+        strong = soup.find_all("strong", text=re.compile(r"^Week"))
+
+        bin_weeks = []
+        for tag in strong:
+            parent = tag.parent
+            bin_type = (
+                (parent.text).split("-")[1].strip().replace("\xa0", " ").split(" and ")
+            )
+            for bin in bin_type:
+                dict_data = {
+                    "week": tag.text.replace("\xa0", " "),
+                    "bin_type": bin,
+                }
+                bin_weeks.append(dict_data)
+
+        driver.get(URI_2)
+
+        # Wait for the postcode field to appear then populate it
+        inputElement_postcode = WebDriverWait(driver, 30).until(
+            EC.presence_of_element_located(
+                (
+                    By.CLASS_NAME,
+                    "relation_path_type_ahead_search",
+                )
+            )
+        )
+        inputElement_postcode.send_keys(user_postcode)
+
+        WebDriverWait(driver, 10).until(
+            EC.element_to_be_clickable(
+                (
+                    By.XPATH,
+                    f"//ul[@class='result_list']/li[starts-with(@aria-label, '{user_paon}')]",
+                )
+            )
+        ).click()
+
+        WebDriverWait(driver, timeout=10).until(
+            EC.element_to_be_clickable(
+                (
+                    By.CSS_SELECTOR,
+                    "input.fragment_presenter_template_edit.btn.bg-primary.btn-medium[type='submit']",
+                )
+            )
+        ).click()
+
+        WebDriverWait(driver, timeout=10).until(
+            EC.presence_of_element_located(
+                (By.XPATH, "//h3[contains(text(), 'Collection days')]")
+            )
+        )
+
+        soup = BeautifulSoup(driver.page_source, "html.parser")
+
+        table = soup.find("table", class_="table listing table-striped")
+
+        # Check if the table was found
+        if table:
+            # Extract table rows and cells
+            table_data = []
+            for row in table.find("tbody").find_all("tr"):
+                # Extract cell data from each <td> tag
+                row_data = [cell.get_text(strip=True) for cell in row.find_all("td")]
+                table_data.append(row_data)
+
+        else:
+            print("Table not found.")
+
+        collection_day = (table_data[0])[1]
+
+        current_week_bins = [bin for bin in bin_weeks if bin["week"] == current_week]
+        next_week_bins = [bin for bin in bin_weeks if bin["week"] != current_week]
+
+        days_of_week = [
+            "Monday",
+            "Tuesday",
+            "Wednesday",
+            "Thursday",
+            "Friday",
+            "Saturday",
+            "Sunday",
+        ]
+
+        today = datetime.now()
+        today_idx = today.weekday()  # Monday is 0 and Sunday is 6
+        target_idx = days_of_week.index(collection_day)
+
+        days_until_target = (target_idx - today_idx) % 7
+        if days_until_target == 0:
+            next_day = today
+        else:
+            next_day = today + timedelta(days=days_until_target)
+
+        current_week_dates = get_dates_every_x_days(next_day, 14, 7)
+        next_week_date = next_day + timedelta(days=7)
+        next_week_dates = get_dates_every_x_days(next_week_date, 14, 7)
+
+        for date in current_week_dates:
+            for bin in current_week_bins:
+                dict_data = {
+                    "type": bin["bin_type"],
+                    "collectionDate": date,
+                }
+                bindata["bins"].append(dict_data)
+
+        for date in next_week_dates:
+            for bin in next_week_bins:
+                dict_data = {
+                    "type": bin["bin_type"],
+                    "collectionDate": date,
+                }
+                bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+
+        return bindata
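
Note: Hertsmere publishes which alternating week ("Week 1"/"Week 2") each bin falls on plus a single collection weekday, so the scraper must project concrete dates itself: find the next occurrence of the collection day, then step fortnightly for each week's bins. A sketch of that projection, with dates_every_x_days standing in for the package's get_dates_every_x_days helper, whose (start, step, count) signature is assumed from the call sites above:

from datetime import datetime, timedelta

def dates_every_x_days(start, step_days, count, fmt="%d/%m/%Y"):
    # Hypothetical stand-in for get_dates_every_x_days: project `count`
    # dates forward from `start`, spaced `step_days` apart.
    return [(start + timedelta(days=step_days * i)).strftime(fmt) for i in range(count)]

# Next occurrence of a Thursday collection, then fortnightly from there:
today = datetime(2024, 11, 4)           # a Monday, chosen for the example
days_until = (3 - today.weekday()) % 7  # Thursday is weekday index 3
first = today + timedelta(days=days_until)
print(dates_every_x_days(first, 14, 3))  # ['07/11/2024', '21/11/2024', '05/12/2024']

The modulo arithmetic maps "today is the collection day" to zero days away, which is why the code above special-cases days_until_target == 0 to keep today's collection in the schedule.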