uk_bin_collection-0.114.6-py3-none-any.whl → uk_bin_collection-0.116.0-py3-none-any.whl
- uk_bin_collection/tests/input.json +84 -21
- uk_bin_collection/uk_bin_collection/councils/AntrimAndNewtonabbeyCouncil.py +53 -0
- uk_bin_collection/uk_bin_collection/councils/ArgyllandButeCouncil.py +67 -0
- uk_bin_collection/uk_bin_collection/councils/AshfieldDistrictCouncil.py +105 -0
- uk_bin_collection/uk_bin_collection/councils/BaberghDistrictCouncil.py +132 -0
- uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py +36 -6
- uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py +71 -0
- uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py +95 -113
- uk_bin_collection/uk_bin_collection/councils/DerbyCityCouncil.py +55 -0
- uk_bin_collection/uk_bin_collection/councils/GraveshamBoroughCouncil.py +122 -0
- uk_bin_collection/uk_bin_collection/councils/HertsmereBoroughCouncil.py +161 -0
- uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py +132 -0
- uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py +60 -41
- uk_bin_collection/uk_bin_collection/councils/WarringtonBoroughCouncil.py +50 -0
- uk_bin_collection/uk_bin_collection/councils/WestLancashireBoroughCouncil.py +114 -0
- {uk_bin_collection-0.114.6.dist-info → uk_bin_collection-0.116.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.114.6.dist-info → uk_bin_collection-0.116.0.dist-info}/RECORD +20 -9
- {uk_bin_collection-0.114.6.dist-info → uk_bin_collection-0.116.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.114.6.dist-info → uk_bin_collection-0.116.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.114.6.dist-info → uk_bin_collection-0.116.0.dist-info}/entry_points.txt +0 -0
uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py

```diff
@@ -0,0 +1,71 @@
+from datetime import datetime
+
+import requests
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        user_uprn = kwargs.get("uprn")
+        user_postcode = kwargs.get("postcode")
+        check_uprn(user_uprn)
+        check_postcode(user_postcode)
+        bindata = {"bins": []}
+
+        API_URL = "https://www.broxbourne.gov.uk/xfp/form/205"
+
+        post_data = {
+            "page": "490",
+            "locale": "en_GB",
+            "qacf7e570cf99fae4cb3a2e14d5a75fd0d6561058_0_0": user_postcode,
+            "qacf7e570cf99fae4cb3a2e14d5a75fd0d6561058_1_0": user_uprn,
+            "next": "Next",
+        }
+
+        r = requests.post(API_URL, data=post_data)
+        r.raise_for_status()
+
+        soup = BeautifulSoup(r.content, features="html.parser")
+        soup.prettify()
+
+        form__instructions = soup.find(attrs={"class": "form__instructions"})
+        table = form__instructions.find("table")
+
+        rows = table.find_all("tr")
+
+        current_year = datetime.now().year
+
+        # Process each row into a list of dictionaries
+        for row in rows[1:]:  # Skip the header row
+            columns = row.find_all("td")
+            collection_date = (
+                columns[0].get_text(separator=" ").replace("\xa0", " ").strip()
+            )
+            service = columns[1].get_text(separator=" ").replace("\xa0", " ").strip()
+
+            collection_date = datetime.strptime(collection_date, "%a %d %b")
+
+            if collection_date.month == 1:
+                collection_date = collection_date.replace(year=current_year + 1)
+            else:
+                collection_date = collection_date.replace(year=current_year)
+
+            dict_data = {
+                "type": service,
+                "collectionDate": (collection_date).strftime(date_format),
+            }
+            bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+        return bindata
```
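The Broxbourne form returns collection dates without a year ("%a %d %b"), so the module infers one: January dates are assumed to belong to the next calendar year, everything else to the current one. (The stray `soup.prettify()` call is a no-op; its return value is discarded.) A minimal sketch of the year heuristic, isolated for clarity (the function name and sample values are illustrative, not part of the module):

```python
from datetime import datetime

def infer_collection_year(day_text: str, today: datetime) -> datetime:
    # Mirrors the heuristic above. Edge cases worth knowing: run during
    # January, a January date lands a year late; run in December, a
    # February date lands a year early.
    parsed = datetime.strptime(day_text, "%a %d %b")
    year = today.year + 1 if parsed.month == 1 else today.year
    return parsed.replace(year=year)

print(infer_collection_year("Mon 06 Jan", datetime(2024, 12, 20)).date())  # 2025-01-06
```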
uk_bin_collection/uk_bin_collection/councils/CheshireWestAndChesterCouncil.py

```diff
@@ -1,123 +1,105 @@
-import logging
 import time
 
-
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.ui import Select
-from selenium.webdriver.support.wait import WebDriverWait
+import requests
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-# Set up logging
-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
-)
 
+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
-
-
-
-
-
-        user_uprn = kwargs.get("uprn")
-        user_paon = kwargs.get("paon")
-        user_postcode = kwargs.get("postcode")
-        web_driver = kwargs.get("web_driver")
-        headless = kwargs.get("headless")
-        check_paon(user_paon)
-        check_postcode(user_postcode)
-
-        # Create Selenium webdriver
-        driver = create_webdriver(web_driver, headless, None, __name__)
-        if headless:
-            driver.set_window_size(1920, 1080)
-
-        driver.get(
-            "https://www.cheshirewestandchester.gov.uk/residents/waste-and-recycling/your-bin-collection/collection-day"
-        )
-        wait = WebDriverWait(driver, 60)
-
-        def click_element(by, value):
-            element = wait.until(EC.element_to_be_clickable((by, value)))
-            driver.execute_script("arguments[0].scrollIntoView();", element)
-            element.click()
-
-        logging.info("Accepting cookies")
-        click_element(By.ID, "ccc-close")
-
-        logging.info("Finding collection day")
-        click_element(By.LINK_TEXT, "Find your collection day")
-
-        logging.info("Switching to iframe")
-        iframe_presence = wait.until(
-            EC.presence_of_element_located((By.ID, "fillform-frame-1"))
-        )
-        driver.switch_to.frame(iframe_presence)
-
-        logging.info("Entering postcode")
-        input_element_postcode = wait.until(
-            EC.presence_of_element_located(
-                (By.XPATH, '//input[@id="postcode_search"]')
-            )
-        )
-        input_element_postcode.send_keys(user_postcode)
-
-        pcsearch_btn = wait.until(
-            EC.element_to_be_clickable((By.XPATH, "//input[@id='postcode_search']"))
-        )
-        click_element(By.XPATH, "//input[@id='postcode_search']")
-
-        logging.info("Selecting address")
-        dropdown = wait.until(EC.element_to_be_clickable((By.ID, "Choose_Address")))
-        dropdown_options = wait.until(
-            EC.presence_of_element_located((By.CLASS_NAME, "lookup-option"))
-        )
-        drop_down_values = Select(dropdown)
-        option_element = wait.until(
-            EC.presence_of_element_located(
-                (By.CSS_SELECTOR, f'option.lookup-option[value="{str(user_uprn)}"]')
-            )
-        )
-        driver.execute_script("arguments[0].scrollIntoView();", option_element)
-        drop_down_values.select_by_value(str(user_uprn))
-
-        logging.info("Waiting for bin schedule")
-        wait.until(
-            EC.presence_of_element_located(
-                (By.CLASS_NAME, "bin-schedule-content-bin-card")
-            )
-        )
-
-        logging.info("Extracting bin collection data")
-        soup = BeautifulSoup(driver.page_source, features="html.parser")
-        bin_cards = soup.find_all("div", {"class": "bin-schedule-content-bin-card"})
-        collections = []
-
-        for card in bin_cards:
-            bin_info = card.find("div", {"class": "bin-schedule-content-info"})
-            bin_name = bin_info.find_all("p")[0].text.strip() + " bin"
-            bin_date_str = bin_info.find_all("p")[1].text.split(":")[1].strip()
-            bin_date = datetime.strptime(bin_date_str, "%A, %B %d, %Y")
-            collections.append((bin_name, bin_date))
-
-        ordered_data = sorted(collections, key=lambda x: x[1])
-
-        for item in ordered_data:
-            dict_data = {
-                "type": item[0].capitalize(),
-                "collectionDate": item[1].strftime(date_format),
-            }
-            data["bins"].append(dict_data)
-
-        logging.info("Data extraction complete")
-        return data
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
 
-
-        logging.error(f"An error occurred: {e}")
-        raise
+    def parse_data(self, page: str, **kwargs) -> dict:
 
-
-
-
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        SESSION_URL = "https://my.cheshirewestandchester.gov.uk/authapi/isauthenticated?uri=https://my.cheshirewestandchester.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=&hostname=my.cheshirewestandchester.gov.uk&withCredentials=true"
+
+        API_URL = "https://my.cheshirewestandchester.gov.uk/apibroker/runLookup"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "User-Agent": "Mozilla/5.0",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": "https://mycouncil.milton-keynes.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=",
+        }
+        s = requests.session()
+        r = s.get(SESSION_URL)
+        r.raise_for_status()
+        session_data = r.json()
+        sid = session_data["auth-session"]
+        params = {
+            "id": "609b918c7dd6d",
+            "repeat_against": "",
+            "noRetry": "false",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AchieveForms",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        r = s.post(API_URL, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"]["0"]
+        AuthenticateResponse = rows_data["AuthenticateResponse"]
+
+        params = {
+            "id": "6101d23110243",
+            "repeat_against": "",
+            "noRetry": "false",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AchieveForms",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        data = {
+            "formValues": {
+                "Section 1": {
+                    "UPRN": {
+                        "value": user_uprn,
+                    },
+                    "AuthenticateResponse": {
+                        "value": AuthenticateResponse,
+                    },
+                }
+            },
+        }
+
+        r = s.post(API_URL, json=data, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"]
+        if not isinstance(rows_data, dict):
+            raise ValueError("Invalid data returned from API")
+
+        # Extract each service's relevant details for the bin schedule
+        for item in rows_data.values():
+            dict_data = {
+                "type": item["serviceType"],
+                "collectionDate": datetime.strptime(
+                    item["collectionDateTime"], "%Y-%m-%dT%H:%M:%S"
+                ).strftime(date_format),
+            }
+            bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+
+        return bindata
```
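This rewrite drops the Selenium flow for direct calls to the council's AchieveForms API broker: `isauthenticated` yields a session id, a first `runLookup` (id `609b918c7dd6d`) exchanges it for an `AuthenticateResponse` token, and a second `runLookup` (id `6101d23110243`) posts the UPRN. Note that the `Referer` header still points at `mycouncil.milton-keynes.gov.uk`, apparently carried over from the Milton Keynes module; the endpoint evidently accepts it. A condensed sketch of the same round-trips, assuming plain `requests` and omitting some boilerplate query parameters (the lookup ids come from the diff; none of this is a documented API):

```python
import time

import requests

BASE = "https://my.cheshirewestandchester.gov.uk"

def run_lookup(s, sid, lookup_id, body=None):
    # One runLookup round-trip, shaped like the calls in the diff above.
    params = {
        "id": lookup_id,
        "app_name": "AchieveForms",
        "_": str(int(time.time() * 1000)),  # cache-busting timestamp
        "sid": sid,
    }
    r = s.post(f"{BASE}/apibroker/runLookup", params=params, json=body or {})
    r.raise_for_status()
    return r.json()["integration"]["transformed"]["rows_data"]

s = requests.Session()
session_url = f"{BASE}/authapi/isauthenticated?uri={BASE}/fillform/&hostname=my.cheshirewestandchester.gov.uk&withCredentials=true"  # shortened
sid = s.get(session_url).json()["auth-session"]
token = run_lookup(s, sid, "609b918c7dd6d")["0"]["AuthenticateResponse"]
rows = run_lookup(
    s, sid, "6101d23110243",
    {"formValues": {"Section 1": {
        "UPRN": {"value": "PLACEHOLDER_UPRN"},  # placeholder, not a real UPRN
        "AuthenticateResponse": {"value": token},
    }}},
)
```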
uk_bin_collection/uk_bin_collection/councils/DerbyCityCouncil.py

```diff
@@ -0,0 +1,55 @@
+import requests
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        URI = f"https://secure.derby.gov.uk/binday/Binday?search.PremisesId={user_uprn}"
+
+        # Make the GET request
+        session = requests.Session()
+        response = session.get(URI)
+
+        soup = BeautifulSoup(response.content, "html.parser")
+
+        # Find all divs with class "binresult" which contain the bin collection information
+        bin_results = soup.find_all("div", class_="binresult")
+
+        # Loop through each bin result to extract date and bin type
+        for result in bin_results:
+            # Find the collection date
+            date_text = result.find("p").strong.get_text(strip=True)
+
+            # Find the bin type by looking at the 'alt' attribute of the img tag
+            bin_type = result.find("img")["alt"]
+
+            if bin_type != "No bins":
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": datetime.strptime(
+                        date_text,
+                        "%A, %d %B %Y:",
+                    ).strftime(date_format),
+                }
+                bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+
+        return bindata
```
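DerbyCityCouncil.py is the simplest of the new modules: one GET keyed on the UPRN, then a scrape of each `div.binresult`. The trailing colon in the strptime format is deliberate; it matches date strings of the form "Friday, 01 November 2024:". A hypothetical direct invocation (placeholder UPRN; these classes are normally driven through the project's own tooling rather than called like this):

```python
from uk_bin_collection.uk_bin_collection.councils.DerbyCityCouncil import CouncilClass

council = CouncilClass()
data = council.parse_data(page="", uprn="100030300000")  # placeholder UPRN
for entry in data["bins"]:
    print(entry["collectionDate"], entry["type"])
```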
uk_bin_collection/uk_bin_collection/councils/GraveshamBoroughCouncil.py

```diff
@@ -0,0 +1,122 @@
+import time
+
+import requests
+from dateutil.relativedelta import relativedelta
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        SESSION_URL = "https://my.gravesham.gov.uk/authapi/isauthenticated?uri=https%253A%252F%252Fmy.gravesham.gov.uk%252Fen%252FAchieveForms%252F%253Fform_uri%253Dsandbox-publish%253A%252F%252FAF-Process-22218d5c-c6d6-492f-b627-c713771126be%252FAF-Stage-905e87c1-144b-4a72-8932-5518ddd3e618%252Fdefinition.json%2526redirectlink%253D%25252Fen%2526cancelRedirectLink%253D%25252Fen%2526consentMessage%253Dyes&hostname=my.gravesham.gov.uk&withCredentials=true"
+
+        API_URL = "https://my.gravesham.gov.uk/apibroker/runLookup"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "User-Agent": "Mozilla/5.0",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": "https://my.gravesham.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=",
+        }
+        s = requests.session()
+        r = s.get(SESSION_URL)
+        r.raise_for_status()
+        session_data = r.json()
+        sid = session_data["auth-session"]
+        params = {
+            "id": "5ee8854759297",
+            "repeat_against": "",
+            "noRetry": "false",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AF-Renderer::Self",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+        r = s.post(API_URL, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"]["0"]
+        tokenString = rows_data["tokenString"]
+
+        # Get the current date and time
+        current_datetime = datetime.now()
+        future_datetime = current_datetime + relativedelta(months=1)
+
+        # Format it using strftime
+        current_datetime = current_datetime.strftime("%Y-%m-%dT%H:%M:%S")
+        future_datetime = future_datetime.strftime("%Y-%m-%dT%H:%M:%S")
+
+        data = {
+            "formValues": {
+                "Check your bin day": {
+                    "tokenString": {
+                        "value": tokenString,
+                    },
+                    "UPRNForAPI": {
+                        "value": user_uprn,
+                    },
+                    "formatDateToday": {
+                        "value": current_datetime,
+                    },
+                    "formatDateTo": {
+                        "value": future_datetime,
+                    },
+                }
+            },
+        }
+
+        params = {
+            "id": "5c8f869376376",
+            "repeat_against": "",
+            "noRetry": "false",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AF-Renderer::Self",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+        r = s.post(API_URL, json=data, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+
+        rows_data = data["integration"]["transformed"]["rows_data"]
+        if not isinstance(rows_data, dict):
+            raise ValueError("Invalid data returned from API")
+
+        # Extract each service's relevant details for the bin schedule
+        for item in rows_data.values():
+            if item["Name"]:
+                Bin_Types = item["Name"].split("Empty Bin ")
+                for Bin_Type in Bin_Types:
+                    if Bin_Type:
+                        dict_data = {
+                            "type": Bin_Type.strip(),
+                            "collectionDate": datetime.strptime(
+                                item["Date"], "%Y-%m-%dT%H:%M:%S"
+                            ).strftime(date_format),
+                        }
+                        bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+        return bindata
```
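GraveshamBoroughCouncil.py follows the same AchieveForms pattern, with two additions: the request carries a one-month date window (formatDateToday/formatDateTo) and a tokenString fetched in the first lookup. The notable parsing detail is the service-name split: a day's `Name` field can pack several collections into one string, which the loop breaks apart on "Empty Bin ". A sketch of that split (the sample string is an assumed shape, inferred from the code rather than captured from the API):

```python
name = "Empty Bin Refuse Empty Bin Food Waste"  # assumed shape of item["Name"]
bin_types = [part.strip() for part in name.split("Empty Bin ") if part]
print(bin_types)  # ['Refuse', 'Food Waste']
```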
uk_bin_collection/uk_bin_collection/councils/HertsmereBoroughCouncil.py

```diff
@@ -0,0 +1,161 @@
+import re
+import time
+
+import requests
+from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support.wait import WebDriverWait
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        user_paon = kwargs.get("paon")
+        user_postcode = kwargs.get("postcode")
+        web_driver = kwargs.get("web_driver")
+        headless = kwargs.get("headless")
+        check_paon(user_paon)
+        check_postcode(user_postcode)
+        bindata = {"bins": []}
+
+        URI_1 = "https://www.hertsmere.gov.uk/Environment-Refuse-and-Recycling/Recycling--Waste/Bin-collections/Collections-and-calendar.aspx"
+        URI_2 = "https://hertsmere-services.onmats.com/w/webpage/round-search"
+
+        # Create Selenium webdriver
+        driver = create_webdriver(web_driver, headless, None, __name__)
+        driver.get(URI_1)
+
+        soup = BeautifulSoup(driver.page_source, "html.parser")
+
+        current_week = (soup.find("li", class_="current")).text.strip()
+
+        strong = soup.find_all("strong", text=re.compile(r"^Week"))
+
+        bin_weeks = []
+        for tag in strong:
+            parent = tag.parent
+            bin_type = (
+                (parent.text).split("-")[1].strip().replace("\xa0", " ").split(" and ")
+            )
+            for bin in bin_type:
+                dict_data = {
+                    "week": tag.text.replace("\xa0", " "),
+                    "bin_type": bin,
+                }
+                bin_weeks.append(dict_data)
+
+        driver.get(URI_2)
+
+        # Wait for the postcode field to appear then populate it
+        inputElement_postcode = WebDriverWait(driver, 30).until(
+            EC.presence_of_element_located(
+                (
+                    By.CLASS_NAME,
+                    "relation_path_type_ahead_search",
+                )
+            )
+        )
+        inputElement_postcode.send_keys(user_postcode)
+
+        WebDriverWait(driver, 10).until(
+            EC.element_to_be_clickable(
+                (
+                    By.XPATH,
+                    f"//ul[@class='result_list']/li[starts-with(@aria-label, '{user_paon}')]",
+                )
+            )
+        ).click()
+
+        WebDriverWait(driver, timeout=10).until(
+            EC.element_to_be_clickable(
+                (
+                    By.CSS_SELECTOR,
+                    "input.fragment_presenter_template_edit.btn.bg-primary.btn-medium[type='submit']",
+                )
+            )
+        ).click()
+
+        WebDriverWait(driver, timeout=10).until(
+            EC.presence_of_element_located(
+                (By.XPATH, "//h3[contains(text(), 'Collection days')]")
+            )
+        )
+
+        soup = BeautifulSoup(driver.page_source, "html.parser")
+
+        table = soup.find("table", class_="table listing table-striped")
+
+        # Check if the table was found
+        if table:
+            # Extract table rows and cells
+            table_data = []
+            for row in table.find("tbody").find_all("tr"):
+                # Extract cell data from each <td> tag
+                row_data = [cell.get_text(strip=True) for cell in row.find_all("td")]
+                table_data.append(row_data)
+
+        else:
+            print("Table not found.")
+
+        collection_day = (table_data[0])[1]
+
+        current_week_bins = [bin for bin in bin_weeks if bin["week"] == current_week]
+        next_week_bins = [bin for bin in bin_weeks if bin["week"] != current_week]
+
+        days_of_week = [
+            "Monday",
+            "Tuesday",
+            "Wednesday",
+            "Thursday",
+            "Friday",
+            "Saturday",
+            "Sunday",
+        ]
+
+        today = datetime.now()
+        today_idx = today.weekday()  # Monday is 0 and Sunday is 6
+        target_idx = days_of_week.index(collection_day)
+
+        days_until_target = (target_idx - today_idx) % 7
+        if days_until_target == 0:
+            next_day = today
+        else:
+            next_day = today + timedelta(days=days_until_target)
+
+        current_week_dates = get_dates_every_x_days(next_day, 14, 7)
+        next_week_date = next_day + timedelta(days=7)
+        next_week_dates = get_dates_every_x_days(next_week_date, 14, 7)
+
+        for date in current_week_dates:
+            for bin in current_week_bins:
+                dict_data = {
+                    "type": bin["bin_type"],
+                    "collectionDate": date,
+                }
+                bindata["bins"].append(dict_data)
+
+        for date in next_week_dates:
+            for bin in next_week_bins:
+                dict_data = {
+                    "type": bin["bin_type"],
+                    "collectionDate": date,
+                }
+                bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+
+        return bindata
```
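HertsmereBoroughCouncil.py is a hybrid: the alternating-week legend (the `strong` tags beginning "Week") is scraped from the council site, the property's collection day comes from the onmats round-search via Selenium, and the dates themselves are generated rather than scraped, using the common module's `get_dates_every_x_days`. A sketch of that generation step, assuming the helper returns `count` dates spaced `step` days apart and formatted dd/mm/yyyy to match the sort at the end of `parse_data`:

```python
from datetime import datetime, timedelta

def dates_every_x_days(start, step, count):
    # Assumed equivalent of get_dates_every_x_days(start, 14, 7) above.
    return [(start + timedelta(days=step * i)).strftime("%d/%m/%Y") for i in range(count)]

next_collection = datetime(2024, 11, 4)  # illustrative next collection day
print(dates_every_x_days(next_collection, 14, 3))
# ['04/11/2024', '18/11/2024', '02/12/2024']
```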