uk_bin_collection 0.79.1__py3-none-any.whl → 0.81.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/input.json +47 -1
- uk_bin_collection/uk_bin_collection/common.py +23 -1
- uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/BirminghamCityCouncil.py +27 -13
- uk_bin_collection/uk_bin_collection/councils/BracknellForestCouncil.py +4 -4
- uk_bin_collection/uk_bin_collection/councils/BristolCityCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py +6 -15
- uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +8 -6
- uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py +5 -4
- uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py +7 -9
- uk_bin_collection/uk_bin_collection/councils/EastCambridgeshireCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py +13 -5
- uk_bin_collection/uk_bin_collection/councils/LancasterCityCouncil.py +6 -4
- uk_bin_collection/uk_bin_collection/councils/LisburnCastlereaghCityCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/ManchesterCityCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/NewForestCouncil.py +146 -0
- uk_bin_collection/uk_bin_collection/councils/NewportCityCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/SomersetCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/SouthCambridgeshireCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/StHelensBC.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/StaffordBoroughCouncil.py +69 -0
- uk_bin_collection/uk_bin_collection/councils/SwanseaCouncil.py +70 -0
- uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py +1 -0
- uk_bin_collection/uk_bin_collection/councils/ThreeRiversDistrictCouncil.py +140 -0
- uk_bin_collection/uk_bin_collection/councils/UttlesfordDistrictCouncil.py +117 -0
- uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/WiganBoroughCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/WindsorAndMaidenheadCouncil.py +2 -1
- uk_bin_collection/uk_bin_collection/councils/WokingBoroughCouncil.py +4 -2
- uk_bin_collection/uk_bin_collection/councils/WychavonDistrictCouncil.py +156 -0
- {uk_bin_collection-0.79.1.dist-info → uk_bin_collection-0.81.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.79.1.dist-info → uk_bin_collection-0.81.0.dist-info}/RECORD +36 -30
- {uk_bin_collection-0.79.1.dist-info → uk_bin_collection-0.81.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.79.1.dist-info → uk_bin_collection-0.81.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.79.1.dist-info → uk_bin_collection-0.81.0.dist-info}/entry_points.txt +0 -0
uk_bin_collection/uk_bin_collection/councils/NewForestCouncil.py (new file, +146):

@@ -0,0 +1,146 @@
+import logging
+import pickle
+import time
+
+import requests
+from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support.wait import WebDriverWait
+from uk_bin_collection.uk_bin_collection.common import *
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+
+class CouncilClass(AbstractGetBinDataClass):
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        driver = None
+        try:
+            data = {"bins": []}
+            collections = []
+            user_uprn = kwargs.get("uprn")
+            user_postcode = kwargs.get("postcode")
+            web_driver = kwargs.get("web_driver")
+            headless = kwargs.get("headless")
+            check_postcode(user_postcode)
+            url = "https://forms.newforest.gov.uk/ufs/FIND_MY_COLLECTION.eb"
+
+            # Get session cookies using requests
+
+            user_agent = """general.useragent.override", "userAgent=Mozilla/5.0
+            (iPhone; CPU iPhone OS 15_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like
+            Gecko) CriOS/101.0.4951.44 Mobile/15E148 Safari/604.1"""
+
+            # Create Selenium webdriver
+            driver = create_webdriver(web_driver, headless, user_agent, __name__)
+
+            # Navigate to the page first
+            driver.get(url)
+            driver.refresh()  # important otherwise it results in too many redirects
+
+            wait = WebDriverWait(driver, 60)
+
+            logging.info("Entering postcode")
+            input_element_postcode = wait.until(
+                EC.presence_of_element_located((By.XPATH, '//input[@id="CTID-1-_-A"]'))
+            )
+
+            input_element_postcode.send_keys(user_postcode)
+
+            logging.info("Searching for postcode")
+            input_element_postcode_btn = wait.until(
+                EC.element_to_be_clickable((By.XPATH, '//input[@type="submit"]'))
+            )
+
+            input_element_postcode_btn.click()
+
+            logging.info("Waiting for address dropdown")
+            input_element_postcode_dropdown = wait.until(
+                EC.presence_of_element_located((By.XPATH, '//select[@id="CTID-6-_-A"]'))
+            )
+
+            logging.info("Selecting address")
+            drop_down_values = Select(input_element_postcode_dropdown)
+            option_element = wait.until(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, f'option[value="{str(user_uprn)}"]')
+                )
+            )
+
+            driver.execute_script("arguments[0].scrollIntoView();", option_element)
+            drop_down_values.select_by_value(str(user_uprn))
+
+            input_element_address_btn = wait.until(
+                EC.element_to_be_clickable((By.XPATH, '//input[@value="Submit"]'))
+            )
+
+            input_element_address_btn.click()
+
+            logging.info("Waiting for bin collection page")
+            h4_element = wait.until(
+                EC.presence_of_element_located(
+                    (By.XPATH, "//h1[contains(text(), 'Collections days for')]")
+                )
+            )
+
+            logging.info("Extracting bin collection data")
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+            bins = []
+
+            # Rubbish and recycling
+            rubbish_recycling = soup.find(
+                "span", class_="CTID-77-_ eb-77-Override-textControl"
+            )
+            if rubbish_recycling:
+                match = re.search(r"collected weekly on (\w+)", rubbish_recycling.text)
+                if match:
+                    day_name = match.group(1)
+                    next_collection = get_next_day_of_week(day_name)
+                    bins.append(
+                        {
+                            "type": "Rubbish and recycling",
+                            "collectionDate": next_collection,
+                        }
+                    )
+
+            # Glass collection
+            glass_collection = soup.find("span", class_="CTID-78-_ eb-78-textControl")
+            if glass_collection:
+                match = re.search(
+                    r"next collection is\s+(\d{2}/\d{2}/\d{4})", glass_collection.text
+                )
+                if match:
+                    bins.append(
+                        {"type": "Glass collection", "collectionDate": match.group(1)}
+                    )
+
+            # Garden waste
+            garden_waste = soup.find("span", class_="CTID-17-_ eb-17-textControl")
+            if garden_waste:
+                match = re.search(
+                    r"next collection is\s+(\d{2}/\d{2}/\d{4})", garden_waste.text
+                )
+                if match:
+                    bins.append(
+                        {"type": "Garden waste", "collectionDate": match.group(1)}
+                    )
+
+            return {"bins": bins}
+
+        except Exception as e:
+            logging.error(f"An error occurred: {e}")
+            raise
+
+        finally:
+            if driver:
+                driver.quit()
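
As a usage sketch only (not the package's documented CLI), the class can be exercised directly; the kwargs mirror the ones `parse_data` reads above, and the UPRN and postcode values here are hypothetical placeholders:

from uk_bin_collection.uk_bin_collection.councils.NewForestCouncil import CouncilClass

council = CouncilClass()
result = council.parse_data(
    "",                   # the page argument is unused; the scraper drives Selenium itself
    uprn="100060514894",  # hypothetical UPRN
    postcode="SO43 7AA",  # hypothetical postcode
    web_driver=None,      # or the URL of a remote Selenium instance
    headless=True,
)
print(result)  # {"bins": [{"type": ..., "collectionDate": ...}, ...]}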
uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py:

@@ -3,6 +3,7 @@ from datetime import *
 
 import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -24,7 +25,7 @@ class CouncilClass(AbstractGetBinDataClass):
         check_postcode(user_postcode)
 
         # Get form data
-        s = requests.
+        s = requests.Session()
         cookies = {
             "ntc-cookie-policy": "1",
             "SSESS6ec6d5d2d471c0357053d5993a839bce": "qBdR7XhmSMd5_PDBIqG0It2R0Fq67igrejRY-WOcskE",
@@ -1,4 +1,5 @@
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -21,7 +22,7 @@ class CouncilClass(AbstractGetBinDataClass):
 
         # Make Request
         requests.packages.urllib3.disable_warnings()
-        s = requests.
+        s = requests.Session()
         page = s.get(url)
 
         # Make a BS4 object
uk_bin_collection/uk_bin_collection/councils/StaffordBoroughCouncil.py (new file, +69):

@@ -0,0 +1,69 @@
+from bs4 import BeautifulSoup
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_date(self, date_str):
+        months = {
+            "Jan": "01",
+            "Feb": "02",
+            "Mar": "03",
+            "Apr": "04",
+            "May": "05",
+            "Jun": "06",
+            "Jul": "07",
+            "Aug": "08",
+            "Sep": "09",
+            "Oct": "10",
+            "Nov": "11",
+            "Dec": "12",
+        }
+        day, date, month_abbr, year = date_str.split()
+        month = months[month_abbr]
+        return f"{date}/{month}/{year}"
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        # Make a BS4 object
+        soup = BeautifulSoup(page.text, features="html.parser")
+        soup.prettify()
+
+        # Initialize the bin data structure
+        bin_data = {"bins": []}
+
+        # Find the table with collection dates
+        table = soup.find("table", class_="my-area")
+
+        # Extract the rows containing the bin collection information
+        rows = table.find_all("tr")
+
+        # Loop through the rows and extract bin data
+        for row in rows:
+            cells = row.find_all("td")
+            if len(cells) == 2:
+                bin_type = cells[0].get_text(strip=True)
+                collection_date = cells[1].get_text(strip=True)
+
+                if "Next refuse" in bin_type:
+                    bin_data["bins"].append(
+                        {
+                            "type": "refuse",
+                            "collectionDate": self.parse_date(collection_date),
+                        }
+                    )
+                elif "Next recycling" in bin_type:
+                    bin_data["bins"].append(
+                        {
+                            "type": "recycling",
+                            "collectionDate": self.parse_date(collection_date),
+                        }
+                    )
+
+        return bin_data
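
The hand-rolled month table in `parse_date` above sidesteps locale dependence in `strptime`. For comparison, a sketch of the same conversion using `datetime`, assuming the incoming string looks like "Mon 01 Jan 2024" with an abbreviated English day name:

from datetime import datetime

def parse_date(date_str: str) -> str:
    # "Mon 01 Jan 2024" -> "01/01/2024"; %a and %b assume an English locale
    return datetime.strptime(date_str, "%a %d %b %Y").strftime("%d/%m/%Y")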
uk_bin_collection/uk_bin_collection/councils/SwanseaCouncil.py (new file, +70):

@@ -0,0 +1,70 @@
+import logging
+from datetime import datetime
+
+import requests
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+HEADERS = {
+    "user-agent": "Mozilla/5.0",
+}
+
+
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete class implementing all abstract operations of the base class.
+    """
+
+    def get_session_variable(self, soup, id) -> str:
+        """Extract ASP.NET variable from the HTML."""
+        element = soup.find("input", {"id": id})
+        if element:
+            return element.get("value")
+        else:
+            raise ValueError(f"Unable to find element with id: {id}")
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        # Create a session to handle cookies and headers
+        session = requests.Session()
+        session.headers.update(HEADERS)
+        user_uprn = kwargs.get("uprn")
+        user_postcode = kwargs.get("postcode")
+        URL = "https://www1.swansea.gov.uk/recyclingsearch/"
+
+        # Get initial ASP.NET variables
+        response = session.get(URL)
+        response.raise_for_status()
+
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        data = {
+            "__VIEWSTATE": self.get_session_variable(soup, "__VIEWSTATE"),
+            "__VIEWSTATEGENERATOR": self.get_session_variable(
+                soup, "__VIEWSTATEGENERATOR"
+            ),
+            "__VIEWSTATEENCRYPTED": "",
+            "__EVENTVALIDATION": self.get_session_variable(soup, "__EVENTVALIDATION"),
+            "txtRoadName": user_uprn,
+            "txtPostCode": user_postcode,
+            "btnSearch": "Search",
+        }
+
+        # Get the collection calendar
+        response = session.post(URL, data=data)
+        response.raise_for_status()
+
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        next_refuse_date = soup.find("span", {"id": "lblNextRefuse"}).text.strip()
+        next_recycling_date = soup.find("span", {"id": "lblNextRecycling"}).text.strip()
+
+        bin_data = {
+            "bins": [
+                {"type": "Pink Week", "collectionDate": next_refuse_date},
+                {"type": "Green Week", "collectionDate": next_recycling_date},
+            ]
+        }
+
+        return bin_data
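
Note that Swansea's search form posts a road name rather than a UPRN: `txtRoadName` is filled from the `uprn` kwarg above. A usage sketch with hypothetical values:

council = CouncilClass()
# The "uprn" kwarg carries the road name here (see txtRoadName in the form data above).
print(council.parse_data("", uprn="High Street", postcode="SA1 1AA"))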
uk_bin_collection/uk_bin_collection/councils/ThreeRiversDistrictCouncil.py (new file, +140):

@@ -0,0 +1,140 @@
+import logging
+import time
+
+from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support.wait import WebDriverWait
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+
+class CouncilClass(AbstractGetBinDataClass):
+    def parse_data(self, page: str, **kwargs) -> dict:
+        driver = None
+        try:
+            data = {"bins": []}
+            collections = []
+            user_uprn = kwargs.get("uprn")
+            user_postcode = kwargs.get("postcode")
+            web_driver = kwargs.get("web_driver")
+            headless = kwargs.get("headless")
+            check_postcode(user_postcode)
+
+            # Create Selenium webdriver
+            driver = create_webdriver(web_driver, headless, None, __name__)
+            if headless:
+                driver.set_window_size(1920, 1080)
+
+            driver.get(
+                "https://my.threerivers.gov.uk/en/AchieveForms/?mode=fill&consentMessage=yes&form_uri=sandbox-publish://AF-Process-52df96e3-992a-4b39-bba3-06cfaabcb42b/AF-Stage-01ee28aa-1584-442c-8d1f-119b6e27114a/definition.json&process=1&process_uri=sandbox-processes://AF-Process-52df96e3-992a-4b39-bba3-06cfaabcb42b&process_id=AF-Process-52df96e3-992a-4b39-bba3-06cfaabcb42b&noLoginPrompt=1"
+            )
+            wait = WebDriverWait(driver, 60)
+
+            def click_element(by, value):
+                element = wait.until(EC.element_to_be_clickable((by, value)))
+                driver.execute_script("arguments[0].scrollIntoView();", element)
+                element.click()
+
+            click_element(By.XPATH, "//button[contains(text(), 'Continue')]")
+
+            logging.info("Switching to iframe")
+            iframe_presence = wait.until(
+                EC.presence_of_element_located((By.ID, "fillform-frame-1"))
+            )
+            driver.switch_to.frame(iframe_presence)
+
+            logging.info("Entering postcode")
+            input_element_postcode = wait.until(
+                EC.presence_of_element_located(
+                    (By.XPATH, '//input[@id="postcode_search"]')
+                )
+            )
+            input_element_postcode.send_keys(user_postcode)
+
+            logging.info("Selecting address")
+            dropdown = wait.until(EC.element_to_be_clickable((By.ID, "chooseAddress")))
+            dropdown_options = wait.until(
+                EC.presence_of_element_located((By.CLASS_NAME, "lookup-option"))
+            )
+            drop_down_values = Select(dropdown)
+            option_element = wait.until(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, f'option.lookup-option[value="{str(user_uprn)}"]')
+                )
+            )
+
+            driver.execute_script("arguments[0].scrollIntoView();", option_element)
+            drop_down_values.select_by_value(str(user_uprn))
+
+            option_element = wait.until(
+                EC.presence_of_element_located(
+                    (By.XPATH, '//div[@class="fieldContent"][1]')
+                )
+            )
+
+            time.sleep(2)
+
+            click_element(By.XPATH, "//button/span[contains(text(), 'Next')]")
+
+            logging.info("Waiting for bin schedule")
+            bin_results = wait.until(
+                EC.presence_of_element_located(
+                    (By.XPATH, "//div[@data-field-name='subCollectionCalendar']//table")
+                )
+            )
+
+            logging.info("Extracting bin collection data")
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+            bin_cards = soup.find_all(
+                "div", {"data-field-name": "subCollectionCalendar"}
+            )
+
+            bins = []
+
+            for bin_card in bin_cards:
+                # Try to find the table within the bin_card
+                table = bin_card.find(
+                    "table",
+                    {
+                        "class": "repeatable-table table table-responsive table-hover table-condensed"
+                    },
+                )
+
+                if table:
+                    print("Table found")
+                    rows = table.select("tr.repeatable-value")
+                    for row in rows:
+                        cols = row.find_all("td", class_="value")
+                        if len(cols) >= 3:  # Ensure there are enough columns
+                            bin_type = cols[1].find_all("span")[-1].text.strip()
+                            collection_date = (
+                                cols[2]
+                                .find_all("span")[-1]
+                                .text.strip()
+                                .replace("-", "/")
+                            )
+                            bins.append(
+                                {"type": bin_type, "collectionDate": collection_date}
+                            )
+                else:
+                    print("Table not found within bin_card")
+
+            bin_data = {"bins": bins}
+            logging.info("Data extraction complete")
+            return bin_data
+
+        except Exception as e:
+            logging.error(f"An error occurred: {e}")
+            raise
+
+        finally:
+            if driver:
+                driver.quit()
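
All of these scrapers return the same `{"bins": [...]}` shape seen in the return statements above. A small structural check along these lines (a sketch, not part of the package, assuming dd/mm/yyyy collection dates) can catch a broken selector early:

from datetime import datetime

def check_bin_data(bin_data: dict) -> None:
    for entry in bin_data["bins"]:
        assert isinstance(entry["type"], str) and entry["type"]
        datetime.strptime(entry["collectionDate"], "%d/%m/%Y")  # raises if malformed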
uk_bin_collection/uk_bin_collection/councils/UttlesfordDistrictCouncil.py (new file, +117):

@@ -0,0 +1,117 @@
+import logging
+import pickle
+import time
+
+import requests
+from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support.wait import WebDriverWait
+from uk_bin_collection.uk_bin_collection.common import *
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+
+class CouncilClass(AbstractGetBinDataClass):
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        driver = None
+        try:
+            user_paon = kwargs.get("paon")
+            user_postcode = kwargs.get("postcode")
+            web_driver = kwargs.get("web_driver")
+            headless = kwargs.get("headless")
+            check_postcode(user_postcode)
+            url = "https://bins.uttlesford.gov.uk/"
+
+            # Get session cookies using requests
+
+            user_agent = """general.useragent.override", "userAgent=Mozilla/5.0
+            (iPhone; CPU iPhone OS 15_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like
+            Gecko) CriOS/101.0.4951.44 Mobile/15E148 Safari/604.1"""
+
+            # Create Selenium webdriver
+            driver = create_webdriver(web_driver, headless, user_agent, __name__)
+
+            # Navigate to the page first
+            driver.get(url)
+
+            wait = WebDriverWait(driver, 60)
+
+            logging.info("Entering postcode")
+            input_element_postcode = wait.until(
+                EC.presence_of_element_located((By.XPATH, '//input[@id="postcode"]'))
+            )
+
+            input_element_postcode.send_keys(user_postcode)
+            input_element_postcode.send_keys(Keys.ENTER)
+
+            logging.info("Searching for postcode")
+            input_element_postcode_dd = wait.until(
+                EC.element_to_be_clickable((By.XPATH, '//select[@id="housenn"]'))
+            )
+
+            logging.info("Selecting address")
+            drop_down_values = Select(input_element_postcode_dd)
+
+            drop_down_values.select_by_visible_text(str(user_paon))
+
+            input_element_address_btn = wait.until(
+                EC.element_to_be_clickable(
+                    (By.XPATH, '//input[@alt="View your waste collection days"]')
+                )
+            )
+
+            input_element_address_btn.click()
+
+            logging.info("Waiting for bin collection page")
+            h3_element = wait.until(
+                EC.presence_of_element_located(
+                    (By.XPATH, "//h3[contains(text(), 'Future Collection Dates')]")
+                )
+            )
+
+            logging.info("Extracting bin collection data")
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+
+            bins = []
+            rows = soup.select("div.wrap table tbody tr")
+
+            for row in rows:
+                cols = row.find_all("td")
+                if len(cols) == 2:
+                    bin_types = [img["alt"] for img in cols[0].find_all("img")]
+                    collection_date_str = cols[1].text
+                    collection_date = datetime.strptime(
+                        collection_date_str, "%A %dth %B"
+                    )
+                    collection_date = collection_date.replace(
+                        year=2024
+                    )  # Assuming the year is 2024
+                    collection_date_str = collection_date.strftime("%d/%m/%Y")
+
+                    for bin_type in bin_types:
+                        bins.append(
+                            {"type": bin_type, "collectionDate": collection_date_str}
+                        )
+
+            bin_data = {"bins": bins}
+
+            return bin_data
+
+        except Exception as e:
+            logging.error(f"An error occurred: {e}")
+            raise
+
+        finally:
+            if driver:
+                driver.quit()
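
One caveat on the parsing above: in `"%A %dth %B"` the "th" is a literal, so it only matches day numbers whose ordinal suffix is "th" (not "1st", "2nd", "3rd", "21st", and so on). A sketch of a suffix-tolerant variant, under the assumption the site renders dates like "Friday 21st June":

import re
from datetime import datetime

def parse_future_date(text: str, year: int) -> str:
    # Strip the ordinal suffix, then parse e.g. "Friday 21 June"
    cleaned = re.sub(r"(\d{1,2})(st|nd|rd|th)", r"\1", text.strip())
    return datetime.strptime(cleaned, "%A %d %B").replace(year=year).strftime("%d/%m/%Y")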
uk_bin_collection/uk_bin_collection/councils/WaverleyBoroughCouncil.py:

@@ -2,6 +2,7 @@ from datetime import date, datetime
 
 import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -24,7 +25,7 @@ class CouncilClass(AbstractGetBinDataClass):
         # WBC use a url parameter called "Track" that's generated when you start a form session.
         # So first off, open the page, find the page link and copy it with the Track
         start_url = "https://wav-wrp.whitespacews.com/"
-        s = requests.
+        s = requests.Session()
         response = s.get(start_url)
         soup = BeautifulSoup(response.content, features="html.parser")
         soup.prettify()
uk_bin_collection/uk_bin_collection/councils/WiganBoroughCouncil.py:

@@ -2,6 +2,7 @@ from datetime import datetime
 
 import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -27,7 +28,7 @@ class CouncilClass(AbstractGetBinDataClass):
 
         # Start a new session to walk through the form
         requests.packages.urllib3.disable_warnings()
-        s = requests.
+        s = requests.Session()
 
         # Get our initial session running
         response = s.get("https://apps.wigan.gov.uk/MyNeighbourhood/")
uk_bin_collection/uk_bin_collection/councils/WindsorAndMaidenheadCouncil.py:

@@ -3,6 +3,7 @@ from urllib.parse import urlparse
 
 import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -25,7 +26,7 @@ class CouncilClass(AbstractGetBinDataClass):
         data = {"bins": []}
 
         requests.packages.urllib3.disable_warnings()
-        s = requests.
+        s = requests.Session()
         # Form start
         headers = {
             "authority": "my.rbwm.gov.uk",
uk_bin_collection/uk_bin_collection/councils/WokingBoroughCouncil.py:

@@ -1,5 +1,7 @@
-from bs4 import BeautifulSoup
 import urllib
+
+from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -27,7 +29,7 @@ class CouncilClass(AbstractGetBinDataClass):
         start = BeautifulSoup(req.text, features="html.parser")
         start.prettify()
         base_link = start.select(
-            "#menu-content > div > div:nth-child(1) > p.govuk-body.govuk
+            "#menu-content > div > div:nth-child(1) > p.govuk-body.govuk-\\!-margin-bottom-0.colorblue.lineheight15 > a"
        )[0].attrs.get("href")
 
        # We need to reorder the query parts from the unique URL, so split them up to make it easier
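
The repaired selector works because GOV.UK utility class names contain a literal "!", which must be escaped in a CSS selector as `\!`, hence the doubled backslash in the Python string. A self-contained check of the same technique with placeholder HTML:

from bs4 import BeautifulSoup

html = '<p class="govuk-body govuk-!-margin-bottom-0"><a href="/next">link</a></p>'
soup = BeautifulSoup(html, "html.parser")
print(soup.select("p.govuk-body.govuk-\\!-margin-bottom-0 > a")[0].attrs.get("href"))  # /next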
|