uk_bin_collection 0.77.0__py3-none-any.whl → 0.79.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uk_bin_collection/tests/council_feature_input_parity.py +38 -57
- uk_bin_collection/tests/features/validate_council_outputs.feature +5 -770
- uk_bin_collection/tests/input.json +70 -1
- uk_bin_collection/tests/step_defs/test_validate_council.py +44 -16
- uk_bin_collection/tests/test_common_functions.py +4 -2
- uk_bin_collection/uk_bin_collection/common.py +4 -1
- uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py +110 -0
- uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py +44 -0
- uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py +11 -6
- uk_bin_collection/uk_bin_collection/councils/EppingForestDistrictCouncil.py +51 -0
- uk_bin_collection/uk_bin_collection/councils/FarehamBoroughCouncil.py +68 -0
- uk_bin_collection/uk_bin_collection/councils/HounslowCouncil.py +122 -0
- uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +13 -10
- uk_bin_collection/uk_bin_collection/councils/SouthKestevenDistrictCouncil.py +151 -0
- uk_bin_collection/uk_bin_collection/councils/StroudDistrictCouncil.py +94 -0
- uk_bin_collection/uk_bin_collection/councils/TendringDistrictCouncil.py +110 -0
- uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +3 -1
- uk_bin_collection/uk_bin_collection/councils/WalthamForest.py +127 -0
- uk_bin_collection/uk_bin_collection/create_new_council.py +51 -0
- {uk_bin_collection-0.77.0.dist-info → uk_bin_collection-0.79.0.dist-info}/METADATA +1 -1
- {uk_bin_collection-0.77.0.dist-info → uk_bin_collection-0.79.0.dist-info}/RECORD +25 -15
- {uk_bin_collection-0.77.0.dist-info → uk_bin_collection-0.79.0.dist-info}/LICENSE +0 -0
- {uk_bin_collection-0.77.0.dist-info → uk_bin_collection-0.79.0.dist-info}/WHEEL +0 -0
- {uk_bin_collection-0.77.0.dist-info → uk_bin_collection-0.79.0.dist-info}/entry_points.txt +0 -0
@@ -222,6 +222,15 @@
|
|
222
222
|
"web_driver": "http://selenium:4444",
|
223
223
|
"wiki_name": "Cheshire West and Chester Council"
|
224
224
|
},
|
225
|
+
"ChichesterDistrictCouncil": {
|
226
|
+
"house_number": "7, Plaistow Road, Kirdford, Billingshurst, West Sussex",
|
227
|
+
"postcode": "RH14 0JT",
|
228
|
+
"skip_get_url": true,
|
229
|
+
"url": "https://www.chichester.gov.uk/checkyourbinday",
|
230
|
+
"web_driver": "http://selenium:4444",
|
231
|
+
"wiki_name": "ChichesterDistrictCouncil",
|
232
|
+
"wiki_note": "Needs the full address and postcode as they appear on the page https://www.chichester.gov.uk/checkyourbinday"
|
233
|
+
},
|
225
234
|
"ChorleyCouncil": {
|
226
235
|
"postcode": "PR6 7PG",
|
227
236
|
"skip_get_url": true,
|
@@ -253,6 +262,12 @@
|
|
253
262
|
"url": "https://service.croydon.gov.uk/wasteservices/w/webpage/bin-day-enter-address",
|
254
263
|
"wiki_name": "Croydon Council"
|
255
264
|
},
|
265
|
+
"DartfordBoroughCouncil": {
|
266
|
+
"url": "https://windmz.dartford.gov.uk/ufs/WS_CHECK_COLLECTIONS.eb?UPRN=010094157511",
|
267
|
+
"wiki_name": "DartfordBoroughCouncil",
|
268
|
+
"uprn": "010094157511",
|
269
|
+
"wiki_note": "Use https://uprn.uk/ to find your UPRN "
|
270
|
+
},
|
256
271
|
"DerbyshireDalesDistrictCouncil": {
|
257
272
|
"postcode": "DE4 3AS",
|
258
273
|
"skip_get_url": true,
|
@@ -336,12 +351,23 @@
|
|
336
351
|
"wiki_name": "Environment First",
|
337
352
|
"wiki_note": "For properties with collections managed by Environment First, such as Lewes and Eastbourne.\nReplace the XXXXXXXXXXX with the UPRN of your property - you can use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find this."
|
338
353
|
},
|
354
|
+
"EppingForestDistrictCouncil": {
|
355
|
+
"postcode": "IG9 6EP",
|
356
|
+
"url": "https://eppingforestdc.maps.arcgis.com/apps/instant/lookup/index.html?appid=bfca32b46e2a47cd9c0a84f2d8cdde17&find=IG9%206EP",
|
357
|
+
"wiki_name": "Epping Forest District Council"
|
358
|
+
},
|
339
359
|
"ErewashBoroughCouncil": {
|
340
360
|
"skip_get_url": true,
|
341
361
|
"uprn": "10003582028",
|
342
362
|
"url": "https://map.erewash.gov.uk/isharelive.web/myerewash.aspx",
|
343
363
|
"wiki_name": "Erewash Borough Council"
|
344
364
|
},
|
365
|
+
"FarehamBoroughCouncil": {
|
366
|
+
"postcode": "PO14 4NR",
|
367
|
+
"skip_get_url": true,
|
368
|
+
"url": "https://www.fareham.gov.uk/internetlookups/search_data.aspx?type=JSON&list=DomesticBinCollections&Road=&Postcode=PO14%204NR",
|
369
|
+
"wiki_name": "Fareham Borough Council"
|
370
|
+
},
|
345
371
|
"FenlandDistrictCouncil": {
|
346
372
|
"skip_get_url": true,
|
347
373
|
"uprn": "200002981143",
|
@@ -421,6 +447,15 @@
|
|
421
447
|
"wiki_name": "High Peak Council",
|
422
448
|
"wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes"
|
423
449
|
},
|
450
|
+
"HounslowCouncil": {
|
451
|
+
"house_number": "17A LAMPTON PARK ROAD, HOUNSLOW",
|
452
|
+
"postcode": "TW3 4HS",
|
453
|
+
"skip_get_url": true,
|
454
|
+
"uprn": "10091596698",
|
455
|
+
"url": "https://www.hounslow.gov.uk/info/20272/recycling_and_waste_collection_day_finder",
|
456
|
+
"web_driver": "http://selenium:4444",
|
457
|
+
"wiki_name": "HounslowCouncil"
|
458
|
+
},
|
424
459
|
"HullCityCouncil": {
|
425
460
|
"skip_get_url": true,
|
426
461
|
"uprn": "21033995",
|
@@ -555,7 +590,7 @@
|
|
555
590
|
"postcode": "RH4 1SJ",
|
556
591
|
"skip_get_url": true,
|
557
592
|
"uprn": "200000171235",
|
558
|
-
"url": "https://molevalley.
|
593
|
+
"url": "https://myproperty.molevalley.gov.uk/molevalley/",
|
559
594
|
"wiki_name": "Mole Valley District Council",
|
560
595
|
"wiki_note": "UPRN can only be parsed with a valid postcode."
|
561
596
|
},
|
@@ -828,6 +863,15 @@
|
|
828
863
|
"url": "https://beta.southglos.gov.uk/waste-and-recycling-collection-date",
|
829
864
|
"wiki_name": "South Gloucestershire Council"
|
830
865
|
},
|
866
|
+
"SouthKestevenDistrictCouncil": {
|
867
|
+
"house_number": "2 Althorpe Close, Market Deeping, PE6 8BL",
|
868
|
+
"postcode": "PE68BL",
|
869
|
+
"skip_get_url": true,
|
870
|
+
"url": "https://pre.southkesteven.gov.uk/BinSearch.aspx",
|
871
|
+
"web_driver": "http://selenium:4444",
|
872
|
+
"wiki_name": "SouthKestevenDistrictCouncil",
|
873
|
+
"wiki_note": ""
|
874
|
+
},
|
831
875
|
"SouthLanarkshireCouncil": {
|
832
876
|
"url": "https://www.southlanarkshire.gov.uk/directory_record/579973/abbeyhill_crescent_lesmahagow",
|
833
877
|
"wiki_command_url_override": "https://www.southlanarkshire.gov.uk/directory_record/XXXXX/XXXXX",
|
@@ -892,6 +936,13 @@
|
|
892
936
|
"url": "https://www.stratford.gov.uk/waste-recycling/when-we-collect.cfm/part/calendar",
|
893
937
|
"wiki_name": "Stratford Upon Avon Council"
|
894
938
|
},
|
939
|
+
"StroudDistrictCouncil": {
|
940
|
+
"postcode": "GL10 3BH",
|
941
|
+
"uprn": "100120512183",
|
942
|
+
"url": "https://www.stroud.gov.uk/my-house?uprn=100120512183&postcode=GL10+3BH",
|
943
|
+
"wiki_name": "Stroud District Council",
|
944
|
+
"wiki_note": "Find your UPRN and replace it in the URL; do the same for the postcode."
|
945
|
+
},
|
895
946
|
"SunderlandCityCouncil": {
|
896
947
|
"house_number": "13",
|
897
948
|
"postcode": "SR4 6BJ",
|
@@ -926,6 +977,14 @@
|
|
926
977
|
"url": "https://dac.telford.gov.uk/bindayfinder/",
|
927
978
|
"wiki_name": "Telford and Wrekin Co-operative Council"
|
928
979
|
},
|
980
|
+
"TendringDistrictCouncil": {
|
981
|
+
"postcode": "CO15 4EU",
|
982
|
+
"skip_get_url": true,
|
983
|
+
"uprn": "100090604247",
|
984
|
+
"url": "https://tendring-self.achieveservice.com/en/service/Rubbish_and_recycling_collection_days",
|
985
|
+
"web_driver": "http://selenium:4444",
|
986
|
+
"wiki_name": "Tendring District Council"
|
987
|
+
},
|
929
988
|
"TestValleyBoroughCouncil": {
|
930
989
|
"postcode": "SO51 9ZD",
|
931
990
|
"skip_get_url": true,
|
@@ -974,6 +1033,16 @@
|
|
974
1033
|
"wiki_name": "Wakefield City Council",
|
975
1034
|
"wiki_note": "Follow the instructions [here](https://www.wakefield.gov.uk/where-i-live/) until you get the page that includes a \"Bin Collections\" section then copy the URL and replace the URL in the command."
|
976
1035
|
},
|
1036
|
+
"WalthamForest": {
|
1037
|
+
"house_number": "17 Chingford Road, Walthamstow",
|
1038
|
+
"postcode": "E17 4PW",
|
1039
|
+
"skip_get_url": true,
|
1040
|
+
"uprn": "200001415697",
|
1041
|
+
"url": "https://portal.walthamforest.gov.uk/AchieveForms/?mode=fill&consentMessage=yes&form_uri=sandbox-publish://AF-Process-d62ccdd2-3de9-48eb-a229-8e20cbdd6393/AF-Stage-8bf39bf9-5391-4c24-857f-0dc2025c67f4/definition.json&process=1&process_uri=sandbox-processes://AF-Process-d62ccdd2-3de9-48eb-a229-8e20cbdd6393&process_id=AF-Process-d62ccdd2-3de9-48eb-a229-8e20cbdd6393",
|
1042
|
+
"web_driver": "http://selenium:4444",
|
1043
|
+
"wiki_name": "Waltham Forest",
|
1044
|
+
"wiki_note": "Use https://uprn.uk/ to find your UPRN"
|
1045
|
+
},
|
977
1046
|
"WarwickDistrictCouncil": {
|
978
1047
|
"url": "https://estates7.warwickdc.gov.uk/PropertyPortal/Property/Recycling/100070263793",
|
979
1048
|
"wiki_command_url_override": "https://estates7.warwickdc.gov.uk/PropertyPortal/Property/Recycling/XXXXXXXX",
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import logging
|
2
2
|
import traceback
|
3
3
|
from typing import Any, Generator, Callable
|
4
|
+
import json
|
4
5
|
|
5
6
|
import pytest
|
6
7
|
from pytest_bdd import scenario, given, when, then, parsers
|
@@ -11,10 +12,26 @@ from uk_bin_collection.uk_bin_collection import collect_data
|
|
11
12
|
|
12
13
|
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
|
13
14
|
|
15
|
+
|
16
|
+
def get_council_list():
    """
    Return the council names defined in the shared test input file.

    The names are the top-level keys of ``input.json``; they are used to
    parametrise the ``council`` fixture so every council gets its own test run.

    Returns:
        list[str]: Council identifiers in the order they appear in the file.

    Raises:
        FileNotFoundError: If the tests are not started from the repository
            root (the path below is relative to the working directory).
    """
    json_file_path = "uk_bin_collection/tests/input.json"  # Relative to the directory pytest is started from
    # JSON is UTF-8 by specification; do not rely on the platform's default encoding.
    with open(json_file_path, "r", encoding="utf-8") as file:
        councils = list(json.load(file).keys())
    logging.info("Council List: %s", councils)
    return councils
|
22
|
+
|
23
|
+
|
24
|
+
@pytest.fixture(params=get_council_list())
def council(request):
    """Yield each council name from the input file as a separate test parameter."""
    council_name = request.param
    print(f"Running test for council: {council_name}")
    return council_name
|
28
|
+
|
29
|
+
|
14
30
|
@scenario("../features/validate_council_outputs.feature", "Validate Council Output")
|
15
|
-
def test_scenario_outline() -> None:
|
31
|
+
def test_scenario_outline(council) -> None:
|
16
32
|
pass
|
17
33
|
|
34
|
+
|
18
35
|
def handle_test_errors(func: Callable[..., Any]) -> Callable[..., Any]:
|
19
36
|
@wraps(func)
|
20
37
|
def wrapper(*args: Any, **kwargs: Any) -> Any:
|
@@ -24,30 +41,37 @@ def handle_test_errors(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
24
41
|
logging.error(f"Error in test '{func.__name__}': {e}")
|
25
42
|
logging.error(traceback.format_exc())
|
26
43
|
raise e
|
44
|
+
|
27
45
|
return wrapper
|
28
46
|
|
29
|
-
@pytest.fixture
|
30
|
-
@handle_test_errors
|
31
|
-
def context() -> Generator[Any, None, None]:
|
32
|
-
class Context:
|
33
|
-
metadata: dict[str, Any]
|
34
|
-
council: str
|
35
|
-
parse_result: Any
|
36
47
|
|
48
|
+
class Context:
    """Mutable holder for the state handed from one BDD step to the next."""

    def __init__(self):
        # Every attribute starts as an empty placeholder so a step can read it
        # safely even before an earlier step has filled it in.
        self.parse_result: Any = None
        self.council: str = ""
        self.metadata: dict[str, Any] = {}
|
53
|
+
|
54
|
+
|
55
|
+
@pytest.fixture(scope="module")
def context():
    """Provide one Context instance that is shared by all tests in this module."""
    shared_context = Context()
    return shared_context
|
38
58
|
|
59
|
+
|
39
60
|
@handle_test_errors
|
40
|
-
@given(parsers.parse("the council
|
41
|
-
def get_council_step(context
|
61
|
+
@given(parsers.parse("the council"))
|
62
|
+
def get_council_step(context, council) -> None:
|
42
63
|
council_input_data = file_handler.load_json_file("input.json")
|
43
|
-
context.metadata = council_input_data[
|
64
|
+
context.metadata = council_input_data[council]
|
65
|
+
context.council = council
|
66
|
+
|
44
67
|
|
45
68
|
@handle_test_errors
|
46
|
-
@when(parsers.parse("we scrape the data from
|
47
|
-
def scrape_step(
|
48
|
-
context
|
69
|
+
@when(parsers.parse("we scrape the data from the council"))
|
70
|
+
def scrape_step(
|
71
|
+
context: Any, headless_mode: str, local_browser: str, selenium_url: str
|
72
|
+
) -> None:
|
49
73
|
|
50
|
-
args = [council, context.metadata["url"]]
|
74
|
+
args = [context.council, context.metadata["url"]]
|
51
75
|
|
52
76
|
if "uprn" in context.metadata:
|
53
77
|
uprn = context.metadata["uprn"]
|
@@ -75,13 +99,17 @@ def scrape_step(context: Any, council: str, headless_mode: str, local_browser: s
|
|
75
99
|
CollectData.set_args(args)
|
76
100
|
context.parse_result = CollectData.run()
|
77
101
|
|
102
|
+
|
78
103
|
@handle_test_errors
@then("the result is valid json")
def validate_json_step(context: Any) -> None:
    """Assert that the scraper output stored on the context is valid JSON."""
    is_valid_json = file_handler.validate_json(context.parse_result)
    assert is_valid_json, "Invalid JSON output"
|
82
107
|
|
108
|
+
|
83
109
|
@handle_test_errors
|
84
110
|
@then("the output should validate against the schema")
|
85
111
|
def validate_output_step(context: Any) -> None:
|
86
112
|
council_schema = file_handler.load_json_file("output.schema")
|
87
|
-
assert file_handler.validate_json_schema(
|
113
|
+
assert file_handler.validate_json_schema(
|
114
|
+
context.parse_result, council_schema
|
115
|
+
), "Schema validation failed"
|
@@ -332,8 +332,10 @@ def test_contains_date_with_mixed_content():
|
|
332
332
|
|
333
333
|
|
334
334
|
def test_create_webdriver_local():
|
335
|
-
result = create_webdriver(
|
336
|
-
|
335
|
+
result = create_webdriver(
|
336
|
+
None, headless=True, user_agent="FireFox", session_name="test-session"
|
337
|
+
)
|
338
|
+
assert result.name in ["chrome", "chrome-headless-shell"]
|
337
339
|
|
338
340
|
|
339
341
|
def test_create_webdriver_remote_failure():
|
@@ -258,7 +258,10 @@ def contains_date(string, fuzzy=False) -> bool:
|
|
258
258
|
|
259
259
|
|
260
260
|
def create_webdriver(
|
261
|
-
web_driver: str = None,
|
261
|
+
web_driver: str = None,
|
262
|
+
headless: bool = True,
|
263
|
+
user_agent: str = None,
|
264
|
+
session_name: str = None,
|
262
265
|
) -> webdriver.Chrome:
|
263
266
|
"""
|
264
267
|
Create and return a Chrome WebDriver configured for optional headless operation.
|
@@ -0,0 +1,110 @@
|
|
1
|
+
import time
|
2
|
+
from datetime import datetime
|
3
|
+
|
4
|
+
from selenium.webdriver.support.ui import Select
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from selenium.webdriver.common.by import By
|
7
|
+
from selenium.webdriver.support import expected_conditions as EC
|
8
|
+
from selenium.webdriver.support.ui import Select
|
9
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
10
|
+
from selenium.webdriver.common.keys import Keys
|
11
|
+
|
12
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
13
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
14
|
+
|
15
|
+
|
16
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
17
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Scraper for Chichester District Council bin collections.

    Drives the council's "check your bin day" form with Selenium: enters the
    postcode, picks the address from the resulting dropdown and reads the
    collection dates table from the rendered page.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look up the collection dates for one address.

        Args:
            page: Ignored; the lookup always starts from the council's form URL.
            **kwargs: Reads ``postcode``, ``paon`` (the address text as shown in
                the site's address dropdown), ``web_driver`` and ``headless``.

        Returns:
            ``{"bins": [{"collectionDate": ..., "type": ...}, ...]}`` with the
            dates rendered using ``date_format``.

        Raises:
            ValueError: If the postcode or address is missing, or the results
                table cannot be found in the rendered page.
        """
        driver = None
        try:
            # The form is always reached through this fixed entry page.
            page = "https://www.chichester.gov.uk/checkyourbinday"

            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            house_number = kwargs.get("paon")

            # Fail before a browser session is started rather than deep inside
            # Selenium with a confusing error.
            if not user_postcode:
                raise ValueError("A postcode is required to look up collections.")
            if not house_number:
                raise ValueError(
                    "The full address (house_number) is required to look up collections."
                )

            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get(page)

            wait = WebDriverWait(driver, 60)

            postcode_box = wait.until(
                EC.visibility_of_element_located(
                    (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPPOSTCODE")
                )
            )
            postcode_box.send_keys(user_postcode)

            search_button = wait.until(
                EC.visibility_of_element_located(
                    (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPSEARCH")
                )
            )
            search_button.send_keys(Keys.ENTER)

            # The wait returns the element once it is clickable, so it can be
            # wrapped in a Select directly.
            address_dropdown = wait.until(
                EC.element_to_be_clickable(
                    (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPADDRESS")
                )
            )
            Select(address_dropdown).select_by_visible_text(house_number)

            # Block until the results container is present before reading the page.
            wait.until(
                EC.element_to_be_clickable(
                    (By.CLASS_NAME, "bin-collection-dates-container")
                )
            )

            soup = BeautifulSoup(driver.page_source, features="html.parser")
            table = soup.find("table", class_="defaultgeneral bin-collection-dates")
            if table is None:
                raise ValueError("Bin collection table not found on the results page.")

            bin_collection_data = []
            for row in table.find_all("tr"):
                cells = row.find_all("td")
                # Rows without both a date and a type cell (e.g. headers) carry no data.
                if len(cells) < 2:
                    continue
                date_str = cells[0].text.strip()
                bin_type = cells[1].text.strip()
                # Convert e.g. "01 January 2024" to the project's output format.
                date_formatted = datetime.strptime(date_str, "%d %B %Y").strftime(
                    date_format
                )
                bin_collection_data.append(
                    {"collectionDate": date_formatted, "type": bin_type}
                )

            return {"bins": bin_collection_data}

        except Exception as e:
            # Report the failure, then let it propagate to the caller.
            print(f"An error occurred: {e}")
            raise
        finally:
            # Always release the browser session, even when the lookup failed.
            if driver:
                driver.quit()
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
|
5
|
+
|
6
|
+
# import the wonderful Beautiful Soup and the URL grabber
|
7
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Scraper for Dartford Borough Council bin collections.

    Reads the collection table out of the already-fetched results page.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Extract bin types and collection dates from the results table.

        Args:
            page: The fetched page; its ``.text`` attribute is parsed as HTML.
            **kwargs: Unused.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": "DD/MM/YYYY"}, ...]}``.
            The list is empty when the table is missing or no row has a date.
        """
        soup = BeautifulSoup(page.text, features="html.parser")

        bin_data = {"bins": []}

        # Find the table containing the bin collection data
        table = soup.find("table", {"class": "eb-EVDNdR1G-tableContent"})
        if table is None:
            return bin_data

        for row in table.find_all("tr", class_="eb-EVDNdR1G-tableRow"):
            columns = row.find_all("td")
            if len(columns) < 4:
                continue

            # Column 1 holds the bin type and column 3 the collection date.
            collection_type = columns[1].get_text(strip=True)
            date_match = re.match(
                r"\d{2}/\d{2}/\d{4}", columns[3].get_text(strip=True)
            )

            # Keep only the matched date so any trailing text in the cell
            # cannot end up in the output.
            if date_match:
                bin_data["bins"].append(
                    {
                        "type": collection_type,
                        "collectionDate": date_match.group(0),
                    }
                )

        return bin_data
|
@@ -4,9 +4,10 @@ import re
|
|
4
4
|
from uk_bin_collection.uk_bin_collection.common import * # Consider specific imports
|
5
5
|
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
6
6
|
|
7
|
+
|
7
8
|
class CouncilClass(AbstractGetBinDataClass):
|
8
9
|
def parse_data(self, page: str, **kwargs) -> dict:
|
9
|
-
soup = BeautifulSoup(page.text,
|
10
|
+
soup = BeautifulSoup(page.text, "html.parser")
|
10
11
|
|
11
12
|
bins_data = {"bins": []}
|
12
13
|
bin_collections = []
|
@@ -23,7 +24,9 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
23
24
|
if service_name and next_service:
|
24
25
|
bin_type = service_name.get_text().replace("Collection", "bin").strip()
|
25
26
|
date_span = next_service.find("span", {"class": "table-label"})
|
26
|
-
date_text =
|
27
|
+
date_text = (
|
28
|
+
date_span.next_sibling.get_text().strip() if date_span else None
|
29
|
+
)
|
27
30
|
|
28
31
|
if date_text and re.match(r"\d{2}/\d{2}/\d{4}", date_text):
|
29
32
|
try:
|
@@ -33,9 +36,11 @@ class CouncilClass(AbstractGetBinDataClass):
|
|
33
36
|
continue
|
34
37
|
|
35
38
|
for bin_type, bin_date in sorted(bin_collections, key=lambda x: x[1]):
|
36
|
-
bins_data["bins"].append(
|
37
|
-
|
38
|
-
|
39
|
-
|
39
|
+
bins_data["bins"].append(
|
40
|
+
{
|
41
|
+
"type": bin_type.capitalize(),
|
42
|
+
"collectionDate": bin_date.strftime("%d/%m/%Y"),
|
43
|
+
}
|
44
|
+
)
|
40
45
|
|
41
46
|
return bins_data
|
@@ -0,0 +1,51 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
3
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
4
|
+
from selenium import webdriver
|
5
|
+
from selenium.webdriver.common.keys import Keys
|
6
|
+
from selenium.webdriver.common.by import By
|
7
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
8
|
+
from selenium.webdriver.support import expected_conditions as EC
|
9
|
+
from datetime import datetime
|
10
|
+
from uk_bin_collection.uk_bin_collection.common import date_format
|
11
|
+
|
12
|
+
|
13
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Scraper for Epping Forest District Council bin collections.

    Loads the council's ArcGIS lookup app for a postcode in a browser and reads
    the collection days from the rendered feature panel.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look up the collection days for a postcode.

        Args:
            page: Unused; the lookup URL is built from the postcode.
            **kwargs: Reads ``postcode``, ``web_driver`` and ``headless``.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` with the
            dates rendered using ``date_format``.

        Raises:
            ValueError: If a collection date on the page is not ``DD/MM/YYYY``.
        """
        from urllib.parse import quote

        postcode = kwargs.get("postcode", "")
        web_driver = kwargs.get("web_driver")
        headless = kwargs.get("headless")

        # Headless mode is handled by create_webdriver; no separate options
        # object is needed here.
        driver = create_webdriver(web_driver, headless)

        try:
            # Percent-encode the postcode so the space (or any other reserved
            # character) cannot break the query string.
            driver.get(
                "https://eppingforestdc.maps.arcgis.com/apps/instant/lookup/index.html"
                "?appid=bfca32b46e2a47cd9c0a84f2d8cdde17&find=" + quote(postcode or "")
            )
            WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located(
                    (By.CSS_SELECTOR, ".esri-feature-content")
                )
            )

            soup = BeautifulSoup(driver.page_source, "html.parser")
            data = {"bins": []}
            for paragraph in soup.select(".esri-feature-content p"):
                # Each relevant paragraph reads "<type> collection day is <date>".
                bin_type, separator, date_str = paragraph.text.partition(
                    " collection day is "
                )
                if not separator:
                    continue
                collection_date = datetime.strptime(
                    date_str.strip(), "%d/%m/%Y"
                ).strftime(date_format)
                data["bins"].append(
                    {"type": bin_type.strip(), "collectionDate": collection_date}
                )

            return data
        finally:
            # Always release the browser session.
            driver.quit()
|
@@ -0,0 +1,68 @@
|
|
1
|
+
import json
|
2
|
+
|
3
|
+
import requests
|
4
|
+
from uk_bin_collection.uk_bin_collection.common import *
|
5
|
+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
|
6
|
+
|
7
|
+
|
8
|
+
class CouncilClass(AbstractGetBinDataClass):
    """
    Scraper for Fareham Borough Council bin collections.

    Queries the council's JSON lookup endpoint by postcode and parses the
    collection dates out of the returned text.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look up the collection dates for a postcode.

        Args:
            page: Unused; the data is requested directly from the lookup endpoint.
            **kwargs: Reads ``postcode``.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` sorted by
            date, with the dates rendered using ``date_format``.

        Raises:
            ValueError: If the website returns no rows for the postcode.
            RuntimeError: If no dates can be parsed from the returned text.
            requests.HTTPError: If the endpoint answers with an error status.
        """
        user_postcode = kwargs.get("postcode")
        check_postcode(user_postcode)

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
        }
        params = {
            "type": "JSON",
            "list": "DomesticBinCollections",
            "Road": "",
            "Postcode": user_postcode,
        }

        # Without a timeout a stalled server would hang the caller indefinitely.
        response = requests.get(
            "https://www.fareham.gov.uk/internetlookups/search_data.aspx",
            params=params,
            headers=headers,
            timeout=30,
        )
        response.raise_for_status()

        bin_data = response.json()["data"]

        # A missing or empty "rows" list both mean the postcode was not matched.
        if "rows" not in bin_data or not bin_data["rows"]:
            raise ValueError("Postcode not found on website.")

        collection_str = bin_data["rows"][0]["DomesticBinDay"]

        # Entries look like "1/2/2024 (Refuse)": a date followed by the bin type.
        results = re.findall(r"(\d\d?\/\d\d?\/\d{4}) \((\w*)\)", collection_str)
        if not results:
            raise RuntimeError("Dates not parsed correctly.")

        data = {"bins": []}
        for date_text, bin_type in results:
            formatted_date = datetime.strptime(date_text, "%d/%m/%Y").strftime(
                date_format
            )
            data["bins"].append({"type": bin_type, "collectionDate": formatted_date})

            # Garden waste is also collected on recycling day
            if bin_type == "Recycling":
                data["bins"].append(
                    {"type": "Garden", "collectionDate": formatted_date}
                )

        # Parse with the same format the dates were rendered with.
        data["bins"].sort(
            key=lambda entry: datetime.strptime(entry["collectionDate"], date_format)
        )

        return data
|