uk_bin_collection 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -283,10 +283,10 @@
283
283
  "wiki_name": "Doncaster Council"
284
284
  },
285
285
  "DorsetCouncil": {
286
- "url": "https://gi.dorsetcouncil.gov.uk/mapping/mylocal/viewresults/100040711049",
287
- "wiki_command_url_override": "https://gi.dorsetcouncil.gov.uk/mapping/mylocal/viewresults/XXXXXXXX",
288
- "wiki_name": "Dorset Council",
289
- "wiki_note": "Replace XXXXXXXX with UPRN."
286
+ "skip_get_url": true,
287
+ "url": "https://www.dorsetcouncil.gov.uk/",
288
+ "uprn": "100040711049",
289
+ "wiki_name": "Dorset Council"
290
290
  },
291
291
  "DoverDistrictCouncil": {
292
292
  "url": "https://collections.dover.gov.uk/property/100060908340",
@@ -573,12 +573,13 @@
573
573
  "wiki_note": "Pass the house name/number plus the name of the street with the postcode parameter, wrapped in double quotes. Check the address in the web site first. This version will only pick the first SHOW button returned by the search or if it is fully unique. The search is not very predictable (e.g. house number 4 returns 14,24,4,44 etc.)."
574
574
  },
575
575
  "MidSussexDistrictCouncil": {
576
- "house_number": "OAKLANDS",
576
+ "house_number": "OAKLANDS, OAKLANDS ROAD RH16 1SS",
577
577
  "postcode": "RH16 1SS",
578
578
  "skip_get_url": true,
579
579
  "url": "https://www.midsussex.gov.uk/waste-recycling/bin-collection/",
580
580
  "wiki_name": "Mid Sussex District Council",
581
- "wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes"
581
+ "wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes",
582
+ "web_driver": "http://selenium:4444"
582
583
  },
583
584
  "MiltonKeynesCityCouncil": {
584
585
  "uprn": "Fullers Slade",
@@ -1,13 +1,13 @@
1
+ import json
1
2
  import logging
2
3
  import traceback
3
- from typing import Any, Generator, Callable
4
- import json
5
-
6
- import pytest
7
- from pytest_bdd import scenario, given, when, then, parsers
8
4
  from functools import wraps
5
+ from typing import Any, Callable, Generator
9
6
 
7
+ import pytest
8
+ from pytest_bdd import given, parsers, scenario, then, when
10
9
  from step_helpers import file_handler
10
+
11
11
  from uk_bin_collection.uk_bin_collection import collect_data
12
12
 
13
13
  logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
@@ -11,8 +11,8 @@ from uk_bin_collection.uk_bin_collection.get_bin_data import (
11
11
  _LOGGER = logging.getLogger(__name__)
12
12
 
13
13
 
14
- # Dynamically importing the council processor
15
14
  def import_council_module(module_name, src_path="councils"):
15
+ """Dynamically import the council processor module."""
16
16
  module_path = os.path.realpath(os.path.join(os.path.dirname(__file__), src_path))
17
17
  if module_path not in sys.path:
18
18
  sys.path.append(module_path)
@@ -25,6 +25,7 @@ class UKBinCollectionApp:
25
25
  self.parsed_args = None
26
26
 
27
27
  def setup_arg_parser(self):
28
+ """Setup the argument parser for the script."""
28
29
  self.parser = argparse.ArgumentParser(
29
30
  description="UK Bin Collection Data Parser"
30
31
  )
@@ -38,8 +39,7 @@ class UKBinCollectionApp:
38
39
  "-p",
39
40
  "--postcode",
40
41
  type=str,
41
- help="Postcode to parse - should include a space and be wrapped in "
42
- "double-quotes",
42
+ help="Postcode to parse - should include a space and be wrapped in double quotes",
43
43
  required=False,
44
44
  )
45
45
  self.parser.add_argument(
@@ -58,6 +58,7 @@ class UKBinCollectionApp:
58
58
  self.parser.add_argument(
59
59
  "-w",
60
60
  "--web_driver",
61
+ type=str,
61
62
  help="URL for remote Selenium web driver - should be wrapped in double quotes",
62
63
  required=False,
63
64
  )
@@ -74,7 +75,13 @@ class UKBinCollectionApp:
74
75
  help="Should Selenium be headless. Defaults to true. Can be set to false to debug council",
75
76
  )
76
77
  self.parser.set_defaults(headless=True)
77
-
78
+ self.parser.add_argument(
79
+ "--local_browser",
80
+ dest="local_browser",
81
+ action="store_true",
82
+ help="Should Selenium be run on a remote server or locally. Defaults to false.",
83
+ required=False,
84
+ )
78
85
  self.parser.add_argument(
79
86
  "-d",
80
87
  "--dev_mode",
@@ -85,43 +92,37 @@ class UKBinCollectionApp:
85
92
  self.parsed_args = None
86
93
 
87
94
  def set_args(self, args):
95
+ """Parse the arguments from the command line."""
88
96
  self.parsed_args = self.parser.parse_args(args)
89
97
 
90
98
  def run(self):
91
- council_module_str = self.parsed_args.module
92
- council_module = import_council_module(council_module_str)
93
- address_url = self.parsed_args.URL
94
- postcode = self.parsed_args.postcode
95
- paon = self.parsed_args.number
96
- uprn = self.parsed_args.uprn
97
- skip_get_url = self.parsed_args.skip_get_url
98
- web_driver = self.parsed_args.web_driver
99
- headless = self.parsed_args.headless
100
- dev_mode = self.parsed_args.dev_mode
101
-
99
+ """Run the application with the provided arguments."""
100
+ council_module = import_council_module(self.parsed_args.module)
102
101
  return self.client_code(
103
102
  council_module.CouncilClass(),
104
- address_url,
105
- postcode=postcode,
106
- paon=paon,
107
- uprn=uprn,
108
- skip_get_url=skip_get_url,
109
- web_driver=web_driver,
110
- headless=headless,
111
- dev_mode=dev_mode,
112
- council_module_str=council_module_str,
103
+ self.parsed_args.URL,
104
+ postcode=self.parsed_args.postcode,
105
+ paon=self.parsed_args.number,
106
+ uprn=self.parsed_args.uprn,
107
+ skip_get_url=self.parsed_args.skip_get_url,
108
+ web_driver=self.parsed_args.web_driver,
109
+ headless=self.parsed_args.headless,
110
+ local_browser=self.parsed_args.local_browser,
111
+ dev_mode=self.parsed_args.dev_mode,
112
+ council_module_str=self.parsed_args.module,
113
113
  )
114
114
 
115
- def client_code(self, get_bin_data_class, address_url, **kwargs) -> None:
115
+ def client_code(self, get_bin_data_class, address_url, **kwargs):
116
116
  """
117
- The client code calls the template method to execute the algorithm. Client
118
- code does not have to know the concrete class of an object it works with,
119
- as long as it works with objects through the interface of their base class.
117
+ Call the template method to execute the algorithm. Client code does not need
118
+ to know the concrete class of an object it works with, as long as it works with
119
+ objects through the interface of their base class.
120
120
  """
121
121
  return get_bin_data_class.template_method(address_url, **kwargs)
122
122
 
123
123
 
124
124
  def run():
125
+ """Set up logging and run the application."""
125
126
  global _LOGGER
126
127
  _LOGGER = setup_logging(LOGGING_CONFIG, None)
127
128
  app = UKBinCollectionApp()
@@ -1,18 +1,19 @@
1
1
  import calendar
2
- import holidays
3
2
  import json
4
3
  import os
5
- import pandas as pd
6
4
  import re
7
- import requests
8
5
  from datetime import datetime, timedelta
9
- from dateutil.parser import parse
10
6
  from enum import Enum
7
+
8
+ import holidays
9
+ import pandas as pd
10
+ import requests
11
+ from dateutil.parser import parse
11
12
  from selenium import webdriver
12
- from selenium.webdriver.chrome.service import Service as ChromeService
13
- from webdriver_manager.chrome import ChromeDriverManager
14
13
  from selenium.common.exceptions import WebDriverException
14
+ from selenium.webdriver.chrome.service import Service as ChromeService
15
15
  from urllib3.exceptions import MaxRetryError
16
+ from webdriver_manager.chrome import ChromeDriverManager
16
17
 
17
18
  date_format = "%d/%m/%Y"
18
19
  days_of_week = {
@@ -14,38 +14,22 @@ class CouncilClass(AbstractGetBinDataClass):
14
14
  def parse_data(self, page: str, **kwargs) -> dict:
15
15
  data = {"bins": []}
16
16
  collections = []
17
-
18
- # Parse the page and find all the result boxes
19
- soup = BeautifulSoup(page.text, features="html.parser")
20
- soup.prettify()
21
- results = soup.find_all("li", {"class": "resultListItem"})
22
-
23
- # If the result box has a wanted string in, we can use it. Check the contents of each box and find the
24
- # desired text and dates
25
- for r in results:
26
- if "Your next" in r.text:
27
- if type(r.contents[10]) is element.NavigableString:
28
- bin_text = r.contents[10].text.split(" ")[2].title() + " bin"
29
- bin_date = datetime.strptime(
30
- remove_ordinal_indicator_from_date_string(
31
- r.contents[11].text.strip()
32
- ),
33
- "%A %d %B %Y",
34
- )
35
- else:
36
- bin_text = r.contents[11].text.split(" ")[2].title() + " bin"
37
- bin_date = datetime.strptime(
38
- remove_ordinal_indicator_from_date_string(
39
- r.contents[12].text.strip()
40
- ),
41
- "%A %d %B %Y",
42
- )
43
-
44
- if bin_date.date() >= datetime.now().date():
45
- collections.append((bin_text, bin_date))
46
-
47
- # Sort the text and date elements by date
48
- ordered_data = sorted(collections, key=lambda x: x[1])
17
+ url_base = "https://geoapi.dorsetcouncil.gov.uk/v1/services/"
18
+ url_types = ["recyclingday", "refuseday", "foodwasteday", "gardenwasteday"]
19
+
20
+ uprn = kwargs.get("uprn")
21
+ # Check the UPRN is valid
22
+ check_uprn(uprn)
23
+
24
+ for url_type in url_types:
25
+ response = requests.get(f"{url_base}{url_type}/{uprn}")
26
+ if response.status_code != 200:
27
+ raise ConnectionError(f"Could not fetch from {url_type} endpoint")
28
+ json_data = response.json()["values"][0]
29
+ collections.append((f"{json_data.get('type')} bin", datetime.strptime(json_data.get('dateNextVisit'), "%Y-%m-%d")))
30
+
31
+ # Sort the text and date elements by date
32
+ ordered_data = sorted(collections, key=lambda x: x[1])
49
33
 
50
34
  # Put the elements into the dictionary
51
35
  for item in ordered_data:
@@ -1,80 +1,168 @@
1
- import re
2
- from datetime import datetime
1
+ import logging
2
+ import time
3
3
 
4
- import requests
5
4
  from bs4 import BeautifulSoup
5
+ from selenium.webdriver.common.by import By
6
+ from selenium.webdriver.support import expected_conditions as EC
7
+ from selenium.webdriver.support.ui import Select
8
+ from selenium.webdriver.support.wait import WebDriverWait
9
+ from selenium.webdriver.common.keys import Keys
10
+
6
11
  from uk_bin_collection.uk_bin_collection.common import *
7
12
  from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
8
13
 
9
-
10
- def get_token(res) -> str:
11
- """
12
- Get a UFPRT code for the form data to be processed
13
- :param res:
14
- :return:
15
- """
16
- soup = BeautifulSoup(res, features="html.parser")
17
- soup.prettify()
18
- token = soup.find("input", {"name": "ufprt"}).get("value")
19
- return token
14
+ # Set up logging
15
+ logging.basicConfig(
16
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
17
+ )
20
18
 
21
19
 
22
- # import the wonderful Beautiful Soup and the URL grabber
23
20
  class CouncilClass(AbstractGetBinDataClass):
24
21
  """
25
- Concrete classes have to implement all abstract operations of the
26
- base class. They can also override some operations with a default
27
- implementation.
22
+ Concrete class for Mid-Sussex District Council implementing AbstractGetBinDataClass.
28
23
  """
29
24
 
30
25
  def parse_data(self, page: str, **kwargs) -> dict:
31
- api_url = "https://www.midsussex.gov.uk/waste-recycling/bin-collection/"
32
- user_postcode = kwargs.get("postcode")
33
- user_paon = kwargs.get("paon")
34
- postcode_re = "^([A-Za-z][A-Ha-hJ-Yj-y]?[0-9][A-Za-z0-9]? ?[0-9][A-Za-z]{2}|[Gg][Ii][Rr] ?0[Aa]{2})$"
35
- user_full_addr = f"{user_paon} {user_postcode}"
36
-
37
- check_postcode(user_postcode)
38
- check_paon(user_paon)
39
-
40
- form_data = {
41
- "PostCodeStep.strAddressSearch": user_postcode,
42
- "AddressStep.strAddressSelect": user_full_addr,
43
- "Next": "true",
44
- "StepIndex": "1",
45
- }
46
-
47
- # Get a ufprt by posting here (I have no idea how ufprt works, so may as well grab one from the server)
48
- requests.packages.urllib3.disable_warnings()
49
- init = requests.post(api_url, data=form_data)
50
- ufprt = get_token(init.text)
51
- form_data.update({"ufprt": ufprt})
52
-
53
- response = requests.post(api_url, data=form_data)
54
-
55
- # Make a BS4 object
56
- soup = BeautifulSoup(response.text, features="html.parser")
57
- soup.prettify()
58
-
59
- data = {"bins": []}
60
-
61
- table_element = soup.find("table", {"class": "collDates"})
62
- table_rows = table_element.find_all_next("tr")
63
-
64
- row_index = 0
65
- for row in table_rows:
66
- if row_index < 1:
67
- row_index += 1
68
- continue
26
+ driver = None
27
+ try:
28
+ data = {"bins": []}
29
+ collections = []
30
+ user_paon = kwargs.get("paon")
31
+ user_postcode = kwargs.get("postcode")
32
+ web_driver = kwargs.get("web_driver")
33
+ headless = kwargs.get("headless")
34
+ check_postcode(user_postcode)
35
+
36
+ # Create Selenium webdriver
37
+ driver = create_webdriver(web_driver, headless, None, __name__)
38
+
39
+ driver.get("https://www.midsussex.gov.uk/waste-recycling/bin-collection/")
40
+ wait = WebDriverWait(driver, 60)
41
+
42
+ try:
43
+ logging.info("Cookies")
44
+ cookie_window = wait.until(
45
+ EC.presence_of_element_located(
46
+ (By.XPATH, '//div[@id="ccc-content"]')
47
+ )
48
+ )
49
+ time.sleep(2)
50
+ accept_cookies = WebDriverWait(driver, timeout=10).until(
51
+ EC.presence_of_element_located(
52
+ (By.XPATH, '//button[@id="ccc-recommended-settings"]')
53
+ )
54
+ )
55
+ accept_cookies.send_keys(Keys.ENTER)
56
+ accept_cookies.click()
57
+ accept_cookies_close = WebDriverWait(driver, timeout=10).until(
58
+ EC.presence_of_element_located(
59
+ (By.XPATH, '//button[@id="ccc-close"]')
60
+ )
61
+ )
62
+ accept_cookies_close.send_keys(Keys.ENTER)
63
+ accept_cookies_close.click()
64
+ except:
65
+ print(
66
+ "Accept cookies banner not found or clickable within the specified time."
67
+ )
68
+ pass
69
+
70
+ def click_element(by, value):
71
+ element = wait.until(EC.element_to_be_clickable((by, value)))
72
+ driver.execute_script("arguments[0].scrollIntoView();", element)
73
+ element.click()
74
+
75
+ logging.info("Entering postcode")
76
+ input_element_postcode = wait.until(
77
+ EC.presence_of_element_located(
78
+ (By.XPATH, '//input[@id="PostCodeStep_strAddressSearch"]')
79
+ )
80
+ )
81
+
82
+ input_element_postcode.send_keys(user_postcode)
83
+
84
+ logging.info("Entering postcode")
85
+
86
+ click_element(By.XPATH, "//button[contains(text(), 'Search')]")
87
+
88
+ logging.info("Selecting address")
89
+ dropdown = wait.until(
90
+ EC.element_to_be_clickable((By.ID, "StrAddressSelect"))
91
+ )
92
+
93
+ dropdown_options = wait.until(
94
+ EC.presence_of_element_located(
95
+ (By.XPATH, "//select[@id='StrAddressSelect']/option")
96
+ )
97
+ )
98
+ dropdownSelect = Select(dropdown)
99
+ dropdownSelect.select_by_visible_text(str(user_paon))
100
+
101
+ click_element(By.XPATH, "//button[contains(text(), 'Select')]")
102
+
103
+ logging.info("Waiting for bin schedule")
104
+ bin_results = wait.until(
105
+ EC.presence_of_element_located(
106
+ (By.XPATH, f"//strong[contains(text(), '{user_paon}')]")
107
+ )
108
+ )
109
+
110
+ # Make a BS4 object
111
+ soup = BeautifulSoup(driver.page_source, features="html.parser")
112
+
113
+ # Find the table with bin collection data
114
+ table = soup.find("table", class_="collDates")
115
+ if table:
116
+ rows = table.find_all("tr")[1:] # Skip the header row
69
117
  else:
70
- details = row.find_all_next("td")
71
- dict_data = {
72
- "type": details[1].get_text().replace("collection", "").strip(),
73
- "collectionDate": datetime.strptime(
74
- details[2].get_text(), "%A %d %B %Y"
75
- ).strftime(date_format),
76
- }
77
- data["bins"].append(dict_data)
78
- row_index += 1
79
-
80
- return data
118
+ rows = []
119
+
120
+ # Extract the data from the table and format it according to the JSON schema
121
+ bins = []
122
+ date_pattern = re.compile(r"(\d{2}) (\w+) (\d{4})")
123
+
124
+ for row in rows:
125
+ cols = row.find_all("td")
126
+ if len(cols) < 3:
127
+ print("Skipping row, not enough columns:", row)
128
+ continue # Skip rows that do not have enough columns
129
+
130
+ collection_type = cols[1].text.strip()
131
+ collection_date = cols[2].text.strip()
132
+
133
+ # Convert the collection date to the required format
134
+ date_match = date_pattern.search(collection_date)
135
+ if date_match:
136
+ day, month, year = date_match.groups()
137
+ month_number = {
138
+ "January": "01",
139
+ "February": "02",
140
+ "March": "03",
141
+ "April": "04",
142
+ "May": "05",
143
+ "June": "06",
144
+ "July": "07",
145
+ "August": "08",
146
+ "September": "09",
147
+ "October": "10",
148
+ "November": "11",
149
+ "December": "12",
150
+ }.get(month, "00")
151
+
152
+ formatted_date = f"{day}/{month_number}/{year}"
153
+ bins.append(
154
+ {"type": collection_type, "collectionDate": formatted_date}
155
+ )
156
+ else:
157
+ print("Date pattern not found in:", collection_date)
158
+
159
+ # Create the final JSON structure
160
+ bin_data = {"bins": bins}
161
+ return bin_data
162
+ except Exception as e:
163
+ logging.error(f"An error occurred: {e}")
164
+ raise
165
+
166
+ finally:
167
+ if driver:
168
+ driver.quit()
@@ -30,7 +30,7 @@ class CouncilClass(AbstractGetBinDataClass):
30
30
 
31
31
  # Create Selenium webdriver
32
32
  driver = create_webdriver(web_driver, headless, None, __name__)
33
- if headless:
33
+ if not headless:
34
34
  driver.set_window_size(1920, 1080)
35
35
 
36
36
  driver.get(
@@ -1,16 +1,15 @@
1
- """Module that contains an abstract class that can be imported to
2
- handle the data recieved from the provided council class.
1
+ """Get Bin Data
3
2
 
4
- Keyword arguments: None
3
+ Keyword arguments:
4
+ None
5
5
  """
6
6
 
7
7
  import json
8
- import logging
8
+ import logging, logging.config
9
9
  from abc import ABC, abstractmethod
10
- from logging.config import dictConfig
11
10
  import os
12
-
13
11
  import requests
12
+ import urllib3
14
13
 
15
14
  from uk_bin_collection.uk_bin_collection.common import update_input_json
16
15
 
@@ -27,6 +26,12 @@ LOGGING_CONFIG = dict(
27
26
 
28
27
 
29
28
  def setup_logging(logging_config, logger_name):
29
+ """Set up logging configuration.
30
+
31
+ Keyword arguments:
32
+ logging_config -- the logging configuration dictionary
33
+ logger_name -- the name of the logger
34
+ """
30
35
  try:
31
36
  logging.config.dictConfig(logging_config)
32
37
  logger = logging.getLogger(logger_name)
@@ -35,11 +40,8 @@ def setup_logging(logging_config, logger_name):
35
40
  raise exp
36
41
 
37
42
 
38
- # import the wonderful Beautiful Soup and the URL grabber
39
-
40
-
41
43
  class AbstractGetBinDataClass(ABC):
42
- """An abstract class that can be imported to handle the data recieved from the provided
44
+ """An abstract class that can be imported to handle the data received from the provided
43
45
  council class.
44
46
 
45
47
  Keyword arguments: None
@@ -52,63 +54,58 @@ class AbstractGetBinDataClass(ABC):
52
54
  address_url -- the url to get the data from
53
55
  """
54
56
  this_url = address_url
55
- this_postcode = kwargs.get("postcode", None)
56
- this_paon = kwargs.get("paon", None)
57
- this_uprn = kwargs.get("uprn", None)
58
- this_usrn = kwargs.get("usrn", None)
59
- this_web_driver = kwargs.get("web_driver", None)
60
- this_headless = kwargs.get("headless", None)
61
- skip_get_url = kwargs.get("skip_get_url", None)
62
- dev_mode = kwargs.get("dev_mode", False)
63
- council_module_str = kwargs.get("council_module_str", None)
64
- if (
65
- not skip_get_url or skip_get_url is False
66
- ): # we will not use the generic way to get data - needs a get data in the council class itself
67
- page = self.get_data(address_url)
68
- bin_data_dict = self.parse_data(
69
- page,
70
- postcode=this_postcode,
71
- paon=this_paon,
72
- uprn=this_uprn,
73
- usrn=this_usrn,
74
- web_driver=this_web_driver,
75
- headless=this_headless,
76
- url=this_url,
77
- )
78
- json_output = self.output_json(bin_data_dict)
79
- else:
80
- bin_data_dict = self.parse_data(
81
- "",
82
- postcode=this_postcode,
83
- paon=this_paon,
84
- uprn=this_uprn,
85
- usrn=this_usrn,
86
- web_driver=this_web_driver,
87
- headless=this_headless,
88
- url=this_url,
89
- )
90
- json_output = self.output_json(bin_data_dict)
57
+ this_local_browser = kwargs.get("local_browser", False)
58
+ if not this_local_browser:
59
+ kwargs["web_driver"] = kwargs.get("web_driver", None)
60
+
61
+ bin_data_dict = self.get_and_parse_data(this_url, **kwargs)
62
+ json_output = self.output_json(bin_data_dict)
91
63
 
92
64
  # if dev mode create/update council's entry in the input.json
93
- if dev_mode is not None and dev_mode is True:
94
- cwd = os.getcwd()
95
- input_file_path = os.path.join(
96
- cwd, "uk_bin_collection", "tests", "input.json"
97
- )
98
- update_input_json(
99
- council_module_str,
100
- this_url,
101
- input_file_path,
102
- postcode=this_postcode,
103
- paon=this_paon,
104
- uprn=this_uprn,
105
- usrn=this_usrn,
106
- web_driver=this_web_driver,
107
- skip_get_url=skip_get_url,
65
+ if kwargs.get("dev_mode"):
66
+ self.update_dev_mode_data(
67
+ council_module_str=kwargs.get("council_module_str"),
68
+ this_url=this_url,
69
+ **kwargs,
108
70
  )
109
71
 
110
72
  return json_output
111
73
 
74
+ def get_and_parse_data(self, address_url, **kwargs):
75
+ """Get and parse data from the URL
76
+
77
+ Keyword arguments:
78
+ address_url -- the URL to get the data from
79
+ """
80
+ if not kwargs.get("skip_get_url"):
81
+ page = self.get_data(address_url)
82
+ bin_data_dict = self.parse_data(page, url=address_url, **kwargs)
83
+ else:
84
+ bin_data_dict = self.parse_data("", url=address_url, **kwargs)
85
+
86
+ return bin_data_dict
87
+
88
+ def update_dev_mode_data(self, council_module_str, this_url, **kwargs):
89
+ """Update input.json if in development mode
90
+
91
+ Keyword arguments:
92
+ council_module_str -- the council module string
93
+ this_url -- the URL used
94
+ """
95
+ cwd = os.getcwd()
96
+ input_file_path = os.path.join(cwd, "uk_bin_collection", "tests", "input.json")
97
+ update_input_json(
98
+ council_module_str,
99
+ this_url,
100
+ input_file_path,
101
+ postcode=kwargs.get("postcode"),
102
+ paon=kwargs.get("paon"),
103
+ uprn=kwargs.get("uprn"),
104
+ usrn=kwargs.get("usrn"),
105
+ web_driver=kwargs.get("web_driver"),
106
+ skip_get_url=kwargs.get("skip_get_url"),
107
+ )
108
+
112
109
  @classmethod
113
110
  def get_data(cls, url) -> str:
114
111
  """This method makes the request to the council
@@ -116,29 +113,18 @@ class AbstractGetBinDataClass(ABC):
116
113
  Keyword arguments:
117
114
  url -- the url to get the data from
118
115
  """
119
- # Set a user agent so we look like a browser ;-)
120
116
  user_agent = (
121
117
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
122
118
  "Chrome/108.0.0.0 Safari/537.36"
123
119
  )
124
120
  headers = {"User-Agent": user_agent}
125
- requests.packages.urllib3.disable_warnings()
121
+ urllib3.disable_warnings(category=urllib3.exceptions.InsecureRequestWarning)
126
122
 
127
- # Make the Request - change the URL - find out your property number
128
123
  try:
129
- full_page = requests.get(url, headers, verify=False)
124
+ full_page = requests.get(url, headers, verify=False, timeout=120)
130
125
  return full_page
131
- except requests.exceptions.HTTPError as errh:
132
- _LOGGER.error(f"Http Error: {errh}")
133
- raise
134
- except requests.exceptions.ConnectionError as errc:
135
- _LOGGER.error(f"Error Connecting: {errc}")
136
- raise
137
- except requests.exceptions.Timeout as errt:
138
- _LOGGER.error(f"Timeout Error: {errt}")
139
- raise
140
126
  except requests.exceptions.RequestException as err:
141
- _LOGGER.error(f"Oops: Something Else {err}")
127
+ _LOGGER.error(f"Request Error: {err}")
142
128
  raise
143
129
 
144
130
  @abstractmethod
@@ -154,12 +140,7 @@ class AbstractGetBinDataClass(ABC):
154
140
  """Method to output the json as a pretty printed string
155
141
 
156
142
  Keyword arguments:
157
- bin_data_dict -- a dict parsed data
143
+ bin_data_dict -- a dict of parsed data
158
144
  """
159
- # Form a JSON wrapper
160
- # Make the JSON
161
-
162
145
  json_data = json.dumps(bin_data_dict, sort_keys=False, indent=4)
163
-
164
- # Output the data
165
146
  return json_data
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: uk_bin_collection
3
- Version: 0.81.0
3
+ Version: 0.82.1
4
4
  Summary: Python Lib to collect UK Bin Data
5
5
  Author: Robert Bradley
6
6
  Author-email: robbrad182@gmail.com
@@ -126,6 +126,124 @@ Install can be done via
126
126
  ## UPRN Finder
127
127
  Some councils make use of the UPRN (Unique property reference number) to identify your property. You can find yours [here](https://www.findmyaddress.co.uk/search) or [here](https://uprn.uk/).
128
128
 
129
+ ---
130
+ ## Selenium
131
+ Some councils need Selenium to run the scrape on behalf of Home Assistant. The easiest way to do this is to run Selenium in a Docker container. However you do this, the Home Assistant server must be able to reach the Selenium server.
132
+
133
+ ### Instructions for Windows, Linux, and Mac
134
+
135
+ #### Step 1: Install Docker
136
+
137
+ ##### Windows
138
+
139
+ 1. **Download Docker Desktop for Windows:**
140
+
141
+ * Go to the Docker website: Docker Desktop for Windows
142
+ * Download and install Docker Desktop.
143
+ 2. **Run Docker Desktop:**
144
+
145
+ * After installation, run Docker Desktop.
146
+ * Follow the on-screen instructions to complete the setup.
147
+ * Ensure Docker is running by checking the Docker icon in the system tray.
148
+
149
+ ##### Linux
150
+
151
+ 1. **Install Docker:**
152
+
153
+ * Open a terminal and run the following commands:
154
+
155
+ ```bash
156
+ sudo apt-get update
157
+ sudo apt-get install \
158
+ apt-transport-https \
159
+ ca-certificates \
160
+ curl \
161
+ gnupg \
162
+ lsb-release
163
+ curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
164
+ echo \
165
+ "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
166
+ $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
167
+ sudo apt-get update
168
+ sudo apt-get install docker-ce docker-ce-cli containerd.io
169
+ ```
170
+
171
+ 2. **Start Docker:**
172
+
173
+ * Run the following command to start Docker:
174
+
175
+ ```bash
176
+ sudo systemctl start docker
177
+ ```
178
+
179
+ 3. **Enable Docker to start on boot:**
180
+
181
+ * Run the following command to enable Docker to start on boot:
182
+
183
+
184
+
185
+ ```bash
186
+ sudo systemctl enable docker
187
+ ```
188
+
189
+
190
+ ##### Mac
191
+
192
+ 1. **Download Docker Desktop for Mac:**
193
+
194
+ * Go to the Docker website: Docker Desktop for Mac
195
+ * Download and install Docker Desktop.
196
+ 2. **Run Docker Desktop:**
197
+
198
+ * After installation, run Docker Desktop.
199
+ * Follow the on-screen instructions to complete the setup.
200
+ * Ensure Docker is running by checking the Docker icon in the menu bar.
201
+
202
+ #### Step 2: Pull and Run Selenium Standalone Chrome Docker Image
203
+
204
+ 1. **Open a terminal or command prompt:**
205
+
206
+ 2. **Pull the Selenium Standalone Chrome image:**
207
+
208
+ ```bash
209
+ docker pull selenium/standalone-chrome
210
+ ```
211
+
212
+ 3. **Run the Selenium Standalone Chrome container:**
213
+
214
+ ```bash
215
+ docker run -d -p 4444:4444 --name selenium-chrome selenium/standalone-chrome
216
+ ```
217
+
218
+
219
+ #### Step 3: Test the Selenium Server
220
+
221
+ 1. **Navigate to the Selenium server URL in your web browser:**
222
+ * Open a web browser and go to `http://localhost:4444`
223
+ * You should see the Selenium Grid console.
224
+
225
+ #### Step 4: Supply the Selenium Server URL to UKBinCollectionData
226
+
227
+ 1. **Find the `UKBinCollectionData` project:**
228
+
229
+ * Go to the GitHub repository: [UKBinCollectionData](https://github.com/robbrad/UKBinCollectionData)
230
+ 2. **Supply the Selenium Server URL:**
231
+
232
+ * Typically, the URL will be `http://localhost:4444/wd/hub`
233
+ * You might need to update a configuration file or environment variable in the project to use this URL. Check the project's documentation for specific instructions.
234
+
235
+ ### Summary of Commands
236
+
237
+ **Windows/Linux/Mac:**
238
+
239
+ ```bash
240
+ docker pull selenium/standalone-chrome && docker run -d -p 4444:4444 --name selenium-chrome selenium/standalone-chrome
241
+ ```
242
+
243
+ **Selenium Server URL:**
244
+
245
+ * `http://localhost:4444/wd/hub`
246
+
129
247
  ---
130
248
 
131
249
  ## Requesting your council
@@ -2,14 +2,14 @@ uk_bin_collection/README.rst,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
2
2
  uk_bin_collection/tests/council_feature_input_parity.py,sha256=DO6Mk4ImYgM5ZCZ-cutwz5RoYYWZRLYx2tr6zIs_9Rc,3843
3
3
  uk_bin_collection/tests/features/environment.py,sha256=VQZjJdJI_kZn08M0j5cUgvKT4k3iTw8icJge1DGOkoA,127
4
4
  uk_bin_collection/tests/features/validate_council_outputs.feature,sha256=SJK-Vc737hrf03tssxxbeg_JIvAH-ddB8f6gU1LTbuQ,251
5
- uk_bin_collection/tests/input.json,sha256=tsLMMV6SegrudMLCU-F99uuUl3-RGY-dsG39rjeijhA,59231
5
+ uk_bin_collection/tests/input.json,sha256=R_hD1z-Wyc5VMuuQpBay7xLoo-ALHNSZX4OrLxOp_VQ,59163
6
6
  uk_bin_collection/tests/output.schema,sha256=ZwKQBwYyTDEM4G2hJwfLUVM-5v1vKRvRK9W9SS1sd18,1086
7
7
  uk_bin_collection/tests/step_defs/step_helpers/file_handler.py,sha256=Ygzi4V0S1MIHqbdstUlIqtRIwnynvhu4UtpweJ6-5N8,1474
8
- uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=zDLF5UUBp2n5S3ZlJod7AmYHt92TYQMMhHbX1keNn8U,3424
8
+ uk_bin_collection/tests/step_defs/test_validate_council.py,sha256=LrOSt_loA1Mw3vTqaO2LpaDMu7rYJy6k5Kr-EOBln7s,3424
9
9
  uk_bin_collection/tests/test_collect_data.py,sha256=C5w1qfz_1CU37_T7UPdBk3nisMLobIeVHNl6EXrgfrQ,3316
10
10
  uk_bin_collection/tests/test_common_functions.py,sha256=IvTNmuFKm9u5j2A9PumMB6eScQS165sPknFakc0l-eg,11190
11
- uk_bin_collection/uk_bin_collection/collect_data.py,sha256=txdtYoPgWDHLPohGz_Wu9OLXOr8TpXyuIGn0WnsFbZQ,4383
12
- uk_bin_collection/uk_bin_collection/common.py,sha256=ZvylERlKexeHtqBRb2_fZhYxC8aoEpUtcGo0dzJnWUo,9837
11
+ uk_bin_collection/uk_bin_collection/collect_data.py,sha256=dB7wWXsJX4fm5bIf84lexkvHIcO54CZ3JPxqmS-60YY,4654
12
+ uk_bin_collection/uk_bin_collection/common.py,sha256=1dNfE6hlDhgnLhIdAq426Pji_F3tiO8V7aEAq1bFfpQ,9838
13
13
  uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py,sha256=ppbrmm-MzB1wOulK--CU_0j4P-djNf3ozMhHnmQFqLo,1511
14
14
  uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py,sha256=yfhthv9nuogP19VOZ3TYQrq51qqjiCZcSel4sXhiKjs,4012
15
15
  uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py,sha256=LouqjspEMt1TkOGqWHs2zkxwOETIy3n7p64uKIlAgUg,2401
@@ -48,7 +48,7 @@ uk_bin_collection/uk_bin_collection/councils/CroydonCouncil.py,sha256=QJH27plySb
48
48
  uk_bin_collection/uk_bin_collection/councils/DartfordBoroughCouncil.py,sha256=SPirUUoweMwX5Txtsr0ocdcFtKxCQ9LhzTTJN20tM4w,1550
49
49
  uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py,sha256=MQC1-jXezXczrxTcvPQvkpGgyyAbzSKlX38WsmftHak,4007
50
50
  uk_bin_collection/uk_bin_collection/councils/DoncasterCouncil.py,sha256=b7pxoToXu6dBBYXsXmlwfPXE8BjHxt0hjCOBNlNgvX8,3118
51
- uk_bin_collection/uk_bin_collection/councils/DorsetCouncil.py,sha256=sdSR5ryOSWHtK_0OKJ75LGOvMiUAnQCc3uwT3xfqZ_0,2304
51
+ uk_bin_collection/uk_bin_collection/councils/DorsetCouncil.py,sha256=zfXQJpywfEQvBOhv5uPSkHdTrAg114yXIuTYDjHSXsA,1629
52
52
  uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py,sha256=3Zgap6kaVpDXtRfBKEL1Ms6eb0iFIipYKNtOq3Hrdd4,1891
53
53
  uk_bin_collection/uk_bin_collection/councils/DurhamCouncil.py,sha256=6O8bNsQVYQbrCYQE9Rp0c_rtkcXuxR3s9J6jn4MK4_s,1695
54
54
  uk_bin_collection/uk_bin_collection/councils/EastCambridgeshireCouncil.py,sha256=aYUVE5QqTxdj8FHhCB4EiFVDJahWJD9Pq0d1upBEvXg,1501
@@ -89,7 +89,7 @@ uk_bin_collection/uk_bin_collection/councils/ManchesterCityCouncil.py,sha256=RFX
89
89
  uk_bin_collection/uk_bin_collection/councils/MansfieldDistrictCouncil.py,sha256=F5AiTxImrnjE1k3ry96bfstOf5XSNBJS_4qqmymmh3w,1386
90
90
  uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py,sha256=3Y2Un4xXo1sCcMsudynODSzocV_mMofWkX2JqONDb5o,1997
91
91
  uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py,sha256=oOWwU5FSgGej2Mv7FQ66N-EzS5nZgmGsd0WnfLWUc1I,5238
92
- uk_bin_collection/uk_bin_collection/councils/MidSussexDistrictCouncil.py,sha256=TUKo2GIY-S2m_RDOQoWdLeBWqTbAjW8tovx1luTtJfE,2705
92
+ uk_bin_collection/uk_bin_collection/councils/MidSussexDistrictCouncil.py,sha256=AZgC9wmDLEjUOtIFvf0ehF5LHturXTH4DkE3ioPSVBA,6254
93
93
  uk_bin_collection/uk_bin_collection/councils/MiltonKeynesCityCouncil.py,sha256=3olsWa77L34vz-c7NgeGK9xmNuR4Ws_oAk5D4UpIkPw,2005
94
94
  uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py,sha256=54-autRRGAM4pBxlqmUE6g825rmUF-gRqrcgHL_lkIk,3994
95
95
  uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py,sha256=2DJ0gK385CWTdJzWyDdmNBTTrwX6F5YbIXg7di2oXDQ,5506
@@ -153,7 +153,7 @@ uk_bin_collection/uk_bin_collection/councils/TandridgeDistrictCouncil.py,sha256=
153
153
  uk_bin_collection/uk_bin_collection/councils/TelfordAndWrekinCouncil.py,sha256=p1ZS5R4EGxbEWlRBrkGXgKwE_lkyBT-R60yKFFhVObc,1844
154
154
  uk_bin_collection/uk_bin_collection/councils/TendringDistrictCouncil.py,sha256=DJbYI8m6lIISDrK5h8V5Jo-9kGG7kr9dz7GD8St4nc8,4274
155
155
  uk_bin_collection/uk_bin_collection/councils/TestValleyBoroughCouncil.py,sha256=Dtfkyrwt795W7gqFJxVGRR8t3R5WMNQZwTWJckLpZWE,8480
156
- uk_bin_collection/uk_bin_collection/councils/ThreeRiversDistrictCouncil.py,sha256=oZSW0Y5QHdqVy2fLys4sdB13tHgeNLha9WCUqG_R-gU,5527
156
+ uk_bin_collection/uk_bin_collection/councils/ThreeRiversDistrictCouncil.py,sha256=RHt3e9oeKzwxjjY-M8aC0nk-ZXhHIoyC81JzxkPVxsE,5531
157
157
  uk_bin_collection/uk_bin_collection/councils/TonbridgeAndMallingBC.py,sha256=UlgnHDoi8ecav2H5-HqKNDpqW1J3RN-c___5c08_Q7I,4859
158
158
  uk_bin_collection/uk_bin_collection/councils/TorbayCouncil.py,sha256=JW_BS7wkfxFsmx6taQtPAQWdBp1AfLrxs0XRQ2XZcSw,2029
159
159
  uk_bin_collection/uk_bin_collection/councils/TorridgeDistrictCouncil.py,sha256=6gOO02pYU0cbj3LAHiBVNG4zkFMyIGbkE2jAye3KcGM,6386
@@ -180,9 +180,9 @@ uk_bin_collection/uk_bin_collection/councils/WyreCouncil.py,sha256=zDDa7n4K_zm5P
180
180
  uk_bin_collection/uk_bin_collection/councils/YorkCouncil.py,sha256=I2kBYMlsD4bIdsvmoSzBjJAvTTi6yPfJa8xjJx1ys2w,1490
181
181
  uk_bin_collection/uk_bin_collection/councils/council_class_template/councilclasstemplate.py,sha256=4s9ODGPAwPqwXc8SrTX5Wlfmizs3_58iXUtHc4Ir86o,1162
182
182
  uk_bin_collection/uk_bin_collection/create_new_council.py,sha256=m-IhmWmeWQlFsTZC4OxuFvtw5ZtB8EAJHxJTH4O59lQ,1536
183
- uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=9qppF2oPkhmOoK8-ZkRIU1M6vhBh-yUCWAZEEd07iLk,5414
184
- uk_bin_collection-0.81.0.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
185
- uk_bin_collection-0.81.0.dist-info/METADATA,sha256=FY2xzwnZMQ0FiEomvCzrJHbqIVc1RuPvo9dPwPxexR4,12594
186
- uk_bin_collection-0.81.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
187
- uk_bin_collection-0.81.0.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
188
- uk_bin_collection-0.81.0.dist-info/RECORD,,
183
+ uk_bin_collection/uk_bin_collection/get_bin_data.py,sha256=YvmHfZqanwrJ8ToGch34x-L-7yPe31nB_x77_Mgl_vo,4545
184
+ uk_bin_collection-0.82.1.dist-info/LICENSE,sha256=vABBUOzcrgfaTKpzeo-si9YVEun6juDkndqA8RKdKGs,1071
185
+ uk_bin_collection-0.82.1.dist-info/METADATA,sha256=eF66OGftcUZmlsUAk4B5mGDaJrfHErD-tINXzxW6Ta8,16231
186
+ uk_bin_collection-0.82.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
187
+ uk_bin_collection-0.82.1.dist-info/entry_points.txt,sha256=36WCSGMWSc916S3Hi1ZkazzDKHaJ6CD-4fCEFm5MIao,90
188
+ uk_bin_collection-0.82.1.dist-info/RECORD,,