warn-scraper 1.2.113__py3-none-any.whl → 1.2.115__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -112,7 +112,7 @@ class Site:
112
112
  logger.debug("Fetching from cache")
113
113
  return self.cache.fetch(url, params)
114
114
  else:
115
- logger.debug("Pulling from the web")
115
+ logger.debug(f"Pulling from the web: {url} with params {params}")
116
116
  response = requests.get(url, params=params, verify=self.verify)
117
117
  logger.debug(f"Response code: {response.status_code}")
118
118
  html = response.text
warn/scrapers/ok.py CHANGED
@@ -1,16 +1,19 @@
1
+ import logging
1
2
  from pathlib import Path
2
3
 
3
- from warn.platforms.job_center.utils import scrape_state
4
+ import requests
4
5
 
5
6
  from .. import utils
6
7
 
7
- __authors__ = ["zstumgoren", "Dilcia19"]
8
- __tags__ = ["jobcenter"]
8
+ __authors__ = ["zstumgoren", "Dilcia19", "stucka"]
9
+ __tags__ = [""]
9
10
  __source__ = {
10
11
  "name": "Oklahoma Office of Workforces Development",
11
- "url": "https://okjobmatch.com/search/warn_lookups/new",
12
+ "url": "https://www.employoklahoma.gov/Participants/s/warnnotices",
12
13
  }
13
14
 
15
+ logger = logging.getLogger(__name__)
16
+
14
17
 
15
18
  def scrape(
16
19
  data_dir: Path = utils.WARN_DATA_DIR,
@@ -28,13 +31,86 @@ def scrape(
28
31
  Returns: the Path where the file is written
29
32
  """
30
33
  output_csv = data_dir / "ok.csv"
31
- search_url = "https://okjobmatch.com/search/warn_lookups"
32
- # Date chosen based on manual research
33
- stop_year = 1999
34
- # Use cache for years before current and prior year
35
- scrape_state(
36
- "OK", search_url, output_csv, stop_year, cache_dir, use_cache=use_cache
37
- )
34
+ # search_url = "https://okjobmatch.com/search/warn_lookups"
35
+ # search_url = "https://www.employoklahoma.gov/Participants/s/warnnotices"
36
+ posturl = "https://www.employoklahoma.gov/Participants/s/sfsites/aura?r=2&aura.ApexAction.execute=6"
37
+
38
+ # There are a bunch of hard-coded values in here that seem to work for at least a day.
39
+ # Undetermined:
40
+ # -- Will this continue working in the short- or medium-term?
41
+ # -- What is the significance of each variable?
42
+ # -- How do we refresh these?
43
+
44
+ headers = {
45
+ "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/142.0",
46
+ "Accept": "*/*",
47
+ "Accept-Language": "en-US,en;q=0.5",
48
+ "Accept-Encoding": "gzip, deflate, br, zstd",
49
+ "Referer": "https://www.employoklahoma.gov/Participants/s/warnnotices",
50
+ "X-SFDC-LDS-Endpoints": "ApexActionController.execute:ConfigurableLoginAndMaintenanceMessages.hasDocument, ApexActionController.execute:ConfigurableLoginAndMaintenanceMessages.checkJobExpiry, ApexActionController.execute:ConfigurableLoginAndMaintenanceMessages.checkResumeExpiry, ApexActionController.execute:ConfigurableLoginAndMaintenanceMessages.checkUIRegistered, ApexActionController.execute:ConfigurableLoginAndMaintenanceMessages.getLoginMaintenanceMessage, ApexActionController.execute:OESC_JS_getWARNLayoffNotices.getListofLayoffAccService",
51
+ "X-SFDC-Page-Scope-Id": "9c659a19-8020-41b0-a81c-36335e22801a",
52
+ "X-SFDC-Request-Id": "16140000007a08bd2f",
53
+ "X-SFDC-Page-Cache": "9439898463d86806",
54
+ "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
55
+ "X-B3-TraceId": "856a2236ba7d283e",
56
+ "X-B3-SpanId": "b79b2da3a7dc4544",
57
+ "X-B3-Sampled": "0",
58
+ "Origin": "https://www.employoklahoma.gov",
59
+ "Connection": "keep-alive",
60
+ "Cookie": "renderCtx=%7B%22pageId%22%3A%223823bba2-3b00-4db7-aca6-5ca0eb67fc63%22%2C%22schema%22%3A%22Published%22%2C%22viewType%22%3A%22Published%22%2C%22brandingSetId%22%3A%22fa0b6362-0214-44b9-947d-2543eaab22c7%22%2C%22audienceIds%22%3A%22%22%7D; CookieConsentPolicy=0:1; LSKey-c$CookieConsentPolicy=0:1; pctrk=f3070d0c-7078-4062-96bb-de9e82cbb1db",
61
+ "Sec-Fetch-Dest": "empty",
62
+ "Sec-Fetch-Mode": "cors",
63
+ "Sec-Fetch-Site": "same-origin",
64
+ }
65
+
66
+ payload = "message=%7B%22actions%22%3A%5B%7B%22id%22%3A%22156%3Ba%22%2C%22descriptor%22%3A%22aura%3A%2F%2FApexActionController%2FACTION%24execute%22%2C%22callingDescriptor%22%3A%22UNKNOWN%22%2C%22params%22%3A%7B%22namespace%22%3A%22%22%2C%22classname%22%3A%22ConfigurableLoginAndMaintenanceMessages%22%2C%22method%22%3A%22hasDocument%22%2C%22params%22%3A%7B%7D%2C%22cacheable%22%3Afalse%2C%22isContinuation%22%3Afalse%7D%7D%2C%7B%22id%22%3A%22157%3Ba%22%2C%22descriptor%22%3A%22aura%3A%2F%2FApexActionController%2FACTION%24execute%22%2C%22callingDescriptor%22%3A%22UNKNOWN%22%2C%22params%22%3A%7B%22namespace%22%3A%22%22%2C%22classname%22%3A%22ConfigurableLoginAndMaintenanceMessages%22%2C%22method%22%3A%22checkJobExpiry%22%2C%22params%22%3A%7B%7D%2C%22cacheable%22%3Afalse%2C%22isContinuation%22%3Afalse%7D%7D%2C%7B%22id%22%3A%22158%3Ba%22%2C%22descriptor%22%3A%22aura%3A%2F%2FApexActionController%2FACTION%24execute%22%2C%22callingDescriptor%22%3A%22UNKNOWN%22%2C%22params%22%3A%7B%22namespace%22%3A%22%22%2C%22classname%22%3A%22ConfigurableLoginAndMaintenanceMessages%22%2C%22method%22%3A%22checkResumeExpiry%22%2C%22params%22%3A%7B%7D%2C%22cacheable%22%3Afalse%2C%22isContinuation%22%3Afalse%7D%7D%2C%7B%22id%22%3A%22159%3Ba%22%2C%22descriptor%22%3A%22aura%3A%2F%2FApexActionController%2FACTION%24execute%22%2C%22callingDescriptor%22%3A%22UNKNOWN%22%2C%22params%22%3A%7B%22namespace%22%3A%22%22%2C%22classname%22%3A%22ConfigurableLoginAndMaintenanceMessages%22%2C%22method%22%3A%22checkUIRegistered%22%2C%22params%22%3A%7B%7D%2C%22cacheable%22%3Afalse%2C%22isContinuation%22%3Afalse%7D%7D%2C%7B%22id%22%3A%22160%3Ba%22%2C%22descriptor%22%3A%22aura%3A%2F%2FApexActionController%2FACTION%24execute%22%2C%22callingDescriptor%22%3A%22UNKNOWN%22%2C%22params%22%3A%7B%22namespace%22%3A%22%22%2C%22classname%22%3A%22ConfigurableLoginAndMaintenanceMessages%22%2C%22method%22%3A%22getLoginMaintenanceMessage%22%2C%22params%22%3A%7B%22displayTo%22%3A%22Job%20Seekers%22%2C%22messageType%22%3A%22Portal%2
0Login%20Messages%22%7D%2C%22cacheable%22%3Afalse%2C%22isContinuation%22%3Afalse%7D%7D%2C%7B%22id%22%3A%22161%3Ba%22%2C%22descriptor%22%3A%22aura%3A%2F%2FApexActionController%2FACTION%24execute%22%2C%22callingDescriptor%22%3A%22UNKNOWN%22%2C%22params%22%3A%7B%22namespace%22%3A%22%22%2C%22classname%22%3A%22OESC_JS_getWARNLayoffNotices%22%2C%22method%22%3A%22getListofLayoffAccService%22%2C%22cacheable%22%3Afalse%2C%22isContinuation%22%3Afalse%7D%7D%5D%7D&aura.context=%7B%22mode%22%3A%22PROD%22%2C%22fwuid%22%3A%22eE5UbjZPdVlRT3M0d0xtOXc5MzVOQWg5TGxiTHU3MEQ5RnBMM0VzVXc1cmcxMi42MjkxNDU2LjE2Nzc3MjE2%22%2C%22app%22%3A%22siteforce%3AcommunityApp%22%2C%22loaded%22%3A%7B%22APPLICATION%40markup%3A%2F%2Fsiteforce%3AcommunityApp%22%3A%221305_7pTC6grCTP7M16KdvDQ-Xw%22%7D%2C%22dn%22%3A%5B%5D%2C%22globals%22%3A%7B%7D%2C%22uad%22%3Atrue%7D&aura.pageURI=%2FParticipants%2Fs%2Fwarnnotices&aura.token=null"
67
+
68
+ logger.debug(f"Attempting to send hard-coded data to {posturl}")
69
+ r = requests.post(posturl, headers=headers, data=payload)
70
+ rawdata = r.json()
71
+
72
+ for entry in rawdata["actions"]:
73
+ if (
74
+ entry["id"] == "161;a"
75
+ ): # What is this value? Will this change? Also no idea.
76
+ cleanerdata = entry["returnValue"]["returnValue"]
77
+ """
78
+ fields = set()
79
+ for entry in cleanerdata:
80
+ for field in entry:
81
+ fields.add(field)
82
+ {'Id',
83
+ 'Launchpad__Layoff_Closure_Type__c',
84
+ 'Launchpad__Notice_Date__c',
85
+ 'OESC_Employer_City__c',
86
+ 'OESC_Employer_Name__c',
87
+ 'OESC_Employer_Zip_Code__c',
88
+ 'RecordTypeId',
89
+ 'Select_Local_Workforce_Board__c'}
90
+ """
91
+ fields = {
92
+ "Id": "id",
93
+ "Launchpad__Layoff_Closure_Type__c": "closure_type",
94
+ "Launchpad__Notice_Date__c": "notice_date",
95
+ "OESC_Employer_City__c": "city",
96
+ "OESC_Employer_Name__c": "company_name",
97
+ "OESC_Employer_Zip_Code__c": "zip_code",
98
+ "RecordTypeId": "record_type_id",
99
+ "Select_Local_Workforce_Board__c": "workforce_board",
100
+ "jobs-not-in-this": "jobs",
101
+ }
102
+
103
+ masterlist = []
104
+ for entry in cleanerdata:
105
+ line = {}
106
+ for item in fields:
107
+ if item in entry:
108
+ line[fields[item]] = entry[item]
109
+ else:
110
+ line[fields[item]] = None
111
+ masterlist.append(line)
112
+
113
+ utils.write_dict_rows_to_csv(output_csv, list(fields.values()), masterlist)
38
114
  return output_csv
39
115
 
40
116
 
warn/utils.py CHANGED
@@ -209,12 +209,12 @@ def write_rows_to_csv(output_path: Path, rows: list, mode="w"):
209
209
 
210
210
 
211
211
  def write_dict_rows_to_csv(output_path, headers, rows, mode="w", extrasaction="raise"):
212
- """Write the provided dictionary to the provided path as comma-separated values.
212
+ """Write the provided list of dictionaries to the provided path as comma-separated values.
213
213
 
214
214
  Args:
215
215
  output_path (Path): the Path where the result will be saved
216
216
  headers (list): a list of the headers for the output file
217
- rows (list): the dict to be saved
217
+ rows (list): the list of dictionaries to be saved
218
218
  mode (str): the mode to be used when opening the file (default 'w')
219
219
  extrasaction (str): what to do if a field isn't in the headers (default 'raise')
220
220
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: warn-scraper
3
- Version: 1.2.113
3
+ Version: 1.2.115
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
@@ -17,11 +17,11 @@ warn/__init__.py,sha256=A07JFY1TyaPtVIndBa7IvTk13DETqIkLgRdk0A-MCoE,85
17
17
  warn/cache.py,sha256=hyta04_G-ALGwcKl4xNc7EgHS_xklyVD5d8SXNrJekY,5520
18
18
  warn/cli.py,sha256=ZqyJwICdHFkn2hEgbArj_upbElR9-TSDlYDqyEGeexE,2019
19
19
  warn/runner.py,sha256=oeGRybGwpnkQKlPzRMlKxhsDt1GN4PZoX-vUwrsPgos,1894
20
- warn/utils.py,sha256=GOZE2koct8JlZkHFahPuTt1w8DLRyqeh58DRqGq6TTg,10382
20
+ warn/utils.py,sha256=Jd1pIVtfUXxDweKa_6vHTNX13E47Ms7FHSw110unDHk,10408
21
21
  warn/platforms/__init__.py,sha256=wIZRDf4tbTuC8oKM4ZrTAtwNgbtMQGzPXMwDYCFyrog,81
22
22
  warn/platforms/job_center/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  warn/platforms/job_center/cache.py,sha256=yhA3sE46lNFg8vEewSoRYVByi0YSlkBiKm7qoSUiTdM,1868
24
- warn/platforms/job_center/site.py,sha256=Voo2PG2YC_kbAa6RUoBiqVBEoOCzIZ8imGko45KBWuY,10207
24
+ warn/platforms/job_center/site.py,sha256=J_J6WYrfrdP9AyOuRJ8Myg_Gh5aB3-lxxp_-PBuB_A4,10236
25
25
  warn/platforms/job_center/urls.py,sha256=IWhpuzN_xcNdHh23GbZPGvuHCsMcmb03qx3pRn1Gy-k,414
26
26
  warn/platforms/job_center/utils.py,sha256=HdUKgKirmpPP7e4Cu_ZyB3zPVS_p-_ylo-lXFhxK2QM,5696
27
27
  warn/scrapers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -53,7 +53,7 @@ warn/scrapers/nj.py,sha256=nwbMbeQuUJbYRVoyUyKZBmNqvqsXu3Habt-10r8DvZE,2230
53
53
  warn/scrapers/nm.py,sha256=HZpfLzn0LvLeRztYvqJ9n6FR5PYpyMndo8tzI8h9S2o,3581
54
54
  warn/scrapers/ny.py,sha256=hXbxPhiK-Eyc9h_05wkAsfdVIT0vayKX4EE5aiJVdBc,2291
55
55
  warn/scrapers/oh.py,sha256=2MEB_0AT37dsAsrhdl_Y0LUNHu0xGy4B1F7aSMhuUu0,3151
56
- warn/scrapers/ok.py,sha256=qJE49VY6dMhbokFB9IAOL2XyuYSJpEKKxITPO9sUHS4,1197
56
+ warn/scrapers/ok.py,sha256=ZZciyR1jPS4SzS2JSQwhJsDXP_VxA9UkEQvLpxzWzp4,7676
57
57
  warn/scrapers/or.py,sha256=0PjyrW3CHdxtHhqEo3Ob-9B6YckACoBD3K0c4FPQUcg,5208
58
58
  warn/scrapers/ri.py,sha256=EUyLy59eNiYHqiJR8C0YcJrZtp09KyVc45AFD0_Uc0U,4497
59
59
  warn/scrapers/sc.py,sha256=p3kscSNSW9C8C5QaSUbCAo6XibgB7G2iH6zaMH7Mnsc,4819
@@ -65,9 +65,9 @@ warn/scrapers/va.py,sha256=7Nle7qL0VNPiE653XyaP9HQqSfuJFDRr2kEkjOqLvFM,11269
65
65
  warn/scrapers/vt.py,sha256=d-bo4WK2hkrk4BhCCmLpEovcoZltlvdIUB6O0uaMx5A,1186
66
66
  warn/scrapers/wa.py,sha256=UXdVtHZo_a-XfoiyOooTRfTb9W3PErSZdKca6SRORgs,4282
67
67
  warn/scrapers/wi.py,sha256=ClEzXkwZbop0W4fkQgsb5oHAPUrb4luUPGV-jOKwkcg,4855
68
- warn_scraper-1.2.113.dist-info/licenses/LICENSE,sha256=ZV-QHyqPwyMuwuj0lI05JeSjV1NyzVEk8Yeu7FPtYS0,585
69
- warn_scraper-1.2.113.dist-info/METADATA,sha256=EZuXeaW5PRvvefKsYRGscdPoLjr61_5Bz8WKVTmwtm4,2385
70
- warn_scraper-1.2.113.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
71
- warn_scraper-1.2.113.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
72
- warn_scraper-1.2.113.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
73
- warn_scraper-1.2.113.dist-info/RECORD,,
68
+ warn_scraper-1.2.115.dist-info/licenses/LICENSE,sha256=ZV-QHyqPwyMuwuj0lI05JeSjV1NyzVEk8Yeu7FPtYS0,585
69
+ warn_scraper-1.2.115.dist-info/METADATA,sha256=1kh60cERBlPu4DfdWFgIFchc15NTGb3V4z4tnigCrVc,2385
70
+ warn_scraper-1.2.115.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
71
+ warn_scraper-1.2.115.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
72
+ warn_scraper-1.2.115.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
73
+ warn_scraper-1.2.115.dist-info/RECORD,,