warn-scraper 1.2.114__py3-none-any.whl → 1.2.116__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warn/scrapers/co.py +15 -2
- warn/scrapers/ok.py +2 -1
- {warn_scraper-1.2.114.dist-info → warn_scraper-1.2.116.dist-info}/METADATA +1 -1
- {warn_scraper-1.2.114.dist-info → warn_scraper-1.2.116.dist-info}/RECORD +8 -8
- {warn_scraper-1.2.114.dist-info → warn_scraper-1.2.116.dist-info}/WHEEL +0 -0
- {warn_scraper-1.2.114.dist-info → warn_scraper-1.2.116.dist-info}/entry_points.txt +0 -0
- {warn_scraper-1.2.114.dist-info → warn_scraper-1.2.116.dist-info}/licenses/LICENSE +0 -0
- {warn_scraper-1.2.114.dist-info → warn_scraper-1.2.116.dist-info}/top_level.txt +0 -0
warn/scrapers/co.py
CHANGED
@@ -263,6 +263,13 @@ def scrape(
|
|
263
263
|
for row in cleaned_data:
|
264
264
|
row_dict = {}
|
265
265
|
mangled = []
|
266
|
+
|
267
|
+
line = {}
|
268
|
+
for key in row:
|
269
|
+
if not key.startswith("blank_cell_"):
|
270
|
+
line[key] = row[key]
|
271
|
+
row = line
|
272
|
+
|
266
273
|
for key in row:
|
267
274
|
if (
|
268
275
|
key not in header_crosswalk and key not in header_garbage
|
@@ -321,13 +328,19 @@ def scrape_google_sheets(table, header_list=None):
|
|
321
328
|
# Parse the header row into a list,
|
322
329
|
# preserving its order in the sheet
|
323
330
|
header_list = []
|
331
|
+
blanks = 0
|
324
332
|
for cellindex, cell in enumerate(header_soup.find_all("td")):
|
325
333
|
cell_text = cell.text.strip()
|
326
|
-
#
|
334
|
+
# Handle empty header cells
|
327
335
|
if cell_text:
|
328
336
|
header_list.append(cell_text)
|
329
|
-
|
337
|
+
elif not cell_text and cellindex == 0:
|
330
338
|
header_list.append("Company Name")
|
339
|
+
elif not cell_text and cellindex > 0:
|
340
|
+
blanks += 1
|
341
|
+
cell_text = f"blank_cell_{blanks}"
|
342
|
+
logger.debug(f"Adding {cell_text}")
|
343
|
+
header_list.append(cell_text)
|
331
344
|
|
332
345
|
# Loop through all the data rows, which start
|
333
346
|
# after the header and the little bar
|
warn/scrapers/ok.py
CHANGED
@@ -93,10 +93,11 @@ def scrape(
|
|
93
93
|
"Launchpad__Layoff_Closure_Type__c": "closure_type",
|
94
94
|
"Launchpad__Notice_Date__c": "notice_date",
|
95
95
|
"OESC_Employer_City__c": "city",
|
96
|
-
"OESC_Employer_Name__c": "
|
96
|
+
"OESC_Employer_Name__c": "company_name",
|
97
97
|
"OESC_Employer_Zip_Code__c": "zip_code",
|
98
98
|
"RecordTypeId": "record_type_id",
|
99
99
|
"Select_Local_Workforce_Board__c": "workforce_board",
|
100
|
+
"jobs-not-in-this": "jobs",
|
100
101
|
}
|
101
102
|
|
102
103
|
masterlist = []
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: warn-scraper
|
3
|
-
Version: 1.2.114
|
3
|
+
Version: 1.2.116
|
4
4
|
Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
|
5
5
|
Home-page: https://github.com/biglocalnews/warn-scraper
|
6
6
|
Author: Big Local News
|
@@ -29,7 +29,7 @@ warn/scrapers/ak.py,sha256=h7BYMTV0whwWAPhbzVDVKMMoVCFphKly70aiTHabPq4,1847
|
|
29
29
|
warn/scrapers/al.py,sha256=D0rT9GQ0vwfkRuveVAt-Po-T6b2TI1EPGeLOBy2m3_M,2240
|
30
30
|
warn/scrapers/az.py,sha256=elGbue01Gjf_DQ66Wy9qqGIOJsiY-KIKJOVeft8pCXg,1447
|
31
31
|
warn/scrapers/ca.py,sha256=VQOfjHXPCc-jYwh-EPGVVfnzvXB7pdmCt2uJ6QnMPRM,8600
|
32
|
-
warn/scrapers/co.py,sha256=
|
32
|
+
warn/scrapers/co.py,sha256=fnd_dz4esjulFm1C27VtYyXMZoqtZkVl6gOWc3wNn6E,17914
|
33
33
|
warn/scrapers/ct.py,sha256=PKeZtlB0-z2wCmYmGl_WYoVo2gzwKV36upZcJVaJxjM,4852
|
34
34
|
warn/scrapers/dc.py,sha256=p1_c7O2R3O-41DmvcLVUIRhQKUewvZZKkzWkBxytN5M,5165
|
35
35
|
warn/scrapers/de.py,sha256=GyM92A-lFwZAfRxgbO-sIWhRfmBEKirzchaPIv-u0o4,1364
|
@@ -53,7 +53,7 @@ warn/scrapers/nj.py,sha256=nwbMbeQuUJbYRVoyUyKZBmNqvqsXu3Habt-10r8DvZE,2230
|
|
53
53
|
warn/scrapers/nm.py,sha256=HZpfLzn0LvLeRztYvqJ9n6FR5PYpyMndo8tzI8h9S2o,3581
|
54
54
|
warn/scrapers/ny.py,sha256=hXbxPhiK-Eyc9h_05wkAsfdVIT0vayKX4EE5aiJVdBc,2291
|
55
55
|
warn/scrapers/oh.py,sha256=2MEB_0AT37dsAsrhdl_Y0LUNHu0xGy4B1F7aSMhuUu0,3151
|
56
|
-
warn/scrapers/ok.py,sha256=
|
56
|
+
warn/scrapers/ok.py,sha256=ZZciyR1jPS4SzS2JSQwhJsDXP_VxA9UkEQvLpxzWzp4,7676
|
57
57
|
warn/scrapers/or.py,sha256=0PjyrW3CHdxtHhqEo3Ob-9B6YckACoBD3K0c4FPQUcg,5208
|
58
58
|
warn/scrapers/ri.py,sha256=EUyLy59eNiYHqiJR8C0YcJrZtp09KyVc45AFD0_Uc0U,4497
|
59
59
|
warn/scrapers/sc.py,sha256=p3kscSNSW9C8C5QaSUbCAo6XibgB7G2iH6zaMH7Mnsc,4819
|
@@ -65,9 +65,9 @@ warn/scrapers/va.py,sha256=7Nle7qL0VNPiE653XyaP9HQqSfuJFDRr2kEkjOqLvFM,11269
|
|
65
65
|
warn/scrapers/vt.py,sha256=d-bo4WK2hkrk4BhCCmLpEovcoZltlvdIUB6O0uaMx5A,1186
|
66
66
|
warn/scrapers/wa.py,sha256=UXdVtHZo_a-XfoiyOooTRfTb9W3PErSZdKca6SRORgs,4282
|
67
67
|
warn/scrapers/wi.py,sha256=ClEzXkwZbop0W4fkQgsb5oHAPUrb4luUPGV-jOKwkcg,4855
|
68
|
-
warn_scraper-1.2.
|
69
|
-
warn_scraper-1.2.
|
70
|
-
warn_scraper-1.2.
|
71
|
-
warn_scraper-1.2.
|
72
|
-
warn_scraper-1.2.
|
73
|
-
warn_scraper-1.2.
|
68
|
+
warn_scraper-1.2.116.dist-info/licenses/LICENSE,sha256=ZV-QHyqPwyMuwuj0lI05JeSjV1NyzVEk8Yeu7FPtYS0,585
|
69
|
+
warn_scraper-1.2.116.dist-info/METADATA,sha256=azCMcdV2gteQF6fCJKmmGV54nWi3877lo1OYYcVwBhw,2385
|
70
|
+
warn_scraper-1.2.116.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
71
|
+
warn_scraper-1.2.116.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
|
72
|
+
warn_scraper-1.2.116.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
|
73
|
+
warn_scraper-1.2.116.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|