ugrc-sweeper 2.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ugrc-sweeper-2.0.1/LICENSE +9 -0
- ugrc-sweeper-2.0.1/MANIFEST.in +3 -0
- ugrc-sweeper-2.0.1/PKG-INFO +160 -0
- ugrc-sweeper-2.0.1/pyproject.toml +10 -0
- ugrc-sweeper-2.0.1/setup.cfg +4 -0
- ugrc-sweeper-2.0.1/setup.py +65 -0
- ugrc-sweeper-2.0.1/src/sweeper/__init__.py +0 -0
- ugrc-sweeper-2.0.1/src/sweeper/__main__.py +207 -0
- ugrc-sweeper-2.0.1/src/sweeper/address_parser.py +246 -0
- ugrc-sweeper-2.0.1/src/sweeper/backup.py +54 -0
- ugrc-sweeper-2.0.1/src/sweeper/config.py +36 -0
- ugrc-sweeper-2.0.1/src/sweeper/report.py +181 -0
- ugrc-sweeper-2.0.1/src/sweeper/street_types.json +321 -0
- ugrc-sweeper-2.0.1/src/sweeper/sweepers/UseLimitations.html +10 -0
- ugrc-sweeper-2.0.1/src/sweeper/sweepers/__init__.py +0 -0
- ugrc-sweeper-2.0.1/src/sweeper/sweepers/addresses.py +71 -0
- ugrc-sweeper-2.0.1/src/sweeper/sweepers/base.py +17 -0
- ugrc-sweeper-2.0.1/src/sweeper/sweepers/duplicates.py +128 -0
- ugrc-sweeper-2.0.1/src/sweeper/sweepers/empties.py +66 -0
- ugrc-sweeper-2.0.1/src/sweeper/sweepers/invalids.py +19 -0
- ugrc-sweeper-2.0.1/src/sweeper/sweepers/metadata.py +291 -0
- ugrc-sweeper-2.0.1/src/sweeper/utilities.py +27 -0
- ugrc-sweeper-2.0.1/src/sweeper/workspace_info.py +123 -0
- ugrc-sweeper-2.0.1/src/ugrc_sweeper.egg-info/PKG-INFO +160 -0
- ugrc-sweeper-2.0.1/src/ugrc_sweeper.egg-info/SOURCES.txt +28 -0
- ugrc-sweeper-2.0.1/src/ugrc_sweeper.egg-info/dependency_links.txt +1 -0
- ugrc-sweeper-2.0.1/src/ugrc_sweeper.egg-info/entry_points.txt +3 -0
- ugrc-sweeper-2.0.1/src/ugrc_sweeper.egg-info/not-zip-safe +1 -0
- ugrc-sweeper-2.0.1/src/ugrc_sweeper.egg-info/requires.txt +14 -0
- ugrc-sweeper-2.0.1/src/ugrc_sweeper.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) UGRC contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
6
|
+
|
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: ugrc-sweeper
|
|
3
|
+
Version: 2.0.1
|
|
4
|
+
Summary: CLI tool for making good data
|
|
5
|
+
Home-page: https://github.com/agrc/sweeper
|
|
6
|
+
Author: UGRC
|
|
7
|
+
Author-email: ugrc-developers@utah.gov
|
|
8
|
+
License: MIT
|
|
9
|
+
Platform: UNKNOWN
|
|
10
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: Unix
|
|
14
|
+
Classifier: Operating System :: POSIX
|
|
15
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
16
|
+
Classifier: Programming Language :: Python
|
|
17
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
18
|
+
Classifier: Topic :: Utilities
|
|
19
|
+
Requires-Python: >=3
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Provides-Extra: tests
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
|
|
24
|
+
# ugrc-sweeper [PyPI version](https://badge.fury.io/py/ugrc-sweeper) [build status](https://github.com/agrc/sweeper/actions/workflows/push.yml)
|
|
25
|
+
|
|
26
|
+
The data cleaning service.
|
|
27
|
+
|
|
28
|
+

|
|
29
|
+
|
|
30
|
+
## Available Sweepers
|
|
31
|
+
|
|
32
|
+
### Addresses
|
|
33
|
+
|
|
34
|
+
Checks that addresses have minimum required parts and optionally normalizes them.
|
|
35
|
+
|
|
36
|
+
### Duplicates
|
|
37
|
+
|
|
38
|
+
Checks for duplicate features.
|
|
39
|
+
|
|
40
|
+
### Empties
|
|
41
|
+
|
|
42
|
+
Checks for empty geometries.
|
|
43
|
+
|
|
44
|
+
### Metadata
|
|
45
|
+
|
|
46
|
+
Checks to make sure that the metadata meets [the Basic SGID Metadata Requirements](https://gis.utah.gov/about/policy/metadata/#basic-sgid-metadata).
|
|
47
|
+
|
|
48
|
+
#### Tags
|
|
49
|
+
|
|
50
|
+
Checks to make sure that existing tags are cased appropriately. This means that they are title-cased other than known abbreviations (e.g. UGRC, BLM) and articles (e.g. a, the, of).
|
|
51
|
+
|
|
52
|
+
This check also verifies that the data set contains a tag that matches the database name (e.g. `SGID`) and the schema (e.g. `Cadastre`).
|
|
53
|
+
|
|
54
|
+
`--try-fix` adds missing required tags and title-cases any existing tags.
|
|
55
|
+
|
|
56
|
+
#### Summary
|
|
57
|
+
|
|
58
|
+
Checks to make sure that the summary is less than 2048 characters (a limitation of AGOL) and that it is shorter than the description.
|
|
59
|
+
|
|
60
|
+
#### Description
|
|
61
|
+
|
|
62
|
+
Checks to make sure that the description contains a link to a data page on gis.utah.gov.
|
|
63
|
+
|
|
64
|
+
#### Use Limitations
|
|
65
|
+
|
|
66
|
+
Checks to make sure that the text in this section matches the [official text for UGRC](src/sweeper/sweepers/UseLimitations.html).
|
|
67
|
+
|
|
68
|
+
`--try-fix` updates the text to match the official text.
|
|
69
|
+
|
|
70
|
+
## Parsing Addresses
|
|
71
|
+
|
|
72
|
+
This project contains a module that can be used as a standalone address parser, `sweeper.address_parser`. This allows developers to take advantage of sweeper's advanced address parsing and normalization without having to run the entire sweeper process.
|
|
73
|
+
|
|
74
|
+
### Usage Example
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from sweeper.address_parser import Address
|
|
78
|
+
|
|
79
|
+
address = Address('123 South Main Street')
|
|
80
|
+
print(address)
|
|
81
|
+
|
|
82
|
+
'''
|
|
83
|
+
--> Parsed Address:
|
|
84
|
+
{'address_number': '123',
|
|
85
|
+
'normalized': '123 S MAIN ST',
|
|
86
|
+
'prefix_direction': 'S',
|
|
87
|
+
'street_name': 'MAIN',
|
|
88
|
+
'street_type': 'ST'}
|
|
89
|
+
'''
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Available Address class properties
|
|
93
|
+
|
|
94
|
+
All properties default to None if there is no parsed value.
|
|
95
|
+
|
|
96
|
+
`address_number`
|
|
97
|
+
|
|
98
|
+
`address_number_suffix`
|
|
99
|
+
|
|
100
|
+
`prefix_direction`
|
|
101
|
+
|
|
102
|
+
`street_name`
|
|
103
|
+
|
|
104
|
+
`street_direction`
|
|
105
|
+
|
|
106
|
+
`street_type`
|
|
107
|
+
|
|
108
|
+
`unit_type`
|
|
109
|
+
|
|
110
|
+
`unit_id`
|
|
111
|
+
If no `unit_type` is found, this property is prefixed with `#` (e.g. `# 3`). If `unit_type` is found, `#` is stripped from this property.
|
|
112
|
+
|
|
113
|
+
`city`
|
|
114
|
+
|
|
115
|
+
`zip_code`
|
|
116
|
+
|
|
117
|
+
`po_box`
|
|
118
|
+
The PO Box if a po-box-type address was entered (e.g. `po_box` would be `1` for `p.o. box 1`).
|
|
119
|
+
|
|
120
|
+
`normalized`
|
|
121
|
+
A normalized string representing the entire address that was passed into the constructor. PO Boxes are normalized in this format `PO BOX <number>`.
|
|
122
|
+
|
|
123
|
+
## Installation (requires Pro 2.7+)
|
|
124
|
+
|
|
125
|
+
<!-- Current conda install arcpy -c esri seems to be wonky; just clone to be safe -->
|
|
126
|
+
|
|
127
|
+
1. clone arcgis conda environment
|
|
128
|
+
- `conda create --name sweeper --clone arcgispro-py3`
|
|
129
|
+
1. activate environment
|
|
130
|
+
- `activate sweeper`
|
|
131
|
+
1. install sweeper
|
|
132
|
+
- `pip install ugrc-sweeper`
|
|
133
|
+
1. Optionally duplicate `config.sample.json` as `config.json` in the folder where you will run sweeper.
|
|
134
|
+
|
|
135
|
+
> [!CAUTION]
|
|
136
|
+
> This is required for the following functions:
|
|
137
|
+
>
|
|
138
|
+
> - `--scheduled` argument (required for sending emails)
|
|
139
|
+
> - `--change-detect` argument
|
|
140
|
+
> - using user-specific connection files via the `CONNECTIONS_FOLDER` config value
|
|
141
|
+
|
|
142
|
+
## Exclusions
|
|
143
|
+
|
|
144
|
+
Tables can be skipped by adding values to the `EXCLUSIONS.<sweeper_key>` config array. These values are matched against table names using [fnmatch](https://docs.python.org/3/library/fnmatch.html#fnmatch.fnmatch). Note that these do not apply when using the `--table-name` argument.
|
|
145
|
+
|
|
146
|
+
## Development
|
|
147
|
+
|
|
148
|
+
1. clone arcgis conda environment
|
|
149
|
+
- `conda create --name sweeper --clone arcgispro-py3`
|
|
150
|
+
1. activate environment
|
|
151
|
+
- `activate sweeper`
|
|
152
|
+
1. install required dependencies to work on sweeper
|
|
153
|
+
- `pip install -e ".[tests]"`
|
|
154
|
+
1. `test_metadata.py` uses a SQL database that needs to be restored via `src/sweeper/tests/data/Sweeper.bak` to your local SQL Server.
|
|
155
|
+
1. run sweeper: `sweeper`
|
|
156
|
+
1. test: `pytest`
|
|
157
|
+
1. lint: `ruff check .`
|
|
158
|
+
1. format: `ruff format .`
|
|
159
|
+
|
|
160
|
+
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
[tool.ruff]
|
|
2
|
+
line-length = 120
|
|
3
|
+
[tool.ruff.lint]
|
|
4
|
+
ignore = ["E501"]
|
|
5
|
+
[tool.pytest.ini_options]
|
|
6
|
+
minversion = "6.0"
|
|
7
|
+
testpaths = ["tests", "src"]
|
|
8
|
+
norecursedirs = [".env", "data", "maps", ".github", ".vscode"]
|
|
9
|
+
console_output_style = "count"
|
|
10
|
+
addopts = "--cov-branch --cov=sweeper --cov-report term --cov-report xml:cov.xml --instafail -p no:faulthandler"
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- encoding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
setup.py
|
|
5
|
+
A module that installs sweeper as a module
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import glob
|
|
9
|
+
from os.path import basename, splitext
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from setuptools import find_packages, setup
|
|
13
|
+
|
|
14
|
+
# The readme supplies the long description shown on the package index. Read it
# explicitly as UTF-8 so the result does not depend on the platform's locale
# encoding, and tolerate its absence so that a source tree shipped without the
# readme can still be built and installed.
readme_path = Path(__file__).parent / "readme.md"
long_description = readme_path.read_text(encoding="utf-8") if readme_path.is_file() else ""

setup(
    name="ugrc-sweeper",
    version="2.0.1",
    license="MIT",
    description="CLI tool for making good data",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="UGRC",
    author_email="ugrc-developers@utah.gov",
    url="https://github.com/agrc/sweeper",
    # packages live under src/ (src layout)
    packages=find_packages("src"),
    package_dir={"": "src"},
    # any loose modules directly under src/ are installed as top-level modules
    py_modules=[splitext(basename(i))[0] for i in glob.glob("src/*.py")],
    python_requires=">=3",
    include_package_data=True,
    zip_safe=False,
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
        "Operating System :: Unix",
        "Operating System :: POSIX",
        "Operating System :: Microsoft :: Windows",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3 :: Only",
        "Topic :: Utilities",
    ],
    keywords=[],
    install_requires=[
        "agrc-supervisor==3.*",
        "agrc-usaddress==0.*",
        "beautifulsoup4==4.*",
        "docopt==0.*",
        "html5lib==1.*",
        "xxhash==3.*",
    ],
    dependency_links=[],
    extras_require={
        "tests": [
            "pytest-cov==5.*",
            "pytest-instafail==0.5.*",
            "pytest-mock==3.*",
            "pytest-watch==4.*",
            "pytest==8.*",
            "ruff==0.*",
        ],
    },
    setup_requires=[
        "pytest-runner",
    ],
    # installs the `sweeper` command, which calls sweeper.__main__.main
    entry_points={"console_scripts": ["sweeper = sweeper.__main__:main"]},
)
|
|
File without changes
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# * coding: utf8 *
|
|
3
|
+
"""
|
|
4
|
+
sweeper
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
sweeper sweep duplicates --workspace=<workspace> [--table-name=<table_name> --verbose --try-fix --change-detect --scheduled --save-report=<report_path> --backup-to=<backup_path>]
|
|
8
|
+
sweeper sweep empties --workspace=<workspace> [--table-name=<table_name> --verbose --try-fix --change-detect --scheduled --save-report=<report_path> --backup-to=<backup_path>]
|
|
9
|
+
sweeper sweep invalids --workspace=<workspace> [--table-name=<table_name> --verbose --try-fix --change-detect --scheduled --save-report=<report_path> --backup-to=<backup_path>]
|
|
10
|
+
sweeper sweep addresses --workspace=<workspace> --table-name=<table-name> --field-name=<field_name> [--verbose --try-fix --save-report=<report_path> --backup-to=<backup_path>]
|
|
11
|
+
sweeper sweep metadata --workspace=<workspace> [--table-name=<table_name> --verbose --try-fix --change-detect --scheduled --save-report=<report_path> --backup-to=<backup_path>]
|
|
12
|
+
sweeper sweep --workspace=<workspace> [--table-name=<table_name> --verbose --try-fix --change-detect --scheduled --save-report=<report_path> --backup-to=<backup_path>]
|
|
13
|
+
|
|
14
|
+
Arguments:
|
|
15
|
+
workspace - path to workspace eg: `c:\\my.gdb`
|
|
16
|
+
table_name - name of feature class or table eg: `Roads` (needs to be fully qualified (eg: `SGID.Transportation.Roads`) for metadata sweeper)
|
|
17
|
+
report_path - folder to save report to eg: `c:\\temp`
|
|
18
|
+
backup_path - place to create a temp gdb and import original table
|
|
19
|
+
field_name - name of the field to check
|
|
20
|
+
|
|
21
|
+
Examples:
|
|
22
|
+
sweeper sweep --workspace=c:\\data\\thing --try-fix --save-report=c:\\temp --backup-to=c:\\temp\\backup.gdb
|
|
23
|
+
sweeper sweep addresses --workspace=c:\\data\\thing --try-fix --save-report=c:\\temp --backup-to=c:\\temp\\backup.gdb --field-name=ADDRESS
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
import datetime
|
|
27
|
+
import logging
|
|
28
|
+
import logging.handlers
|
|
29
|
+
import sys
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
|
|
32
|
+
import pkg_resources
|
|
33
|
+
from docopt import docopt
|
|
34
|
+
from supervisor.message_handlers import SendGridHandler
|
|
35
|
+
from supervisor.models import MessageDetails, Supervisor
|
|
36
|
+
|
|
37
|
+
from . import backup, config, report, utilities, workspace_info
|
|
38
|
+
from .sweepers.addresses import AddressTest
|
|
39
|
+
from .sweepers.duplicates import DuplicateTest
|
|
40
|
+
from .sweepers.empties import EmptyTest
|
|
41
|
+
from .sweepers.metadata import MetadataTest
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def main():
    """Command line entry point: parse the arguments, run the requested sweepers and report on them.

    The usage text in the module docstring is handed to docopt. Depending on the parsed arguments
    the input is backed up first, the report is printed and optionally saved, and for scheduled
    runs the report is logged and sent through the supervisor.
    """
    args = docopt(__doc__, version=pkg_resources.require("ugrc-sweeper")[0].version)

    workspace = args["--workspace"]
    table_name = args["--table-name"]
    report_path = args["--save-report"]
    is_scheduled = args["--scheduled"]

    log = setup_logging(report_path, is_scheduled)

    if is_scheduled:
        #: scheduled runs send their summary through a supervisor with a SendGrid handler
        sweeper_supervisor = Supervisor()
        email_settings = {
            "from_address": "noreply@utah.gov",
            "to_addresses": config.get_config("TO_ADDRESSES"),
            "api_key": config.get_config("SENDGRID_API_KEY"),
        }
        email_handler = SendGridHandler(
            email_settings,
            client_name="ugrc-sweeper",
            client_version=pkg_resources.require("ugrc-sweeper")[0].version,
        )
        sweeper_supervisor.add_message_handler(email_handler)

    #: take the backup before any sweeper gets a chance to modify the data
    backup_path = args["--backup-to"]
    if backup_path:
        backup.backup_data(workspace, table_name, backup_path)

    #: pick the sweepers for the requested sub-command; no sub-command means the default set
    if args["duplicates"]:
        closet = [DuplicateTest(workspace, table_name)]
    elif args["invalids"]:
        raise NotImplementedError('"Invalids" sweep/check not implemented yet.')
    elif args["empties"]:
        closet = [EmptyTest(workspace, table_name)]
    elif args["addresses"]:
        closet = [AddressTest(workspace, table_name, args["--field-name"])]
    elif args["metadata"]:
        closet = [MetadataTest(workspace, table_name)]
    else:
        closet = [
            DuplicateTest(workspace, table_name),
            EmptyTest(workspace, table_name),
            MetadataTest(workspace, table_name),
        ]

    reports = execute_sweepers(closet, args["--try-fix"], args["--change-detect"], log)

    report.print_report(reports)

    if report_path:
        report.save_report(reports, report_path)

    if not is_scheduled:
        return

    report.add_to_log(reports)

    final_message = report.format_message(reports)
    log.info(final_message.getvalue())

    #: build and send the summary message
    summary_message = MessageDetails()
    summary_message.message = final_message.getvalue()
    #: NOTE(review): setup_logging only writes this file when no --save-report path was given;
    #: confirm the attachment is still wanted when both --scheduled and --save-report are used
    summary_message.attachments = [config.LOG_FILE_PATH]
    summary_message.subject = f"Sweeper Report {datetime.datetime.today()}"

    sweeper_supervisor.notify(summary_message)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def execute_sweepers(closet, try_fix, using_change_detection, log):
    """
    orchestrate the sweeper calls.

    A sweeper that already has a table name is run as-is. A sweeper without one is cloned and run
    for every table name found for its workspace (or returned by change detection), minus the
    tables matched by that sweeper's own entry in the EXCLUSIONS config.

    closet: array of sweepers.
    try_fix: bool whether to fix or not. When truthy every sweep is followed by try_fix and a
        second sweep.
    using_change_detection: bool whether the table names come from
        workspace_info.get_change_detection rather than from listing the workspace. When truthy
        the last check date is updated after all sweepers have been processed.
    log: logger that receives the progress messages.

    returns: the list of reports, in the order the sweepers produced them.
    """

    #: the unfiltered table names are looked up once and shared by every sweeper
    table_names = None
    table_names_loaded = False
    reports = []

    def run_tool(tool):
        reports.append(tool.sweep())

        if try_fix:
            reports.append(tool.try_fix())

            #: run sweeper again to ensure all errors were fixed.
            reports.append(tool.sweep())

    log.info(f"running {len(closet)} sweepers. try fix: {try_fix}")
    for tool in closet:
        log.info(f"running sweeper: {tool.key}")
        if tool.table_name:
            run_tool(tool)

            continue

        #: get feature class names once
        if not table_names_loaded:
            if using_change_detection:
                log.info("Getting table names from change detection table")
                table_names = workspace_info.get_change_detection()
            else:
                log.info("Missing table name, executing over workspace")
                table_names = workspace_info.get_featureclasses(tool.workspace)
                #: when any name carries the "SGID." prefix, keep only those names and drop the prefix
                if any("SGID." in fc for fc in table_names):
                    table_names = [fc.split("SGID.", 1)[1] for fc in table_names if "SGID." in fc]

            table_names_loaded = True

        #: check for "nothing changed" before filtering so None is never handed to apply_exclusions
        if using_change_detection and table_names is None:
            log.info("Change detection found no updated tables")

            continue

        #: apply exclusions to a per-sweeper copy so one sweeper's exclusions
        #: never remove tables from the sweepers that run after it
        feature_class_names = table_names
        if config.has_config():
            try:
                exclusions_config = config.get_config("EXCLUSIONS")
            except KeyError:
                exclusions_config = {}

            exclusions = exclusions_config.get(tool.key, [])
            feature_class_names = utilities.apply_exclusions(list(table_names), exclusions)

        log.info(f"feature_class_names is: {feature_class_names}")

        #: explode sweeper class for each feature class
        for table_name in feature_class_names:
            run_tool(tool.clone(table_name, tool.workspace))

    if using_change_detection:
        workspace_info.update_last_check_date()

    return reports
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def setup_logging(save_report, scheduled):
    """Configure the "sweeper" logger and return it.

    save_report: the report folder given on the command line, if any.
    scheduled: whether sweeper is running as a scheduled task.

    A console handler writing to stdout is always attached. A rotating log file at
    config.LOG_FILE_PATH is attached in front of it only for scheduled runs that were not given
    a report folder.
    """
    sweeper_logger = logging.getLogger("sweeper")
    sweeper_logger.setLevel(logging.INFO)

    log_format = logging.Formatter(
        fmt="%(levelname)-7s %(asctime)s %(module)10s:%(lineno)5s %(message)s",
        datefmt="%m-%d %H:%M:%S",
    )

    #: the console handler is always present
    handlers = [logging.StreamHandler(stream=sys.stdout)]

    #: use log file when report location not provided and when running from scheduled task
    if scheduled and not save_report:
        rotating_handler = logging.handlers.RotatingFileHandler(Path(config.LOG_FILE_PATH), backupCount=10)
        #: no maxBytes is passed, so this explicit call is what rotates the file
        rotating_handler.doRollover()
        handlers.insert(0, rotating_handler)

    for handler in handlers:
        handler.setFormatter(log_format)
        sweeper_logger.addHandler(handler)

    return sweeper_logger
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
#: when executed as a script, exit with whatever main returns
if __name__ == "__main__":
    raise SystemExit(main())
|