water-column-sonar-processing 0.0.6__py3-none-any.whl → 26.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- water_column_sonar_processing/__init__.py +2 -5
- water_column_sonar_processing/aws/__init__.py +2 -2
- water_column_sonar_processing/aws/dynamodb_manager.py +257 -72
- water_column_sonar_processing/aws/s3_manager.py +184 -112
- water_column_sonar_processing/aws/s3fs_manager.py +29 -33
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +38 -97
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +144 -129
- water_column_sonar_processing/geometry/__init__.py +10 -2
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +60 -44
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +242 -51
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +157 -27
- water_column_sonar_processing/model/zarr_manager.py +663 -258
- water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +341 -0
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/cleaner.py +1 -0
- water_column_sonar_processing/utility/constants.py +69 -14
- water_column_sonar_processing/utility/pipeline_status.py +11 -15
- water_column_sonar_processing/utility/timestamp.py +3 -4
- water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
- water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
- water_column_sonar_processing/process.py +0 -147
- water_column_sonar_processing-0.0.6.dist-info/METADATA +0 -123
- water_column_sonar_processing-0.0.6.dist-info/RECORD +0 -29
- {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,13 @@
|
|
|
1
|
+
from .elevation_manager import ElevationManager
|
|
1
2
|
from .geometry_manager import GeometryManager
|
|
2
|
-
from .
|
|
3
|
+
from .line_simplification import LineSimplification
|
|
3
4
|
from .pmtile_generation import PMTileGeneration
|
|
5
|
+
from .spatiotemporal import Spatiotemporal
|
|
4
6
|
|
|
5
|
-
__all__ = [
|
|
7
|
+
__all__ = [
|
|
8
|
+
"ElevationManager",
|
|
9
|
+
"GeometryManager",
|
|
10
|
+
"LineSimplification",
|
|
11
|
+
"PMTileGeneration",
|
|
12
|
+
"Spatiotemporal",
|
|
13
|
+
]
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""
|
|
2
|
+
https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry=-31.70235%2C13.03332&geometryType=esriGeometryPoint&returnGeometry=false&returnCatalogItems=false&f=json
|
|
3
|
+
|
|
4
|
+
https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/
|
|
5
|
+
identify?
|
|
6
|
+
geometry=-31.70235%2C13.03332
|
|
7
|
+
&geometryType=esriGeometryPoint
|
|
8
|
+
&returnGeometry=false
|
|
9
|
+
&returnCatalogItems=false
|
|
10
|
+
&f=json
|
|
11
|
+
{"objectId":0,"name":"Pixel","value":"-5733","location":{"x":-31.702349999999999,"y":13.03332,"spatialReference":{"wkid":4326,"latestWkid":4326}},"properties":null,"catalogItems":null,"catalogItemVisibilities":[]}
|
|
12
|
+
-5733
|
|
13
|
+
|
|
14
|
+
(base) rudy:deleteME rudy$ curl https://api.opentopodata.org/v1/gebco2020?locations=13.03332,-31.70235
|
|
15
|
+
{
|
|
16
|
+
"results": [
|
|
17
|
+
{
|
|
18
|
+
"dataset": "gebco2020",
|
|
19
|
+
"elevation": -5729.0,
|
|
20
|
+
"location": {
|
|
21
|
+
"lat": 13.03332,
|
|
22
|
+
"lng": -31.70235
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
],
|
|
26
|
+
"status": "OK"
|
|
27
|
+
}
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
import json
|
|
31
|
+
import time
|
|
32
|
+
from collections.abc import Generator
|
|
33
|
+
|
|
34
|
+
import requests
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def chunked(ll: list, n: int) -> Generator:
    """Yield successive ``n``-sized chunks from ``ll``.

    The final chunk holds the remaining items and may be shorter than ``n``.
    An empty ``ll`` yields nothing.

    Args:
        ll: Sliceable sequence to split (anything supporting ``len`` and slicing).
        n: Maximum number of items per chunk; must be at least 1.

    Raises:
        ValueError: If ``n`` is smaller than 1. The check runs when the
            generator is first iterated, not when it is created.
    """
    # A negative step would make range() empty and silently drop every item,
    # and a zero step fails with an unrelated range() message, so reject both.
    if n < 1:
        raise ValueError(f"chunk size must be a positive integer, got {n}")
    for start in range(0, len(ll), n):
        yield ll[start : start + n]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ElevationManager:
    """Query elevation values from the NOAA DEM global mosaic ArcGIS ImageServer."""

    #######################################################
    def __init__(
        self,
    ):
        self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
        self.TIMEOUT_SECONDS = 10  # timeout handed to each requests.get call

    #######################################################
    def get_arcgis_elevation(
        self,
        lngs: list,
        lats: list,
        chunk_size: int = 500,  # I think this is the api limit
    ) -> list[float]:
        """Return the values reported by the ImageServer ``identify`` endpoint.

        The points are sent in batches of ``chunk_size`` as an
        ``esriGeometryMultipoint`` geometry, one HTTP GET per batch.

        Args:
            lngs: Longitudes, paired by position with ``lats``.
            lats: Latitudes, paired by position with ``lngs``.
            chunk_size: Maximum number of points per request.

        Returns:
            The ``value`` entries of every response, converted to ``float`` and
            concatenated in request order.

        Raises:
            ValueError: If ``lngs`` and ``lats`` differ in length, or a returned
                value cannot be converted to ``float``.
            requests.HTTPError: If the service answers with an error status.
        """
        # Reference: https://developers.arcgis.com/rest/services-reference/enterprise/map-to-image/
        # Info: https://www.arcgis.com/home/item.html?id=c876e3c96a8642ab8557646a3b4fa0ff
        ### 'https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={"points":[[-31.70235,13.03332],[-32.70235,14.03332]]}&geometryType=esriGeometryMultipoint&returnGeometry=false&returnCatalogItems=false&f=json'
        if len(lngs) != len(lats):
            raise ValueError("lngs and lats must have same length")

        identify_url = "https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify"
        geometry_type = "esriGeometryMultipoint"  # TODO: allow single point?

        depths = []

        # order: (lng, lat)
        # Cast to built-in floats so NumPy scalars serialize as plain JSON
        # numbers instead of their repr (e.g. "np.float64(1.0)").
        list_of_points = [[float(lng), float(lat)] for lng, lat in zip(lngs, lats)]
        for chunk in chunked(list_of_points, chunk_size):
            time.sleep(0.1)  # brief pause before every request
            query = {
                "geometry": json.dumps({"points": chunk}, separators=(",", ":")),
                "geometryType": geometry_type,
                "returnGeometry": "false",
                "returnCatalogItems": "false",
                "f": "json",
            }
            # Let requests percent-encode the query string.
            result = requests.get(
                identify_url, params=query, timeout=self.TIMEOUT_SECONDS
            )
            result.raise_for_status()
            res = json.loads(result.content.decode("utf8"))
            if "results" in res:
                depths.extend(float(element["value"]) for element in res["results"])
            elif "value" in res:
                depths.append(float(res["value"]))
            # NOTE(review): a payload with neither key contributes nothing, so
            # the output can be shorter than the input — confirm callers cope.

        return depths
|
|
83
|
+
|
|
84
|
+
# def get_gebco_bathymetry_elevation(self) -> int:
|
|
85
|
+
# # Documentation: https://www.opentopodata.org/datasets/gebco2020/
|
|
86
|
+
# latitude = 13.03332
|
|
87
|
+
# longitude = -31.70235
|
|
88
|
+
# dataset = "gebco2020"
|
|
89
|
+
# url = f"https://api.opentopodata.org/v1/{dataset}?locations={latitude},{longitude}"
|
|
90
|
+
# pass
|
|
91
|
+
|
|
92
|
+
# def get_elevation(
|
|
93
|
+
# self,
|
|
94
|
+
# df,
|
|
95
|
+
# lat_column,
|
|
96
|
+
# lon_column,
|
|
97
|
+
# ) -> int:
|
|
98
|
+
# """Query service using lat, lon. add the elevation values as a new column."""
|
|
99
|
+
# url = r'https://epqs.nationalmap.gov/v1/json?'
|
|
100
|
+
# elevations = []
|
|
101
|
+
# for lat, lon in zip(df[lat_column], df[lon_column]):
|
|
102
|
+
# # define rest query params
|
|
103
|
+
# params = {
|
|
104
|
+
# 'output': 'json',
|
|
105
|
+
# 'x': lon,
|
|
106
|
+
# 'y': lat,
|
|
107
|
+
# 'units': 'Meters'
|
|
108
|
+
# }
|
|
109
|
+
# result = requests.get((url + urllib.parse.urlencode(params)))
|
|
110
|
+
# elevations.append(result.json()['value'])
|
|
111
|
+
# return elevations
|
|
@@ -4,20 +4,19 @@ import geopandas
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
import pandas as pd
|
|
6
6
|
|
|
7
|
-
from water_column_sonar_processing.aws
|
|
8
|
-
from water_column_sonar_processing.utility
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
// [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
"""
|
|
7
|
+
from water_column_sonar_processing.aws import S3Manager
|
|
8
|
+
from water_column_sonar_processing.utility import Cleaner
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
|
|
12
|
+
# // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
|
|
13
|
+
# // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
|
|
14
|
+
# // 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
|
|
15
|
+
# // 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
|
|
16
|
+
# // 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
|
|
17
|
+
# // 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
|
|
18
|
+
# // 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
|
|
19
|
+
# // 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
class GeometryManager:
|
|
@@ -25,43 +24,43 @@ class GeometryManager:
|
|
|
25
24
|
def __init__(
|
|
26
25
|
self,
|
|
27
26
|
):
|
|
28
|
-
self.DECIMAL_PRECISION =
|
|
29
|
-
self.SIMPLIFICATION_TOLERANCE = 0.0001 # RDP simplification to street level
|
|
27
|
+
self.DECIMAL_PRECISION = 6 # precision for GPS coordinates
|
|
28
|
+
self.SIMPLIFICATION_TOLERANCE = 0.0001 # RDP simplification to "street level"
|
|
30
29
|
|
|
31
30
|
#######################################################
|
|
32
31
|
def read_echodata_gps_data(
|
|
33
32
|
self,
|
|
34
33
|
echodata,
|
|
34
|
+
output_bucket_name,
|
|
35
35
|
ship_name,
|
|
36
36
|
cruise_name,
|
|
37
37
|
sensor_name,
|
|
38
38
|
file_name,
|
|
39
|
+
endpoint_url=None,
|
|
39
40
|
write_geojson=True,
|
|
40
41
|
) -> tuple:
|
|
41
42
|
file_name_stem = Path(file_name).stem
|
|
42
43
|
geo_json_name = f"{file_name_stem}.json"
|
|
43
44
|
|
|
44
|
-
print("Getting GPS
|
|
45
|
+
print("Getting GPS dataset from echopype object.")
|
|
45
46
|
try:
|
|
46
|
-
latitude =
|
|
47
|
-
echodata.platform.latitude.values
|
|
48
|
-
)
|
|
49
|
-
longitude =
|
|
50
|
-
echodata.platform.longitude.values, self.DECIMAL_PRECISION
|
|
51
|
-
)
|
|
47
|
+
latitude = (
|
|
48
|
+
echodata.platform.latitude.values
|
|
49
|
+
) # TODO: DONT get values from here!
|
|
50
|
+
longitude = echodata.platform.longitude.values
|
|
52
51
|
|
|
53
52
|
# RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
|
|
54
53
|
# 'nmea_times' are times from the nmea datalogger associated with GPS
|
|
55
54
|
# note that nmea_times, unlike time1, can be sorted
|
|
56
55
|
nmea_times = np.sort(echodata.platform.time1.values)
|
|
57
56
|
|
|
58
|
-
# 'time1' are times from the echosounder associated with the
|
|
57
|
+
# 'time1' are times from the echosounder associated with the dataset of the transducer measurement
|
|
59
58
|
time1 = echodata.environment.time1.values
|
|
60
59
|
|
|
61
60
|
if len(nmea_times) < len(time1):
|
|
62
61
|
raise Exception(
|
|
63
62
|
"Problem: Not enough NMEA times available to extrapolate time1."
|
|
64
|
-
)
|
|
63
|
+
) # TODO: explore this logic further...
|
|
65
64
|
|
|
66
65
|
# Align 'sv_times' to 'nmea_times'
|
|
67
66
|
if not (
|
|
@@ -97,14 +96,14 @@ class GeometryManager:
|
|
|
97
96
|
|
|
98
97
|
# create requirement for minimum linestring size
|
|
99
98
|
MIN_ALLOWED_SIZE = (
|
|
100
|
-
4 # don't want to process files with less than 4
|
|
99
|
+
4 # don't want to process files with less than 4 dataset points
|
|
101
100
|
)
|
|
102
101
|
if (
|
|
103
102
|
len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE
|
|
104
103
|
or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE
|
|
105
104
|
):
|
|
106
105
|
raise Exception(
|
|
107
|
-
f"There was not enough
|
|
106
|
+
f"There was not enough dataset in lat or lon to create geojson, {len(lat[~np.isnan(lat)])} found, less than {MIN_ALLOWED_SIZE}."
|
|
108
107
|
)
|
|
109
108
|
|
|
110
109
|
# https://osoceanacoustics.github.io/echopype-examples/echopype_tour.html
|
|
@@ -123,32 +122,39 @@ class GeometryManager:
|
|
|
123
122
|
crs="epsg:4326",
|
|
124
123
|
)
|
|
125
124
|
# Note: We set np.nan to 0,0 so downstream missing values can be omitted
|
|
126
|
-
|
|
125
|
+
# TODO: so what ends up here is dataset with corruption at null island!!!
|
|
127
126
|
geo_json_line = gps_gdf.to_json()
|
|
128
127
|
if write_geojson:
|
|
129
128
|
print("Creating local copy of geojson file.")
|
|
130
129
|
with open(geo_json_name, "w") as write_file:
|
|
131
|
-
write_file.write(
|
|
130
|
+
write_file.write(
|
|
131
|
+
geo_json_line
|
|
132
|
+
) # NOTE: this file can include zeros for lat lon
|
|
132
133
|
|
|
133
134
|
geo_json_prefix = (
|
|
134
135
|
f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
|
|
135
136
|
)
|
|
136
137
|
|
|
137
138
|
print("Checking s3 and deleting any existing GeoJSON file.")
|
|
138
|
-
s3_manager = S3Manager()
|
|
139
|
-
|
|
140
|
-
|
|
139
|
+
s3_manager = S3Manager(endpoint_url=endpoint_url)
|
|
140
|
+
geojson_object_exists = s3_manager.check_if_object_exists(
|
|
141
|
+
bucket_name=output_bucket_name,
|
|
142
|
+
key_name=f"{geo_json_prefix}/{geo_json_name}",
|
|
141
143
|
)
|
|
142
|
-
if
|
|
144
|
+
if geojson_object_exists:
|
|
143
145
|
print(
|
|
144
146
|
"GeoJSON already exists in s3, deleting existing and continuing."
|
|
145
147
|
)
|
|
146
|
-
s3_manager.
|
|
148
|
+
s3_manager.delete_nodd_object(
|
|
149
|
+
bucket_name=output_bucket_name,
|
|
150
|
+
key_name=f"{geo_json_prefix}/{geo_json_name}",
|
|
151
|
+
)
|
|
147
152
|
|
|
148
153
|
print("Upload GeoJSON to s3.")
|
|
149
154
|
s3_manager.upload_nodd_file(
|
|
150
155
|
file_name=geo_json_name, # file_name
|
|
151
156
|
key=f"{geo_json_prefix}/{geo_json_name}", # key
|
|
157
|
+
output_bucket_name=output_bucket_name,
|
|
152
158
|
)
|
|
153
159
|
|
|
154
160
|
# TODO: delete geo_json file
|
|
@@ -172,32 +178,36 @@ class GeometryManager:
|
|
|
172
178
|
#################################################################
|
|
173
179
|
# GeoJSON FeatureCollection with IDs as "time"
|
|
174
180
|
except Exception as err:
|
|
175
|
-
|
|
176
|
-
f"Exception encountered extracting gps coordinates creating geojson
|
|
181
|
+
raise RuntimeError(
|
|
182
|
+
f"Exception encountered extracting gps coordinates creating geojson, {err}"
|
|
177
183
|
)
|
|
178
|
-
|
|
184
|
+
|
|
179
185
|
# Note: returned lat/lon values can include np.nan because they need to be aligned with
|
|
180
|
-
# the Sv
|
|
181
|
-
return gps_df.index.values, gps_df.latitude.values, gps_df.longitude.values
|
|
186
|
+
# the Sv dataset! GeoJSON needs simplification but has been filtered.
|
|
187
|
+
# return gps_df.index.values, gps_df.latitude.values, gps_df.longitude.values
|
|
188
|
+
return gps_df.index.values, lat, lon
|
|
182
189
|
# TODO: if geojson is already returned with 0,0, the return here
|
|
183
190
|
# can include np.nan values?
|
|
184
191
|
|
|
185
192
|
#######################################################
|
|
193
|
+
@staticmethod
|
|
186
194
|
def read_s3_geo_json(
|
|
187
|
-
self,
|
|
188
195
|
ship_name,
|
|
189
196
|
cruise_name,
|
|
190
197
|
sensor_name,
|
|
191
198
|
file_name_stem,
|
|
192
199
|
input_xr_zarr_store,
|
|
200
|
+
endpoint_url,
|
|
201
|
+
output_bucket_name,
|
|
193
202
|
):
|
|
194
203
|
try:
|
|
195
|
-
s3_manager = S3Manager()
|
|
204
|
+
s3_manager = S3Manager(endpoint_url=endpoint_url)
|
|
196
205
|
geo_json = s3_manager.read_s3_json(
|
|
197
206
|
ship_name=ship_name,
|
|
198
207
|
cruise_name=cruise_name,
|
|
199
208
|
sensor_name=sensor_name,
|
|
200
209
|
file_name_stem=file_name_stem,
|
|
210
|
+
output_bucket_name=output_bucket_name,
|
|
201
211
|
)
|
|
202
212
|
###
|
|
203
213
|
geospatial = geopandas.GeoDataFrame.from_features(
|
|
@@ -217,9 +227,15 @@ class GeometryManager:
|
|
|
217
227
|
indices = np.searchsorted(a=aa, v=vv)
|
|
218
228
|
|
|
219
229
|
return indices, geospatial
|
|
220
|
-
except Exception as err:
|
|
221
|
-
|
|
222
|
-
|
|
230
|
+
except Exception as err:
|
|
231
|
+
raise RuntimeError(f"Exception encountered reading s3 GeoJSON, {err}")
|
|
232
|
+
|
|
233
|
+
############################################################################
|
|
234
|
+
# COMES from the raw-to-zarr conversion
|
|
235
|
+
# def __write_geojson_to_file(self, store_name, data) -> None:
|
|
236
|
+
# print("Writing GeoJSON to file.")
|
|
237
|
+
# with open(os.path.join(store_name, "geo.json"), "w") as outfile:
|
|
238
|
+
# outfile.write(data)
|
|
223
239
|
|
|
224
240
|
|
|
225
241
|
###########################################################
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# import json
|
|
2
|
+
import geopandas as gpd
|
|
3
|
+
import numpy as np
|
|
4
|
+
from pykalman import KalmanFilter
|
|
5
|
+
from shapely.geometry import Point
|
|
6
|
+
|
|
7
|
+
# import hvplot.pandas
|
|
8
|
+
# from holoviews import opts
|
|
9
|
+
# hv.extension('bokeh')
|
|
10
|
+
|
|
11
|
+
# import matplotlib.pyplot as plt
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# lambda for timestamp in form "yyyy-MM-ddTHH:mm:ssZ"
|
|
15
|
+
# dt = lambda: datetime.now().isoformat(timespec="seconds") + "Z"
|
|
16
|
+
|
|
17
|
+
# TODO: get line for example HB1906 ...save linestring to array for testing
|
|
18
|
+
|
|
19
|
+
MAX_SPEED_KNOTS = 50
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Lambert's formula ==> better accuracy than haversine
|
|
23
|
+
# Lambert's formula (the formula used by the calculators above) is the method used to calculate the shortest distance along the surface of an ellipsoid. When used to approximate the Earth and calculate the distance on the Earth surface, it has an accuracy on the order of 10 meters over thousands of kilometers, which is more precise than the haversine formula.
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def mph_to_knots(mph_value):
    """Convert a speed in miles per hour to knots."""
    knots_per_mph = 0.868976  # 1 mile per hour === 0.868976 Knots
    return mph_value * knots_per_mph
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def mps_to_knots(mps_value):
    """Convert a speed in meters per second to knots."""
    knots_per_mps = 1.94384  # 1 meter per second === 1.94384 Knots
    return mps_value * knots_per_mps
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
###############################################################################
|
|
37
|
+
# Colab Notebook:
|
|
38
|
+
# https://colab.research.google.com/drive/1Ihb1x0EeYRNwGJ4Bqi4RqQQHu9-40oDk?usp=sharing#scrollTo=hIPziqVO48Xg
|
|
39
|
+
###############################################################################
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# https://shapely.readthedocs.io/en/stable/reference/shapely.MultiLineString.html#shapely.MultiLineString
|
|
43
|
+
class LineSimplification:
    """Helpers for smoothing and analysing ship-track coordinates.

    Coordinate precision reference:

    // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
    // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
    // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
    // 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
    // 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
    // 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
    // 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
    // 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
    // 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm

    Settings kept for reference only (Java syntax, presumably from an earlier
    implementation; none of them is read by this class):

    private static final int SRID = 8307;
    private static final double simplificationTolerance = 0.0001;
    private static final long splitGeometryMs = 900000L;
    private static final int batchSize = 10000;
    private static final int geoJsonPrecision = 5;
    final int geoJsonPrecision = 5;
    final double simplificationTolerance = 0.0001;
    final int simplifierBatchSize = 3000;
    final long maxCount = 0;
    private static final double maxAllowedSpeedKnts = 60D;
    """

    # TODO: in the future move to standalone library
    #######################################################
    def __init__(
        self,
    ):
        pass

    #######################################################
    @staticmethod
    def kalman_filter(
        longitudes,
        latitudes,
    ):
        """Smooth a track with a Kalman smoother.

        The state holds each coordinate and its per-step change; only the
        coordinates are observed. The filter parameters are fitted with two
        EM iterations before smoothing.

        Args:
            longitudes: Longitudes, paired by position with ``latitudes``.
            latitudes: Latitudes, paired by position with ``longitudes``.

        Returns:
            Array with one row per measurement and two columns: smoothed
            longitude, smoothed latitude.

        # TODO: need to use masked array to get the right number of values
        """
        ### https://github.com/pykalman/pykalman
        # https://stackoverflow.com/questions/43377626/how-to-use-kalman-filter-in-python-for-location-data
        measurements = np.asarray([list(elem) for elem in zip(longitudes, latitudes)])
        # State layout: [lon, d_lon, lat, d_lat]; start at the first fix with no drift.
        initial_state_mean = [measurements[0, 0], 0, measurements[0, 1], 0]
        transition_matrix = [[1, 1, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]]
        observation_matrix = [[1, 0, 0, 0], [0, 0, 1, 0]]

        kf = KalmanFilter(
            transition_matrices=transition_matrix,
            observation_matrices=observation_matrix,
            initial_state_mean=initial_state_mean,
        )
        kf = kf.em(measurements, n_iter=2)  # TODO: 5
        (smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements)

        # plt.plot(longitudes, latitudes, label="original")
        # plt.plot(smoothed_state_means[:, 0], smoothed_state_means[:, 2], label="smoothed")
        # plt.legend()
        # plt.show()

        # Keep only the position columns (lon, lat) of the state.
        return smoothed_state_means[:, [0, 2]]

    #######################################################
    @staticmethod
    def get_speeds(
        times: np.ndarray,  # don't really need time, do need to segment the dataset first
        latitudes: np.ndarray,
        longitudes: np.ndarray,
    ) -> np.ndarray:
        """Compute the speed between consecutive positions.

        Args:
            times: Timestamps of the positions; consecutive differences are
                converted to nanoseconds.
            latitudes: Latitudes in EPSG:4326, aligned with ``times``.
            longitudes: Longitudes in EPSG:4326, aligned with ``times``.

        Returns:
            float32 array with one entry per position: projected distance to
            the previous position divided by the elapsed seconds. The array
            includes NaN (the first position has no predecessor). An empty
            input returns an empty array.
        """
        # TODO: distance/time ==> need to take position2 - position1 to get speed
        if len(times) == 0:
            return np.array([], dtype="float32")

        # get distance difference
        geom = [Point(xy) for xy in zip(longitudes, latitudes)]
        points_df = gpd.GeoDataFrame({"geometry": geom}, crs="EPSG:4326")
        # Project to a planar CRS so distance can be calculated with the
        # pythagorean theorem; an alternative could be to use EPSG 32663.
        # NOTE(review): EPSG:3310 is NAD83 / California Albers, not UTM, so
        # distances far from California will be distorted — confirm the CRS.
        points_df.to_crs(
            epsg=3310, inplace=True
        )  # https://gis.stackexchange.com/questions/293310/finding-distance-between-two-points-with-geoseries-distance
        distance_diffs = points_df.distance(points_df.shift())
        # distance_diffs_sorted = distance_diffs.sort_values(
        #     ascending=False
        # )  # TODO: get avg cutoff time
        #
        # Normalize to nanoseconds and a fixed 64-bit width so the result does
        # not depend on the input's time unit or the platform's default int.
        elapsed = (times[1:] - times[:-1]).astype("timedelta64[ns]")
        time_diffs_ns = np.append(0, elapsed.astype(np.int64))
        # time_diffs_ns_sorted = np.sort(time_diffs_ns)
        # largest time diffs HB0707 [ 17. 17.93749786 21.0781271 54.82812723 85.09374797, 113.56249805 204.87500006 216. 440.68749798 544.81249818]
        # largest diffs HB1906: mostly ~3.01 to ~4.51, then 6.50, 22.5, 139.9, 154.5, 1.6e3, 1.7e5, 4.3e5, 4.4e5, 8.0e5
        nanoseconds_per_second = 1e9
        speed_meters_per_second = (
            distance_diffs / time_diffs_ns * nanoseconds_per_second
        )
        # returns the speed in meters per second #TODO: get speed in knots
        return speed_meters_per_second.to_numpy(dtype="float32")  # includes nan

    # def remove_null_island_values(
    #     self,
    #     epsilon=1e-5,
    # ) -> None:
    #     # TODO: low priority
    #     print(epsilon)
    #     pass

    def break_linestring_into_multi_linestring(
        self,
    ) -> None:
        """Placeholder: not implemented yet, does nothing."""
        # TODO: medium priority
        # For any line-strings across the antimeridian, break into multilinestring
        # average cadence is measurements every 1 second
        # break when over 1 minute
        pass

    def simplify(
        self,
    ) -> None:
        """Placeholder: not implemented yet, does nothing."""
        # TODO: medium-high priority
        pass
|
|
161
|
+
|
|
162
|
+
#######################################################
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# [(-72.2001724243164, 40.51750183105469), # latBB
|
|
166
|
+
# (-72.20023345947266, 40.51749038696289),
|
|
167
|
+
# (-72.20033264160156, 40.51750183105469), # lonAA, latBB
|
|
168
|
+
# (-72.20030212402344, 40.517391204833984),
|
|
169
|
+
# (-72.20033264160156, 40.517330169677734), # lonAA, latCC
|
|
170
|
+
# (-72.2003402709961, 40.51729965209961),
|
|
171
|
+
# (-72.20033264160156, 40.517330169677734), # lonAA, latCC
|
|
172
|
+
# (-72.20040130615234, 40.5172004699707),
|
|
173
|
+
# (-72.20050048828125, 40.51716995239258),
|
|
174
|
+
# (-72.2004623413086, 40.51710891723633)]
|
|
175
|
+
|
|
176
|
+
###########################################################
|