water-column-sonar-processing 25.1.7 → 25.3.1 (py3-none-any.whl)
- water_column_sonar_processing/aws/dynamodb_manager.py +27 -32
- water_column_sonar_processing/aws/s3_manager.py +52 -64
- water_column_sonar_processing/aws/s3fs_manager.py +3 -9
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +14 -14
- water_column_sonar_processing/cruise/datatree_manager.py +3 -6
- water_column_sonar_processing/cruise/resample_regrid.py +67 -49
- water_column_sonar_processing/geometry/__init__.py +7 -2
- water_column_sonar_processing/geometry/elevation_manager.py +16 -17
- water_column_sonar_processing/geometry/geometry_manager.py +25 -25
- water_column_sonar_processing/geometry/line_simplification.py +150 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +99 -64
- water_column_sonar_processing/index/index_manager.py +67 -32
- water_column_sonar_processing/model/zarr_manager.py +54 -22
- water_column_sonar_processing/process.py +15 -13
- water_column_sonar_processing/processing/__init__.py +2 -2
- water_column_sonar_processing/processing/batch_downloader.py +66 -41
- water_column_sonar_processing/processing/raw_to_zarr.py +121 -82
- water_column_sonar_processing/utility/constants.py +17 -2
- water_column_sonar_processing/utility/pipeline_status.py +11 -15
- {water_column_sonar_processing-25.1.7.dist-info → water_column_sonar_processing-25.3.1.dist-info}/METADATA +21 -12
- water_column_sonar_processing-25.3.1.dist-info/RECORD +34 -0
- {water_column_sonar_processing-25.1.7.dist-info → water_column_sonar_processing-25.3.1.dist-info}/WHEEL +1 -1
- water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
- water_column_sonar_processing-25.1.7.dist-info/RECORD +0 -34
- {water_column_sonar_processing-25.1.7.dist-info → water_column_sonar_processing-25.3.1.dist-info/licenses}/LICENSE +0 -0
- {water_column_sonar_processing-25.1.7.dist-info → water_column_sonar_processing-25.3.1.dist-info}/top_level.txt +0 -0
--- a/water_column_sonar_processing/cruise/resample_regrid.py
+++ b/water_column_sonar_processing/cruise/resample_regrid.py
@@ -35,6 +35,7 @@ class ResampleRegrid:
         input_xr,
         ping_times,
         all_cruise_depth_values,
+        water_level,
     ) -> np.ndarray:
         print("Interpolating data.")
         try:
@@ -53,7 +54,7 @@ class ResampleRegrid:
                 data=data,
                 dims=("depth", "time", "frequency"),
                 coords={
-                    "depth": all_cruise_depth_values,
+                    "depth": all_cruise_depth_values,  # TODO: these should be on interval from 7.7 meters to 507 meters
                     "time": ping_times,
                     "frequency": input_xr.frequency_nominal.values,
                 },
@@ -62,34 +63,19 @@ class ResampleRegrid:
             channels = input_xr.channel.values
             for channel in range(
                 len(channels)
-            ):  # TODO: leaving off here, need to subset for just indices in time axis
+            ):  # ?TODO: leaving off here, need to subset for just indices in time axis
                 gc.collect()
-                print(
-                    np.nanmax(
-                        input_xr.echo_range.sel(
-                            channel=input_xr.channel[channel]
-                        ).values
-                    )
-                )
-                #
                 max_depths = np.nanmax(
-                    a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values
+                    a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values
+                    + water_level,
                     axis=1,
                 )
-                superset_of_max_depths = set(
-                    np.nanmax(
-                        input_xr.echo_range.sel(
-                            channel=input_xr.channel[channel]
-                        ).values,
-                        1,
-                    )
-                )
+                superset_of_max_depths = set(max_depths)
                 set_of_max_depths = list(
                     {x for x in superset_of_max_depths if x == x}
                 )  # removes nan's
                 # iterate through partitions of data with similar depths and resample
                 for select_max_depth in set_of_max_depths:
-                    gc.collect()
                     # TODO: for nan just skip and leave all nan's
                     select_indices = [
                         i
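The rewritten loop collapses the per-channel maximum depths with `set(max_depths)` and strips NaNs with the `x == x` comparison, since NaN is the only float that compares unequal to itself. A minimal sketch of that trick in isolation (toy values):

```python
import numpy as np

max_depths = np.array([500.0, np.nan, 507.0, 500.0, np.nan])

superset_of_max_depths = set(max_depths)  # may still contain NaN entries
# NaN is the only float for which x == x is False, so this drops the NaNs:
set_of_max_depths = list({x for x in superset_of_max_depths if x == x})

print(set_of_max_depths)  # [500.0, 507.0] (set order is not guaranteed)
```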
@@ -132,6 +118,7 @@ class ResampleRegrid:
                 )
             ] = resampled
             print(f"updated {len(times_select)} ping times")
+            gc.collect()
         except Exception as err:
             print(f"Problem finding the dynamodb table: {err}")
             raise err
@@ -146,9 +133,9 @@ class ResampleRegrid:
         sensor_name,
         table_name,
         # TODO: file_name?,
-        bucket_name,
+        bucket_name,  # TODO: this is the same bucket
         override_select_files=None,
-        endpoint_url=None
+        endpoint_url=None,
     ) -> None:
         """
         The goal here is to interpolate the data against the depth values already populated
@@ -172,9 +159,9 @@ class ResampleRegrid:
         # get dynamo stuff
         dynamo_db_manager = DynamoDBManager()
         cruise_df = dynamo_db_manager.get_table_as_df(
-            ship_name=ship_name,
+            # ship_name=ship_name,
             cruise_name=cruise_name,
-            sensor_name=sensor_name,
+            # sensor_name=sensor_name,
             table_name=table_name,
         )
@@ -191,19 +178,21 @@ class ResampleRegrid:
             file_name_stem = Path(file_name).stem
             print(f"Processing file: {file_name_stem}.")
 
-            if f"{file_name_stem}.raw" not in list(cruise_df[
-                raise Exception(
+            if f"{file_name_stem}.raw" not in list(cruise_df["FILE_NAME"]):
+                raise Exception("Raw file file_stem not found in dynamodb.")
 
             # status = PipelineStatus['LEVEL_1_PROCESSING']
             # TODO: filter rows by enum success, filter the dataframe just for enums >= LEVEL_1_PROCESSING
             # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
 
             # Get index from all cruise files. Note: should be based on which are included in cruise.
-            index = int(
-                cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"
-
+            index = int(
+                cruise_df.index[cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"][
+                    0
+                ]
+            )
 
-            #
+            # Get input store
             input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
                 ship_name=ship_name,
                 cruise_name=cruise_name,
@@ -212,6 +201,10 @@ class ResampleRegrid:
                 input_bucket_name=bucket_name,
                 endpoint_url=endpoint_url,
             )
+
+            # This is the horizontal offset of the measurement.
+            # See https://echopype.readthedocs.io/en/stable/data-proc-additional.html
+            water_level = input_xr_zarr_store.water_level.values
             #########################################################################
             # [3] Get needed indices
             # Offset from start index to insert new data. Note that missing values are excluded.
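The new `water_level` variable read from the Level-1 store is the offset added to `echo_range` so the cruise depth grid is referenced to the water line rather than the transducer. A minimal sketch of the idea, with a toy dataset standing in for the real Zarr store (variable names follow the diff; the values are invented):

```python
import numpy as np
import xarray as xr

# Toy stand-in for the L1 store opened via open_s3_zarr_store_with_xarray().
ds = xr.Dataset(
    {
        "echo_range": ("range_sample", np.arange(0.0, 5.0, 0.5)),
        "water_level": ((), 7.5),  # scalar offset, as in the diff
    }
)

water_level = ds.water_level.values
# Adding water_level references transducer-relative ranges to the water line,
# so a 0 m sample becomes a 7.5 m depth (matching the comment in the diff).
depth = ds.echo_range.values + water_level
print(depth[:3])  # [7.5 8.  8.5]
```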
@@ -225,14 +218,26 @@ class ResampleRegrid:
             start_ping_time_index = ping_time_cumsum[index]
             end_ping_time_index = ping_time_cumsum[index + 1]
 
-            min_echo_range = np.
-
+            min_echo_range = np.min(
+                (cruise_df["MIN_ECHO_RANGE"] + cruise_df["WATER_LEVEL"])
+                .dropna()
+                .astype(float)
+            )
+            max_echo_range = np.max(
+                (cruise_df["MAX_ECHO_RANGE"] + cruise_df["WATER_LEVEL"])
+                .dropna()
+                .astype(float)
+            )
+            cruise_min_epsilon = np.min(
+                cruise_df["MIN_ECHO_RANGE"].dropna().astype(float)
+            )
 
             # Note: cruise dims (depth, time, frequency)
             all_cruise_depth_values = zarr_manager.get_depth_values(
                 min_echo_range=min_echo_range,
-                max_echo_range=max_echo_range
-
+                max_echo_range=max_echo_range,
+                cruise_min_epsilon=cruise_min_epsilon,  # remove this & integrate into min_echo_range
+            )  # with offset of 7.5 meters, 0 meter measurement should now start at 7.5 meters
 
             print(" ".join(list(input_xr_zarr_store.Sv.dims)))
             if set(input_xr_zarr_store.Sv.dims) != {
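`get_depth_values` itself is not part of this diff, so its implementation is unknown; a plausible sketch of how a regular cruise depth grid could be built from the new arguments (the spacing logic here is an assumption for illustration, not the package's code):

```python
import numpy as np

def get_depth_values_sketch(
    min_echo_range: float,
    max_echo_range: float,
    cruise_min_epsilon: float,
) -> np.ndarray:
    # Hypothetical: a regular grid from the shallowest to the deepest
    # water-level-corrected range, spaced by the cruise's finest bin size.
    return np.arange(
        min_echo_range, max_echo_range + cruise_min_epsilon, cruise_min_epsilon
    )

depths = get_depth_values_sketch(7.7, 507.0, 0.5)
print(depths[0], depths[-1])  # 7.7 507.2
```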
@@ -265,34 +270,45 @@ class ResampleRegrid:
             )
 
             # --- UPDATING --- #
-            regrid_resample =
-
-
-
+            regrid_resample = (
+                self.interpolate_data(  # TODO: need to add water_level here
+                    input_xr=input_xr,
+                    ping_times=ping_times,
+                    all_cruise_depth_values=all_cruise_depth_values,
+                    water_level=water_level,
+                )
             )
 
-            print(
+            print(
+                f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}"
+            )
             #########################################################################
             # write Sv values to cruise-level-model-store
 
             for fff in range(regrid_resample.shape[-1]):
-                output_zarr_store.Sv[
+                output_zarr_store.Sv[
+                    :, start_ping_time_index:end_ping_time_index, fff
+                ] = regrid_resample[:, :, fff]
             #########################################################################
             # TODO: add the "detected_seafloor_depth/" to the
             # L2 cruise dataarrays
             # TODO: make bottom optional
             # TODO: Only checking the first channel for now. Need to average across all channels
             # in the future. See https://github.com/CI-CMG/water-column-sonar-processing/issues/11
-            if
-                print(
+            if "detected_seafloor_depth" in input_xr.variables:
+                print("Found detected_seafloor_depth, adding data to output store.")
                 detected_seafloor_depth = input_xr.detected_seafloor_depth.values
-                detected_seafloor_depth[detected_seafloor_depth == 0.] = np.nan
+                detected_seafloor_depth[detected_seafloor_depth == 0.0] = np.nan
                 # TODO: problem here: Processing file: D20070711-T210709.
-
-                detected_seafloor_depths
+
+                detected_seafloor_depths = np.nanmean(
+                    a=detected_seafloor_depth, axis=0
+                )
+                # RuntimeWarning: Mean of empty slice detected_seafloor_depths = np.nanmean(detected_seafloor_depth, 0)
+                detected_seafloor_depths[detected_seafloor_depths == 0.0] = np.nan
                 print(f"min depth measured: {np.nanmin(detected_seafloor_depths)}")
                 print(f"max depth measured: {np.nanmax(detected_seafloor_depths)}")
-                #available_indices = np.argwhere(np.isnan(geospatial['latitude'].values))
+                # available_indices = np.argwhere(np.isnan(geospatial['latitude'].values))
                 output_zarr_store.bottom[
                     start_ping_time_index:end_ping_time_index
                 ] = detected_seafloor_depths
@@ -301,17 +317,19 @@ class ResampleRegrid:
             # [5] write subset of latitude/longitude
             output_zarr_store.latitude[
                 start_ping_time_index:end_ping_time_index
-            ] = geospatial.dropna()[
+            ] = geospatial.dropna()[
+                "latitude"
+            ].values  # TODO: get from ds_sv directly, dont need geojson anymore
             output_zarr_store.longitude[
                 start_ping_time_index:end_ping_time_index
             ] = geospatial.dropna()["longitude"].values
             #########################################################################
             #########################################################################
         except Exception as err:
-            print(f"Problem
+            print(f"Problem with resample_regrid: {err}")
             raise err
         finally:
-            print("
+            print("Exiting resample_regrid.")
         # TODO: read across times and verify data was written?
 
     #######################################################
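`interpolate_data` builds a `(depth, time, frequency)` DataArray on the shared cruise depth grid; the core regridding step amounts to interpolating each ping's Sv samples from its own echo ranges onto `all_cruise_depth_values`. A one-dimensional sketch of that step (toy values, not the package's exact code):

```python
import numpy as np

# One ping, one channel: Sv samples at the file's own echo ranges...
echo_range = np.array([7.5, 8.0, 8.5, 9.0])
sv = np.array([-60.0, -62.5, -61.0, -70.0])
# ...interpolated onto the shared cruise depth grid.
all_cruise_depth_values = np.arange(7.5, 10.0, 0.25)

resampled = np.interp(
    all_cruise_depth_values,
    echo_range,
    sv,
    left=np.nan,   # shallower than the first sample: leave as NaN
    right=np.nan,  # deeper than the deepest sample: leave as NaN
)
print(resampled)
```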
--- a/water_column_sonar_processing/geometry/__init__.py
+++ b/water_column_sonar_processing/geometry/__init__.py
@@ -1,6 +1,11 @@
 from .elevation_manager import ElevationManager
 from .geometry_manager import GeometryManager
-from .
+from .line_simplification import LineSimplification
 from .pmtile_generation import PMTileGeneration
 
-__all__ = [
+__all__ = [
+    "ElevationManager",
+    "GeometryManager",
+    "LineSimplification",
+    "PMTileGeneration",
+]
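With the completed `__all__`, the new class is importable from the subpackage directly:

```python
from water_column_sonar_processing.geometry import (
    ElevationManager,
    GeometryManager,
    LineSimplification,
    PMTileGeneration,
)
```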
--- a/water_column_sonar_processing/geometry/elevation_manager.py
+++ b/water_column_sonar_processing/geometry/elevation_manager.py
@@ -26,16 +26,15 @@ https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/Ima
     "status": "OK"
 }
 """
+
 import json
 import time
+from collections.abc import Generator
 
 import requests
-from collections.abc import Generator
 
-
-
-    n: int
-) -> Generator:
+
+def chunked(ll: list, n: int) -> Generator:
     # Yields successively n-sized chunks from ll.
     for i in range(0, len(ll), n):
         yield ll[i : i + n]
@@ -51,10 +50,10 @@ class ElevationManager:
 
     #######################################################
     def get_arcgis_elevation(
-
-
-
-
+        self,
+        lngs: list,
+        lats: list,
+        chunk_size: int = 500,  # I think this is the api limit
     ) -> int:
         # Reference: https://developers.arcgis.com/rest/services-reference/enterprise/map-to-image/
         # Info: https://www.arcgis.com/home/item.html?id=c876e3c96a8642ab8557646a3b4fa0ff
@@ -62,7 +61,7 @@ class ElevationManager:
         if len(lngs) != len(lats):
             raise ValueError("lngs and lats must have same length")
 
-        geometryType = "esriGeometryMultipoint"
+        geometryType = "esriGeometryMultipoint"  # TODO: allow single point?
 
         depths = []
 
@@ -71,14 +70,14 @@ class ElevationManager:
             time.sleep(0.1)
             # order: (lng, lat)
             geometry = f'{{"points":{str(chunk)}}}'
-            url=f
+            url = f"https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={geometry}&geometryType={geometryType}&returnGeometry=false&returnCatalogItems=false&f=json"
             result = requests.get(url, timeout=self.TIMOUT_SECONDS)
-            res = json.loads(result.content.decode(
-            if
-                for element in res[
-                    depths.append(float(element[
-            elif
-                depths.append(float(res[
+            res = json.loads(result.content.decode("utf8"))
+            if "results" in res:
+                for element in res["results"]:
+                    depths.append(float(element["value"]))
+            elif "value" in res:
+                depths.append(float(res["value"]))
 
         return depths
 
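The restored `chunked` generator caps each ArcGIS `identify` request; the 500-point default is, per the inline comment, believed (not confirmed) to be the API limit. A usage sketch, assuming `chunked` is imported straight from the module:

```python
from water_column_sonar_processing.geometry.elevation_manager import (
    ElevationManager,
    chunked,
)

points = [(-70.1, 42.1), (-70.2, 42.2), (-70.3, 42.3)]  # (lng, lat) order

# chunked() yields successive n-sized slices, so no request exceeds the limit.
for batch in chunked(points, n=500):
    print(len(batch), "points in this request")  # 3 (a single batch here)

# The manager batches the same way internally:
# ElevationManager().get_arcgis_elevation(
#     lngs=[p[0] for p in points], lats=[p[1] for p in points]
# )
```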
--- a/water_column_sonar_processing/geometry/geometry_manager.py
+++ b/water_column_sonar_processing/geometry/geometry_manager.py
@@ -8,17 +8,15 @@ import pandas as pd
 from water_column_sonar_processing.aws import S3Manager
 from water_column_sonar_processing.utility import Cleaner
 
-
-//
-
-
-
-
-
-
-
-// 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
-"""
+# // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
+# // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
+# // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
+# // 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
+# // 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
+# // 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
+# // 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
+# // 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
+# // 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
 
 
 class GeometryManager:
@@ -62,9 +60,9 @@ class GeometryManager:
         time1 = echodata.environment.time1.values
 
         if len(nmea_times) < len(time1):
-            raise Exception(
+            raise Exception(
                 "Problem: Not enough NMEA times available to extrapolate time1."
-            )
+            )  # TODO: explore this logic further...
 
         # Align 'sv_times' to 'nmea_times'
         if not (
@@ -131,7 +129,9 @@ class GeometryManager:
         if write_geojson:
             print("Creating local copy of geojson file.")
             with open(geo_json_name, "w") as write_file:
-                write_file.write(
+                write_file.write(
+                    geo_json_line
+                )  # NOTE: this file can include zeros for lat lon
 
             geo_json_prefix = (
                 f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
@@ -141,11 +141,16 @@ class GeometryManager:
             s3_manager = S3Manager(endpoint_url=endpoint_url)
             geojson_object_exists = s3_manager.check_if_object_exists(
                 bucket_name=output_bucket_name,
-                key_name=f"{geo_json_prefix}/{geo_json_name}"
+                key_name=f"{geo_json_prefix}/{geo_json_name}",
             )
             if geojson_object_exists:
-                print(
-
+                print(
+                    "GeoJSON already exists in s3, deleting existing and continuing."
+                )
+                s3_manager.delete_nodd_object(
+                    bucket_name=output_bucket_name,
+                    key_name=f"{geo_json_prefix}/{geo_json_name}",
+                )
 
             print("Upload GeoJSON to s3.")
             s3_manager.upload_nodd_file(
@@ -205,7 +210,6 @@ class GeometryManager:
             sensor_name=sensor_name,
             file_name_stem=file_name_stem,
             output_bucket_name=output_bucket_name,
-
         )
         ###
         geospatial = geopandas.GeoDataFrame.from_features(
@@ -231,13 +235,9 @@ class GeometryManager:
 
     ############################################################################
     # COMES from the raw-to-zarr conversion
-    def __write_geojson_to_file(
-
-
-        data
-    ) -> None:
-        print('Writing GeoJSON to file.')
-        with open(os.path.join(store_name, 'geo.json'), "w") as outfile:
+    def __write_geojson_to_file(self, store_name, data) -> None:
+        print("Writing GeoJSON to file.")
+        with open(os.path.join(store_name, "geo.json"), "w") as outfile:
             outfile.write(data)
 
 
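The decimal-degrees table now kept as comments motivates the coordinate precision used elsewhere in the package (e.g. the `geoJsonPrecision = 5` noted in `LineSimplification`): five decimal places retain roughly metre-level fidelity. A quick illustration of that arithmetic (coordinates invented):

```python
lon, lat = -70.6709347112, 41.5256398201
print(round(lon, 5), round(lat, 5))  # -70.67093 41.52564

# One degree of longitude at the equator is ~111.32 km, so the fifth
# decimal place resolves to about 1.11 m:
print(111_320 * 10**-5)  # 1.1132
```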
--- /dev/null
+++ b/water_column_sonar_processing/geometry/line_simplification.py
@@ -0,0 +1,150 @@
+# import json
+import geopandas as gpd
+import numpy as np
+from pykalman import KalmanFilter
+from shapely.geometry import Point
+
+# import matplotlib.pyplot as plt
+
+
+# lambda for timestamp in form "yyyy-MM-ddTHH:mm:ssZ"
+# dt = lambda: datetime.now().isoformat(timespec="seconds") + "Z"
+
+# TODO: get line for example HB1906 ...save linestring to array for testing
+
+MAX_SPEED_KNOTS = 50
+
+
+# Lambert's formula ==> better accuracy than haversinte
+# Lambert's formula (the formula used by the calculators above) is the method used to calculate the shortest distance along the surface of an ellipsoid. When used to approximate the Earth and calculate the distance on the Earth surface, it has an accuracy on the order of 10 meters over thousands of kilometers, which is more precise than the haversine formula.
+
+
+def mph_to_knots(mph_value):
+    # 1 mile per hour === 0.868976 Knots
+    return mph_value * 0.868976
+
+
+# https://shapely.readthedocs.io/en/stable/reference/shapely.MultiLineString.html#shapely.MultiLineString
+class LineSimplification:
+    """
+    // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
+    // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
+    // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
+    // 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
+    // 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
+    // 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
+    // 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
+    // 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
+    // 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
+    private static final int SRID = 8307;
+    private static final double simplificationTolerance = 0.0001;
+    private static final long splitGeometryMs = 900000L;
+    private static final int batchSize = 10000;
+    private static final int geoJsonPrecision = 5;
+    final int geoJsonPrecision = 5;
+    final double simplificationTolerance = 0.0001;
+    final int simplifierBatchSize = 3000;
+    final long maxCount = 0;
+    private static final double maxAllowedSpeedKnts = 60D;
+    """
+
+    # TODO: in the future move to standalone library
+    #######################################################
+    def __init__(
+        self,
+    ):
+        pass
+
+    #######################################################
+    def kalman_filter(
+        self,
+        longitudes,
+        latitudes,
+    ) -> (np.ndarray, np.ndarray):
+        """
+        # TODO: need to use masked array to get the right number of values
+        """
+        ### https://github.com/pykalman/pykalman
+        # https://stackoverflow.com/questions/43377626/how-to-use-kalman-filter-in-python-for-location-data
+        measurements = np.asarray([list(elem) for elem in zip(longitudes, latitudes)])
+        initial_state_mean = [measurements[0, 0], 0, measurements[0, 1], 0]
+        transition_matrix = [[1, 1, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]]
+        observation_matrix = [[1, 0, 0, 0], [0, 0, 1, 0]]
+
+        kf = KalmanFilter(
+            transition_matrices=transition_matrix,
+            observation_matrices=observation_matrix,
+            initial_state_mean=initial_state_mean,
+        )
+        kf = kf.em(measurements, n_iter=2)  # TODO: 5
+        (smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements)
+
+        # plt.plot(longitudes, latitudes, label="original")
+        # plt.plot(smoothed_state_means[:, 0], smoothed_state_means[:, 2], label="smoothed")
+        # plt.legend()
+        # plt.show()
+
+        return smoothed_state_means[:, [0, 2]]
+
+    #######################################################
+    def get_speeds(
+        self,
+        times: np.ndarray,  # don't really need time, do need to segment the data first
+        latitudes: np.ndarray,
+        longitudes: np.ndarray,
+    ) -> np.ndarray:
+        print(MAX_SPEED_KNOTS)  # TODO: too high
+        print(times[0], latitudes[0], longitudes[0])
+        # TODO: distance/time ==> need to take position2 - position1 to get speed
+
+        # get distance difference
+        geom = [Point(xy) for xy in zip(longitudes, latitudes)]
+        points_df = gpd.GeoDataFrame({"geometry": geom}, crs="EPSG:4326")
+        # Conversion to UTM, a rectilinear projection coordinate system where distance can be calculated with pythagorean theorem
+        # an alternative could be to use EPSG 32663
+        points_df.to_crs(
+            epsg=3310, inplace=True
+        )  # https://gis.stackexchange.com/questions/293310/finding-distance-between-two-points-with-geoseries-distance
+        distance_diffs = points_df.distance(points_df.shift())
+        # distance_diffs_sorted = distance_diffs.sort_values(
+        #     ascending=False
+        # )  # TODO: get avg cutoff time
+        #
+        time_diffs_ns = np.append(0, (times[1:] - times[:-1]).astype(int))
+        # time_diffs_ns_sorted = np.sort(time_diffs_ns)
+        # largest time diffs HB0707 [ 17. 17.93749786 21.0781271 54.82812723 85.09374797, 113.56249805 204.87500006 216. 440.68749798 544.81249818]
+        # largest diffs HB1906 [3.01015808e+00 3.01016013e+00 3.01017805e+00 3.01018701e+00, 3.01018701e+00 3.01018906e+00 3.01019802e+00 3.01021005e+00, 3.01021005e+00 3.01021414e+00 3.01022208e+00 3.01022899e+00, 3.01024998e+00 3.01025920e+00 3.01026202e+00 3.01028096e+00, 3.01119411e+00 3.01120896e+00 3.01120998e+00 3.01120998e+00, 3.01122099e+00 3.01122790e+00 3.01122790e+00 3.01124506e+00, 3.01125197e+00 3.01128090e+00 3.01142707e+00 3.01219814e+00, 3.01221120e+00 3.01223014e+00 3.01225498e+00 3.01225882e+00, 3.01226010e+00 3.01312998e+00 3.01316096e+00 3.01321190e+00, 3.01321293e+00 3.01322880e+00 3.01322906e+00 3.01323110e+00, 3.01323213e+00 3.01323290e+00 3.01326208e+00 3.01328512e+00, 3.01418112e+00 3.01420109e+00 3.01421107e+00 3.01421184e+00, 3.01421414e+00 3.01424819e+00 3.01512883e+00 3.01516006e+00, 3.01524198e+00 3.01619917e+00 3.01623194e+00 3.01623296e+00, 3.01917594e+00 3.01921408e+00 3.01921587e+00 3.02022195e+00, 3.02025216e+00 3.02121702e+00 3.02325811e+00 3.02410291e+00, 3.02421914e+00 3.02426701e+00 3.02523776e+00 3.02718694e+00, 3.02927590e+00 3.03621606e+00 3.03826304e+00 3.34047514e+00, 3.36345114e+00 3.39148595e+00 4.36819302e+00 4.50157901e+00, 4.50315699e+00 4.50330598e+00 4.50333491e+00 4.50428416e+00, 4.50430490e+00 4.50430694e+00 4.50526387e+00 4.50530790e+00, 4.50530995e+00 4.50532301e+00 4.50533478e+00 4.50629402e+00, 4.50730701e+00 4.50825882e+00 4.50939008e+00 6.50179098e+00, 2.25025029e+01 1.39939425e+02 1.54452331e+02 1.60632653e+03, 1.74574667e+05 4.33569587e+05 4.35150475e+05 8.00044883e+05]
+        nanoseconds_per_second = 1e9
+        speed_meters_per_second = (
+            distance_diffs / time_diffs_ns * nanoseconds_per_second
+        )
+        # returns the speed in meters per second #TODO: get speed in knots
+        return speed_meters_per_second.to_numpy(dtype="float32")  # includes nan
+
+    def remove_null_island_values(
+        self,
+        epsilon=1e-5,
+    ) -> None:
+        # TODO: low priority
+        print(epsilon)
+        pass
+
+    def break_linestring_into_multi_linestring(
+        self,
+    ) -> None:
+        # TODO: medium priority
+        # For any line-strings across the antimeridian, break into multilinestring
+        # average cadence is measurements every 1 second
+        # break when over 1 minute
+        pass
+
+    def simplify(
+        self,
+    ) -> None:
+        # TODO: medium-high priority
+        pass
+
+    #######################################################
+
+
+###########################################################