water-column-sonar-processing 25.1.6__py3-none-any.whl → 25.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +27 -32
  2. water_column_sonar_processing/aws/s3_manager.py +52 -64
  3. water_column_sonar_processing/aws/s3fs_manager.py +3 -9
  4. water_column_sonar_processing/cruise/create_empty_zarr_store.py +14 -14
  5. water_column_sonar_processing/cruise/datatree_manager.py +3 -6
  6. water_column_sonar_processing/cruise/resample_regrid.py +67 -49
  7. water_column_sonar_processing/geometry/__init__.py +7 -2
  8. water_column_sonar_processing/geometry/elevation_manager.py +16 -17
  9. water_column_sonar_processing/geometry/geometry_manager.py +25 -25
  10. water_column_sonar_processing/geometry/line_simplification.py +150 -0
  11. water_column_sonar_processing/geometry/pmtile_generation.py +99 -64
  12. water_column_sonar_processing/index/index_manager.py +67 -32
  13. water_column_sonar_processing/model/zarr_manager.py +32 -21
  14. water_column_sonar_processing/process.py +15 -13
  15. water_column_sonar_processing/processing/__init__.py +2 -2
  16. water_column_sonar_processing/processing/batch_downloader.py +66 -41
  17. water_column_sonar_processing/processing/raw_to_zarr.py +121 -82
  18. water_column_sonar_processing/utility/constants.py +11 -1
  19. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  20. {water_column_sonar_processing-25.1.6.dist-info → water_column_sonar_processing-25.3.0.dist-info}/METADATA +21 -12
  21. water_column_sonar_processing-25.3.0.dist-info/RECORD +34 -0
  22. {water_column_sonar_processing-25.1.6.dist-info → water_column_sonar_processing-25.3.0.dist-info}/WHEEL +1 -1
  23. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  24. water_column_sonar_processing-25.1.6.dist-info/RECORD +0 -34
  25. {water_column_sonar_processing-25.1.6.dist-info → water_column_sonar_processing-25.3.0.dist-info/licenses}/LICENSE +0 -0
  26. {water_column_sonar_processing-25.1.6.dist-info → water_column_sonar_processing-25.3.0.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/cruise/resample_regrid.py
@@ -35,6 +35,7 @@ class ResampleRegrid:
         input_xr,
         ping_times,
         all_cruise_depth_values,
+        water_level,
     ) -> np.ndarray:
         print("Interpolating data.")
         try:
@@ -53,7 +54,7 @@ class ResampleRegrid:
                 data=data,
                 dims=("depth", "time", "frequency"),
                 coords={
-                    "depth": all_cruise_depth_values,
+                    "depth": all_cruise_depth_values,  # TODO: these should be on interval from 7.7 meters to 507 meters
                     "time": ping_times,
                     "frequency": input_xr.frequency_nominal.values,
                 },
@@ -62,34 +63,19 @@ class ResampleRegrid:
             channels = input_xr.channel.values
             for channel in range(
                 len(channels)
-            ):  # TODO: leaving off here, need to subset for just indices in time axis
+            ):  # ?TODO: leaving off here, need to subset for just indices in time axis
                 gc.collect()
-                print(
-                    np.nanmax(
-                        input_xr.echo_range.sel(
-                            channel=input_xr.channel[channel]
-                        ).values
-                    )
-                )
-                #
                 max_depths = np.nanmax(
-                    a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
+                    a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values
+                    + water_level,
                     axis=1,
                 )
-                superset_of_max_depths = set(
-                    np.nanmax(
-                        input_xr.echo_range.sel(
-                            channel=input_xr.channel[channel]
-                        ).values,
-                        1,
-                    )
-                )
+                superset_of_max_depths = set(max_depths)
                 set_of_max_depths = list(
                     {x for x in superset_of_max_depths if x == x}
                 )  # removes nan's
                 # iterate through partitions of data with similar depths and resample
                 for select_max_depth in set_of_max_depths:
-                    gc.collect()
                     # TODO: for nan just skip and leave all nan's
                     select_indices = [
                         i
@@ -132,6 +118,7 @@ class ResampleRegrid:
                         )
                     ] = resampled
                     print(f"updated {len(times_select)} ping times")
+                    gc.collect()
         except Exception as err:
             print(f"Problem finding the dynamodb table: {err}")
             raise err
@@ -146,9 +133,9 @@ class ResampleRegrid:
         sensor_name,
         table_name,
         # TODO: file_name?,
-        bucket_name, # TODO: this is the same bucket
+        bucket_name,  # TODO: this is the same bucket
         override_select_files=None,
-        endpoint_url=None
+        endpoint_url=None,
     ) -> None:
         """
         The goal here is to interpolate the data against the depth values already populated
@@ -172,9 +159,9 @@ class ResampleRegrid:
         # get dynamo stuff
         dynamo_db_manager = DynamoDBManager()
         cruise_df = dynamo_db_manager.get_table_as_df(
-            ship_name=ship_name,
+            # ship_name=ship_name,
             cruise_name=cruise_name,
-            sensor_name=sensor_name,
+            # sensor_name=sensor_name,
             table_name=table_name,
         )
 
@@ -191,19 +178,21 @@ class ResampleRegrid:
         file_name_stem = Path(file_name).stem
         print(f"Processing file: {file_name_stem}.")
 
-        if f"{file_name_stem}.raw" not in list(cruise_df['FILE_NAME']):
-            raise Exception(f"Raw file file_stem not found in dynamodb.")
+        if f"{file_name_stem}.raw" not in list(cruise_df["FILE_NAME"]):
+            raise Exception("Raw file file_stem not found in dynamodb.")
 
         # status = PipelineStatus['LEVEL_1_PROCESSING']
         # TODO: filter rows by enum success, filter the dataframe just for enums >= LEVEL_1_PROCESSING
         # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
 
         # Get index from all cruise files. Note: should be based on which are included in cruise.
-        index = int(cruise_df.index[
-            cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"
-        ][0])
+        index = int(
+            cruise_df.index[cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"][
+                0
+            ]
+        )
 
-        # get input store
+        # Get input store
         input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
             ship_name=ship_name,
             cruise_name=cruise_name,
@@ -212,6 +201,10 @@ class ResampleRegrid:
             input_bucket_name=bucket_name,
             endpoint_url=endpoint_url,
         )
+
+        # This is the vertical offset of the measurement.
+        # See https://echopype.readthedocs.io/en/stable/data-proc-additional.html
+        water_level = input_xr_zarr_store.water_level.values
         #########################################################################
         # [3] Get needed indices
         # Offset from start index to insert new data. Note that missing values are excluded.
@@ -225,14 +218,26 @@ class ResampleRegrid:
         start_ping_time_index = ping_time_cumsum[index]
         end_ping_time_index = ping_time_cumsum[index + 1]
 
-        min_echo_range = np.nanmin(np.float32(cruise_df["MIN_ECHO_RANGE"]))
-        max_echo_range = np.nanmax(np.float32(cruise_df["MAX_ECHO_RANGE"]))
+        min_echo_range = np.min(
+            (cruise_df["MIN_ECHO_RANGE"] + cruise_df["WATER_LEVEL"])
+            .dropna()
+            .astype(float)
+        )
+        max_echo_range = np.max(
+            (cruise_df["MAX_ECHO_RANGE"] + cruise_df["WATER_LEVEL"])
+            .dropna()
+            .astype(float)
+        )
+        cruise_min_epsilon = np.min(
+            cruise_df["MIN_ECHO_RANGE"].dropna().astype(float)
+        )
 
         # Note: cruise dims (depth, time, frequency)
         all_cruise_depth_values = zarr_manager.get_depth_values(
             min_echo_range=min_echo_range,
-            max_echo_range=max_echo_range
-        )
+            max_echo_range=max_echo_range,
+            cruise_min_epsilon=cruise_min_epsilon,  # remove this & integrate into min_echo_range
+        )  # with offset of 7.5 meters, 0 meter measurement should now start at 7.5 meters
 
         print(" ".join(list(input_xr_zarr_store.Sv.dims)))
         if set(input_xr_zarr_store.Sv.dims) != {
@@ -265,34 +270,45 @@ class ResampleRegrid:
                 )
 
                 # --- UPDATING --- #
-                regrid_resample = self.interpolate_data(
-                    input_xr=input_xr,
-                    ping_times=ping_times,
-                    all_cruise_depth_values=all_cruise_depth_values,
+                regrid_resample = (
+                    self.interpolate_data(  # TODO: need to add water_level here
+                        input_xr=input_xr,
+                        ping_times=ping_times,
+                        all_cruise_depth_values=all_cruise_depth_values,
+                        water_level=water_level,
+                    )
                 )
 
-                print(f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}")
+                print(
+                    f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}"
+                )
                 #########################################################################
                 # write Sv values to cruise-level-model-store
 
                 for fff in range(regrid_resample.shape[-1]):
-                    output_zarr_store.Sv[:, start_ping_time_index:end_ping_time_index, fff] = regrid_resample[:, :, fff]
+                    output_zarr_store.Sv[
+                        :, start_ping_time_index:end_ping_time_index, fff
+                    ] = regrid_resample[:, :, fff]
                 #########################################################################
                 # TODO: add the "detected_seafloor_depth/" to the
                 # L2 cruise dataarrays
                 # TODO: make bottom optional
                 # TODO: Only checking the first channel for now. Need to average across all channels
                 # in the future. See https://github.com/CI-CMG/water-column-sonar-processing/issues/11
-                if 'detected_seafloor_depth' in input_xr.variables:
-                    print('Found detected_seafloor_depth, adding data to output store.')
+                if "detected_seafloor_depth" in input_xr.variables:
+                    print("Found detected_seafloor_depth, adding data to output store.")
                     detected_seafloor_depth = input_xr.detected_seafloor_depth.values
-                    detected_seafloor_depth[detected_seafloor_depth == 0.] = np.nan
+                    detected_seafloor_depth[detected_seafloor_depth == 0.0] = np.nan
                     # TODO: problem here: Processing file: D20070711-T210709.
-                    detected_seafloor_depths = np.nanmean(detected_seafloor_depth, 0) # RuntimeWarning: Mean of empty slice detected_seafloor_depths = np.nanmean(detected_seafloor_depth, 0)
-                    detected_seafloor_depths[detected_seafloor_depths == 0.] = np.nan
+
+                    detected_seafloor_depths = np.nanmean(
+                        a=detected_seafloor_depth, axis=0
+                    )
+                    # RuntimeWarning: Mean of empty slice detected_seafloor_depths = np.nanmean(detected_seafloor_depth, 0)
+                    detected_seafloor_depths[detected_seafloor_depths == 0.0] = np.nan
                     print(f"min depth measured: {np.nanmin(detected_seafloor_depths)}")
                     print(f"max depth measured: {np.nanmax(detected_seafloor_depths)}")
-                    #available_indices = np.argwhere(np.isnan(geospatial['latitude'].values))
+                    # available_indices = np.argwhere(np.isnan(geospatial['latitude'].values))
                     output_zarr_store.bottom[
                         start_ping_time_index:end_ping_time_index
                     ] = detected_seafloor_depths
@@ -301,17 +317,19 @@ class ResampleRegrid:
                 # [5] write subset of latitude/longitude
                 output_zarr_store.latitude[
                     start_ping_time_index:end_ping_time_index
-                ] = geospatial.dropna()["latitude"].values # TODO: get from ds_sv directly, dont need geojson anymore
+                ] = geospatial.dropna()[
+                    "latitude"
+                ].values  # TODO: get from ds_sv directly, dont need geojson anymore
                 output_zarr_store.longitude[
                     start_ping_time_index:end_ping_time_index
                 ] = geospatial.dropna()["longitude"].values
                 #########################################################################
                 #########################################################################
         except Exception as err:
-            print(f"Problem interpolating the data: {err}")
+            print(f"Problem with resample_regrid: {err}")
             raise err
         finally:
-            print("Done interpolating data.")
+            print("Exiting resample_regrid.")
             # TODO: read across times and verify data was written?
 
     #######################################################
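
The resample_regrid.py changes above all serve one adjustment: echo_range is measured from the transducer rather than from the sea surface, so the new water_level offset is added to echo_range (and to the MIN/MAX_ECHO_RANGE bounds pulled from DynamoDB) before samples are regridded onto the cruise-level depth axis. A minimal sketch of that idea, assuming xarray with scipy available; the variable names and the 7.5 meter offset are illustrative, not taken from the package:

import numpy as np
import xarray as xr

echo_range = np.linspace(0.0, 500.0, 1001)  # meters below the transducer
water_level = 7.5  # vertical transducer offset in meters (example value)
sv = np.random.default_rng(0).normal(size=echo_range.size)  # stand-in Sv samples

# Index the samples by true depth: depth = echo_range + water_level.
measured = xr.DataArray(
    sv,
    dims=("depth",),
    coords={"depth": echo_range + water_level},
)

# Regrid onto a shared cruise-level depth axis that starts at the offset.
cruise_depths = np.linspace(7.5, 507.5, 1001)
regridded = measured.interp(depth=cruise_depths)  # linear interpolation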

water_column_sonar_processing/geometry/__init__.py
@@ -1,6 +1,11 @@
 from .elevation_manager import ElevationManager
 from .geometry_manager import GeometryManager
-from .geometry_simplification import GeometrySimplification
+from .line_simplification import LineSimplification
 from .pmtile_generation import PMTileGeneration
 
-__all__ = ["ElevationManager", "GeometryManager", "GeometrySimplification", "PMTileGeneration"]
+__all__ = [
+    "ElevationManager",
+    "GeometryManager",
+    "LineSimplification",
+    "PMTileGeneration",
+]

water_column_sonar_processing/geometry/elevation_manager.py
@@ -26,16 +26,15 @@ https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/Ima
     "status": "OK"
 }
 """
+
 import json
 import time
+from collections.abc import Generator
 
 import requests
-from collections.abc import Generator
 
-def chunked(
-    ll: list,
-    n: int
-) -> Generator:
+
+def chunked(ll: list, n: int) -> Generator:
     # Yields successively n-sized chunks from ll.
     for i in range(0, len(ll), n):
         yield ll[i : i + n]
@@ -51,10 +50,10 @@ class ElevationManager:
 
     #######################################################
     def get_arcgis_elevation(
-            self,
-            lngs: list,
-            lats: list,
-            chunk_size: int=500, # I think this is the api limit
+        self,
+        lngs: list,
+        lats: list,
+        chunk_size: int = 500,  # I think this is the api limit
     ) -> int:
         # Reference: https://developers.arcgis.com/rest/services-reference/enterprise/map-to-image/
         # Info: https://www.arcgis.com/home/item.html?id=c876e3c96a8642ab8557646a3b4fa0ff
@@ -62,7 +61,7 @@ class ElevationManager:
         if len(lngs) != len(lats):
             raise ValueError("lngs and lats must have same length")
 
-        geometryType = "esriGeometryMultipoint" # TODO: allow single point?
+        geometryType = "esriGeometryMultipoint"  # TODO: allow single point?
 
         depths = []
 
@@ -71,14 +70,14 @@ class ElevationManager:
             time.sleep(0.1)
             # order: (lng, lat)
             geometry = f'{{"points":{str(chunk)}}}'
-            url=f'https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={geometry}&geometryType={geometryType}&returnGeometry=false&returnCatalogItems=false&f=json'
+            url = f"https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={geometry}&geometryType={geometryType}&returnGeometry=false&returnCatalogItems=false&f=json"
             result = requests.get(url, timeout=self.TIMOUT_SECONDS)
-            res = json.loads(result.content.decode('utf8'))
-            if 'results' in res:
-                for element in res['results']:
-                    depths.append(float(element['value']))
-            elif 'value' in res:
-                depths.append(float(res['value']))
+            res = json.loads(result.content.decode("utf8"))
+            if "results" in res:
+                for element in res["results"]:
+                    depths.append(float(element["value"]))
+            elif "value" in res:
+                depths.append(float(res["value"]))
 
         return depths
 
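Taken together, chunked() and get_arcgis_elevation() batch point lookups against the NGDC DEM_global_mosaic identify endpoint, at most chunk_size points per request. A hypothetical usage sketch; it assumes ElevationManager() takes no constructor arguments, which this diff does not show:

from water_column_sonar_processing.geometry import ElevationManager

elevation_manager = ElevationManager()  # constructor signature is an assumption
lngs = [-70.45, -70.40, -70.35]  # longitudes first; the API expects (lng, lat)
lats = [40.05, 40.10, 40.15]
# One DEM value comes back per input point; 500 is the API limit assumed in the code above.
depths = elevation_manager.get_arcgis_elevation(lngs=lngs, lats=lats, chunk_size=500)
print(depths)  # negative values are depths below sea level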

water_column_sonar_processing/geometry/geometry_manager.py
@@ -8,17 +8,15 @@ import pandas as pd
 from water_column_sonar_processing.aws import S3Manager
 from water_column_sonar_processing.utility import Cleaner
 
-"""
-// [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
-// 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
-// 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
-// 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
-// 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
-// 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
-// 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
-// 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
-// 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
-"""
+# // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
+# // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
+# // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
+# // 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
+# // 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
+# // 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
+# // 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
+# // 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
+# // 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
 
 
 class GeometryManager:
  class GeometryManager:
@@ -62,9 +60,9 @@ class GeometryManager:
62
60
  time1 = echodata.environment.time1.values
63
61
 
64
62
  if len(nmea_times) < len(time1):
65
- raise Exception( # TODO: explore this logic further...
63
+ raise Exception(
66
64
  "Problem: Not enough NMEA times available to extrapolate time1."
67
- )
65
+ ) # TODO: explore this logic further...
68
66
 
69
67
  # Align 'sv_times' to 'nmea_times'
70
68
  if not (
@@ -131,7 +129,9 @@ class GeometryManager:
         if write_geojson:
             print("Creating local copy of geojson file.")
             with open(geo_json_name, "w") as write_file:
-                write_file.write(geo_json_line) # NOTE: this file can include zeros for lat lon
+                write_file.write(
+                    geo_json_line
+                )  # NOTE: this file can include zeros for lat lon
 
             geo_json_prefix = (
                 f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
@@ -141,11 +141,16 @@ class GeometryManager:
             s3_manager = S3Manager(endpoint_url=endpoint_url)
             geojson_object_exists = s3_manager.check_if_object_exists(
                 bucket_name=output_bucket_name,
-                key_name=f"{geo_json_prefix}/{geo_json_name}"
+                key_name=f"{geo_json_prefix}/{geo_json_name}",
             )
             if geojson_object_exists:
-                print("GeoJSON already exists in s3, deleting existing and continuing.")
-                s3_manager.delete_nodd_object(bucket_name=output_bucket_name, key_name=f"{geo_json_prefix}/{geo_json_name}")
+                print(
+                    "GeoJSON already exists in s3, deleting existing and continuing."
+                )
+                s3_manager.delete_nodd_object(
+                    bucket_name=output_bucket_name,
+                    key_name=f"{geo_json_prefix}/{geo_json_name}",
+                )
 
             print("Upload GeoJSON to s3.")
             s3_manager.upload_nodd_file(
@@ -205,7 +210,6 @@ class GeometryManager:
                 sensor_name=sensor_name,
                 file_name_stem=file_name_stem,
                 output_bucket_name=output_bucket_name,
-
             )
             ###
             geospatial = geopandas.GeoDataFrame.from_features(
  geospatial = geopandas.GeoDataFrame.from_features(
@@ -231,13 +235,9 @@ class GeometryManager:
231
235
 
232
236
  ############################################################################
233
237
  # COMES from the raw-to-zarr conversion
234
- def __write_geojson_to_file(
235
- self,
236
- store_name,
237
- data
238
- ) -> None:
239
- print('Writing GeoJSON to file.')
240
- with open(os.path.join(store_name, 'geo.json'), "w") as outfile:
238
+ def __write_geojson_to_file(self, store_name, data) -> None:
239
+ print("Writing GeoJSON to file.")
240
+ with open(os.path.join(store_name, "geo.json"), "w") as outfile:
241
241
  outfile.write(data)
242
242
 
243
243
 

water_column_sonar_processing/geometry/line_simplification.py
@@ -0,0 +1,150 @@
+# import json
+import geopandas as gpd
+import numpy as np
+from pykalman import KalmanFilter
+from shapely.geometry import Point
+
+# import matplotlib.pyplot as plt
+
+
+# lambda for timestamp in form "yyyy-MM-ddTHH:mm:ssZ"
+# dt = lambda: datetime.now().isoformat(timespec="seconds") + "Z"
+
+# TODO: get line for example HB1906 ...save linestring to array for testing
+
+MAX_SPEED_KNOTS = 50
+
+
+# Lambert's formula ==> better accuracy than haversine
+# Lambert's formula (the formula used by the calculators above) is the method used to calculate the shortest distance along the surface of an ellipsoid. When used to approximate the Earth and calculate the distance on the Earth surface, it has an accuracy on the order of 10 meters over thousands of kilometers, which is more precise than the haversine formula.
+
+
+def mph_to_knots(mph_value):
+    # 1 mile per hour === 0.868976 Knots
+    return mph_value * 0.868976
+
+
+# https://shapely.readthedocs.io/en/stable/reference/shapely.MultiLineString.html#shapely.MultiLineString
+class LineSimplification:
+    """
+    // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
+    // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
+    // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
+    // 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
+    // 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
+    // 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
+    // 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
+    // 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
+    // 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
+    private static final int SRID = 8307;
+    private static final double simplificationTolerance = 0.0001;
+    private static final long splitGeometryMs = 900000L;
+    private static final int batchSize = 10000;
+    private static final int geoJsonPrecision = 5;
+    final int geoJsonPrecision = 5;
+    final double simplificationTolerance = 0.0001;
+    final int simplifierBatchSize = 3000;
+    final long maxCount = 0;
+    private static final double maxAllowedSpeedKnts = 60D;
+    """
+
+    # TODO: in the future move to standalone library
+    #######################################################
+    def __init__(
+        self,
+    ):
+        pass
+
+    #######################################################
+    def kalman_filter(
+        self,
+        longitudes,
+        latitudes,
+    ) -> (np.ndarray, np.ndarray):
+        """
+        # TODO: need to use masked array to get the right number of values
+        """
+        ### https://github.com/pykalman/pykalman
+        # https://stackoverflow.com/questions/43377626/how-to-use-kalman-filter-in-python-for-location-data
+        measurements = np.asarray([list(elem) for elem in zip(longitudes, latitudes)])
+        initial_state_mean = [measurements[0, 0], 0, measurements[0, 1], 0]
+        transition_matrix = [[1, 1, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]]
+        observation_matrix = [[1, 0, 0, 0], [0, 0, 1, 0]]
+
+        kf = KalmanFilter(
+            transition_matrices=transition_matrix,
+            observation_matrices=observation_matrix,
+            initial_state_mean=initial_state_mean,
+        )
+        kf = kf.em(measurements, n_iter=2)  # TODO: 5
+        (smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements)
+
+        # plt.plot(longitudes, latitudes, label="original")
+        # plt.plot(smoothed_state_means[:, 0], smoothed_state_means[:, 2], label="smoothed")
+        # plt.legend()
+        # plt.show()
+
+        return smoothed_state_means[:, [0, 2]]
+
+    #######################################################
+    def get_speeds(
+        self,
+        times: np.ndarray,  # don't really need time, do need to segment the data first
+        latitudes: np.ndarray,
+        longitudes: np.ndarray,
+    ) -> np.ndarray:
+        print(MAX_SPEED_KNOTS)  # TODO: too high
+        print(times[0], latitudes[0], longitudes[0])
+        # TODO: distance/time ==> need to take position2 - position1 to get speed
+
+        # get distance difference
+        geom = [Point(xy) for xy in zip(longitudes, latitudes)]
+        points_df = gpd.GeoDataFrame({"geometry": geom}, crs="EPSG:4326")
+        # Conversion to UTM, a rectilinear projection coordinate system where distance can be calculated with pythagorean theorem
+        # an alternative could be to use EPSG 32663
+        points_df.to_crs(
+            epsg=3310, inplace=True
+        )  # https://gis.stackexchange.com/questions/293310/finding-distance-between-two-points-with-geoseries-distance
+        distance_diffs = points_df.distance(points_df.shift())
+        # distance_diffs_sorted = distance_diffs.sort_values(
+        #     ascending=False
+        # )  # TODO: get avg cutoff time
+        #
+        time_diffs_ns = np.append(0, (times[1:] - times[:-1]).astype(int))
+        # time_diffs_ns_sorted = np.sort(time_diffs_ns)
+        # largest time diffs HB0707 [ 17. 17.93749786 21.0781271 54.82812723 85.09374797, 113.56249805 204.87500006 216. 440.68749798 544.81249818]
+        # largest diffs HB1906 [3.01015808e+00 3.01016013e+00 3.01017805e+00 3.01018701e+00, 3.01018701e+00 3.01018906e+00 3.01019802e+00 3.01021005e+00, 3.01021005e+00 3.01021414e+00 3.01022208e+00 3.01022899e+00, 3.01024998e+00 3.01025920e+00 3.01026202e+00 3.01028096e+00, 3.01119411e+00 3.01120896e+00 3.01120998e+00 3.01120998e+00, 3.01122099e+00 3.01122790e+00 3.01122790e+00 3.01124506e+00, 3.01125197e+00 3.01128090e+00 3.01142707e+00 3.01219814e+00, 3.01221120e+00 3.01223014e+00 3.01225498e+00 3.01225882e+00, 3.01226010e+00 3.01312998e+00 3.01316096e+00 3.01321190e+00, 3.01321293e+00 3.01322880e+00 3.01322906e+00 3.01323110e+00, 3.01323213e+00 3.01323290e+00 3.01326208e+00 3.01328512e+00, 3.01418112e+00 3.01420109e+00 3.01421107e+00 3.01421184e+00, 3.01421414e+00 3.01424819e+00 3.01512883e+00 3.01516006e+00, 3.01524198e+00 3.01619917e+00 3.01623194e+00 3.01623296e+00, 3.01917594e+00 3.01921408e+00 3.01921587e+00 3.02022195e+00, 3.02025216e+00 3.02121702e+00 3.02325811e+00 3.02410291e+00, 3.02421914e+00 3.02426701e+00 3.02523776e+00 3.02718694e+00, 3.02927590e+00 3.03621606e+00 3.03826304e+00 3.34047514e+00, 3.36345114e+00 3.39148595e+00 4.36819302e+00 4.50157901e+00, 4.50315699e+00 4.50330598e+00 4.50333491e+00 4.50428416e+00, 4.50430490e+00 4.50430694e+00 4.50526387e+00 4.50530790e+00, 4.50530995e+00 4.50532301e+00 4.50533478e+00 4.50629402e+00, 4.50730701e+00 4.50825882e+00 4.50939008e+00 6.50179098e+00, 2.25025029e+01 1.39939425e+02 1.54452331e+02 1.60632653e+03, 1.74574667e+05 4.33569587e+05 4.35150475e+05 8.00044883e+05]
+        nanoseconds_per_second = 1e9
+        speed_meters_per_second = (
+            distance_diffs / time_diffs_ns * nanoseconds_per_second
+        )
+        # returns the speed in meters per second #TODO: get speed in knots
+        return speed_meters_per_second.to_numpy(dtype="float32")  # includes nan
+
+    def remove_null_island_values(
+        self,
+        epsilon=1e-5,
+    ) -> None:
+        # TODO: low priority
+        print(epsilon)
+        pass
+
+    def break_linestring_into_multi_linestring(
+        self,
+    ) -> None:
+        # TODO: medium priority
+        # For any line-strings across the antimeridian, break into multilinestring
+        # average cadence is measurements every 1 second
+        # break when over 1 minute
+        pass
+
+    def simplify(
+        self,
+    ) -> None:
+        # TODO: medium-high priority
+        pass
+
+    #######################################################
+
+
+###########################################################
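
The core of get_speeds() is: project the fixes from EPSG:4326 into a planar CRS so that .distance() returns meters, then divide consecutive distance deltas by consecutive time deltas. A self-contained sketch of the same arithmetic on a toy three-point track; the knots conversion at the end is an addition here (the method above still returns m/s, per its TODO), and it reuses the diff's EPSG:3310 choice, whose meter accuracy degrades far from California:

import geopandas as gpd
import numpy as np
from shapely.geometry import Point

# Toy track: three fixes, one second apart, moving west along 41.5 N.
times = np.array(
    ["2019-06-01T00:00:00", "2019-06-01T00:00:01", "2019-06-01T00:00:02"],
    dtype="datetime64[ns]",
)
lons = np.array([-70.0000, -70.0001, -70.0002])
lats = np.array([41.5, 41.5, 41.5])

points = gpd.GeoDataFrame(
    {"geometry": [Point(xy) for xy in zip(lons, lats)]}, crs="EPSG:4326"
).to_crs(epsg=3310)  # planar CRS: .distance() now yields meters

meters = points.distance(points.shift())  # NaN for the first fix
seconds = np.append(np.nan, np.diff(times).astype("int64")) / 1e9
speed_mps = (meters / seconds).to_numpy(dtype="float32")
speed_knots = speed_mps * 1.943844  # 1 m/s = 1.943844 knots
print(speed_knots)  # roughly 16 knots at this toy spacing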