water-column-sonar-processing 0.0.9__py3-none-any.whl → 26.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
  2. water_column_sonar_processing/aws/s3_manager.py +179 -141
  3. water_column_sonar_processing/aws/s3fs_manager.py +29 -33
  4. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  5. water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
  6. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  7. water_column_sonar_processing/cruise/resample_regrid.py +142 -127
  8. water_column_sonar_processing/geometry/__init__.py +10 -2
  9. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  10. water_column_sonar_processing/geometry/geometry_manager.py +50 -49
  11. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  12. water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
  13. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  14. water_column_sonar_processing/index/index_manager.py +151 -33
  15. water_column_sonar_processing/model/zarr_manager.py +665 -262
  16. water_column_sonar_processing/processing/__init__.py +3 -3
  17. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  18. water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
  19. water_column_sonar_processing/utility/__init__.py +9 -2
  20. water_column_sonar_processing/utility/constants.py +69 -18
  21. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  22. water_column_sonar_processing/utility/timestamp.py +3 -4
  23. water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
  24. water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
  25. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
  26. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
  27. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  28. water_column_sonar_processing/process.py +0 -147
  29. water_column_sonar_processing/processing/cruise_sampler.py +0 -342
  30. water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
  31. water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
  32. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/geometry/pmtile_generation.py
@@ -1,15 +1,8 @@
- import glob
- import os
- from pathlib import Path
  import fiona
- import s3fs
+ import geopandas as gpd
  import numpy as np
  import pandas as pd
  import xarray as xr
- import geopandas
- import geopandas as gpd
- import pyogrio
- from concurrent.futures import ThreadPoolExecutor, as_completed
  from shapely.geometry import LineString

  MAX_POOL_CONNECTIONS = 64
@@ -17,246 +10,257 @@ MAX_CONCURRENCY = 64
  MAX_WORKERS = 64
  GB = 1024**3

+ bucket_name = "noaa-wcsd-zarr-pds"
+ ship_name = "Henry_B._Bigelow"
+ sensor_name = "EK60"
+
+ # TODO: get pmtiles of all the evr points
+

  class PMTileGeneration(object):
+     """
+     - iterate through the zarr stores for all cruises
+     - generate geojson in geopandas df, simplify linestrings
+     - consolidate into singular df, one cruise per row
+     - export as geojson
+     - using tippecanoe, geojson --> pmtiles w linux command
+     - upload to s3
+     """
+
      #######################################################
      def __init__(
          self,
      ):
-         print("123")
+         self.bucket_name = "noaa-wcsd-zarr-pds"
+         self.ship_name = "Henry_B._Bigelow"
+         self.sensor_name = "EK60"

      #######################################################
-     # This uses a local collection of file-level geojson files to create the data
-     def generate_geojson_feature_collection(self):
-         # This was used to read from noaa-wcsd-model-pds bucket geojson files and then to
-         # generate the geopandas dataframe which could be exported to another comprehensive
-         # geojson file. That
-         result = list(Path("/Users/r2d2/Documents/echofish/geojson").rglob("*.json"))
-         # result = result[:100]
-         jjj = 0
-         pieces = []
-         for jjj in range(len(result)):
-             file_name = os.path.normpath(result[jjj]).split(os.sep)[-1]
-             file_stem = os.path.splitext(os.path.basename(file_name))[0]
-             geom = gpd.read_file(result[jjj]).iloc[0]["geometry"]
-             # TDOO: Filter (0,0) coordinates
-             if len(geom.coords.xy[0]) < 2:
-                 continue
-             geom = LineString(list(zip(geom.coords.xy[1], geom.coords.xy[0])))
+     @staticmethod
+     def check_all_cruises(bucket_name, cruises):
+         completed = []
+         for cruise_name in cruises:
+             print(cruise_name)
+             try:
+                 zarr_store = f"{cruise_name}.zarr"
+                 s3_zarr_store_path = f"{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{zarr_store}"
+                 kwargs = {"consolidated": False}
+                 cruise = xr.open_dataset(
+                     filename_or_obj=f"s3://{s3_zarr_store_path}",
+                     engine="zarr",
+                     storage_options={"anon": True},
+                     **kwargs,
+                 )
+                 width = cruise.Sv.shape[1]
+                 height = cruise.Sv.shape[0]
+                 depth = cruise.Sv.shape[2]
+                 print(
+                     f"height: {height}, width: {width}, depth: {depth} = {width * height * depth}"
+                 )
+                 lats = cruise.latitude.to_numpy()
+                 percent_done = np.count_nonzero(~np.isnan(lats)) / width
+                 if percent_done != 1.0:
+                     print(
+                         f"percent done: {np.round(percent_done, 2)}, {np.count_nonzero(~np.isnan(cruise.latitude.values))}, {width}"
+                     )
+                 else:
+                     completed.append(cruise_name)
+             except Exception as err:
+                 raise RuntimeError(f"Problem parsing Zarr stores, {err}")
+         return completed
+
+     #######################################################
+     @staticmethod
+     def get_cruise_geometry(cruise_name, index):
+         print(cruise_name)
+         try:
+             pieces = []
+             zarr_store = f"{cruise_name}.zarr"
+             s3_zarr_store_path = f"{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{zarr_store}"
+             cruise = xr.open_dataset(
+                 filename_or_obj=f"s3://{s3_zarr_store_path}",
+                 engine="zarr",
+                 storage_options={"anon": True},
+                 chunks={},
+                 cache=True,
+             )
+             latitude_array = cruise.latitude.to_numpy()
+             longitude_array = cruise.longitude.to_numpy()
+             if np.isnan(latitude_array).any() or np.isnan(longitude_array).any():
+                 raise RuntimeError(
+                     f"There was missing lat-lon dataset for, {cruise_name}"
+                 )
+             geom = LineString(list(zip(longitude_array, latitude_array))).simplify(
+                 tolerance=0.001,  # preserve_topology=True  # 113
+             )  # TODO: do speed check, convert linestrings to multilinestrings
+             print(len(geom.coords))
              pieces.append(
                  {
-                     "ship_name": os.path.normpath(result[jjj]).split(os.sep)[-4],
-                     "cruise_name": os.path.normpath(result[jjj]).split(os.sep)[-3],
-                     "file_stem": file_stem,
-                     "file_path": result[jjj],
+                     "id": index,
+                     "ship_name": ship_name,
+                     "cruise_name": cruise_name,
+                     "sensor_name": sensor_name,
                      "geom": geom,
                  }
              )
-         df = pd.DataFrame(pieces)
-         print(df)
-         gps_gdf = gpd.GeoDataFrame(
-             data=df[
-                 ["ship_name", "cruise_name", "file_stem"]
-             ],  # try again with file_stem
-             geometry=df["geom"],
-             crs="EPSG:4326",
-         )
-         print(fiona.supported_drivers)
-         # gps_gdf.to_file('dataframe.shp', crs='epsg:4326')
-         # Convert geojson feature collection to pmtiles
-         gps_gdf.to_file("dataframe.geojson", driver="GeoJSON", crs="epsg:4326")
-         print("done")
-         """
-         # need to eliminate visits to null island
-         tippecanoe --no-feature-limit -zg --projection=EPSG:4326 -o dataframe.pmtiles -l cruises dataframe.geojson
-
-         https://docs.protomaps.com/pmtiles/create
-         PMTiles
-         https://drive.google.com/file/d/17Bi-UIXB9IJkIz30BHpiKHXYpCOgRFge/view?usp=sharing
-
-         Viewer
-         https://protomaps.github.io/PMTiles/#map=8.91/56.0234/-166.6346
-         """
-
-     #######################################################
-     # TODO: temporary using this to get info
-     def get_info_from_zarr_store(
-         self,
-         ship_name,
-         cruise_names,
-     ):
-         total_size = 0
-         s3_fs = s3fs.S3FileSystem(anon=True)
-         for cruise_name in cruise_names:
-             path_to_zarr_store = f"s3://noaa-wcsd-zarr-pds/level_2/{ship_name}/{cruise_name}/EK60/{cruise_name}.zarr"
-             zarr_store = s3fs.S3Map(root=path_to_zarr_store, s3=s3_fs)
-             xr_store = xr.open_zarr(store=zarr_store, consolidated=None)
-             print(f'Cruise: {cruise_name}, shape: {xr_store.time.shape[0]}')
-             total_size = total_size + xr_store.time.shape[0]
-
-     def get_geospatial_info_from_zarr_store(
-         self,
-         ship_name,
-         cruise_name,
-     ):
-         """
-         Open Zarr store, create geometry, write to geojson, return name
-         """
-         s3_fs = s3fs.S3FileSystem(anon=True)
-         gps_gdf = geopandas.GeoDataFrame(
-             columns=["id", "ship", "cruise", "sensor", "geometry"],
-             geometry="geometry",
-             crs="EPSG:4326"
-         )
-         path_to_zarr_store = f"s3://noaa-wcsd-zarr-pds/level_2/{ship_name}/{cruise_name}/EK60/{cruise_name}.zarr"
-         # file_name = os.path.normpath(path_to_zarr_store).split(os.sep)[-1]
-         # file_stem = os.path.splitext(os.path.basename(file_name))[0]
-         zarr_store = s3fs.S3Map(root=path_to_zarr_store, s3=s3_fs)
-         # ---Open Zarr Store--- #
-         # TODO: try-except to allow failures
-         print('opening store')
-         # xr_store = xr.open_zarr(store=zarr_store, consolidated=False)
-         xr_store = xr.open_zarr(store=zarr_store, consolidated=None)
-         print(xr_store.Sv.shape)
-         # ---Read Zarr Store Time/Latitude/Longitude--- #
-         latitude = xr_store.latitude.values
-         longitude = xr_store.longitude.values
-         if np.isnan(latitude).any() or np.isnan(longitude).any():
-             print(f'there was missing lat-lon data for {cruise_name}')
-             return None
-         # ---Add To GeoPandas Dataframe--- #
-         # TODO: experiment with tolerance "0.001"
-         geom = LineString(list(zip(longitude, latitude))).simplify(tolerance=0.001, preserve_topology=True)
-         gps_gdf.loc[0] = (0, "Henry_B._Bigelow", cruise_name, "EK60", geom)  # (ship, cruise, sensor, geometry)
-         gps_gdf.set_index('id', inplace=True)
-         gps_gdf.to_file(f"dataframe_{cruise_name}.geojson", driver="GeoJSON")  # , engine="pyogrio")
-         return cruise_name
+             df = pd.DataFrame(pieces)
+             gps_gdf = gpd.GeoDataFrame(
+                 data=df[["id", "ship_name", "cruise_name", "sensor_name"]],
+                 geometry=df["geom"],
+                 crs="EPSG:4326",
+             )
+             print(gps_gdf)
+             # {'DXF': 'rw', 'CSV': 'raw', 'OpenFileGDB': 'raw', 'ESRIJSON': 'r', 'ESRI Shapefile': 'raw', 'FlatGeobuf': 'raw', 'GeoJSON': 'raw', 'GeoJSONSeq': 'raw', 'GPKG': 'raw', 'GML': 'rw', 'OGR_GMT': 'rw', 'GPX': 'rw', 'MapInfo File': 'raw', 'DGN': 'raw', 'S57': 'r', 'SQLite': 'raw', 'TopoJSON': 'r'}
+             if "GeoJSON" not in fiona.supported_drivers.keys():
+                 raise RuntimeError("Missing GeoJSON driver")

-     #######################################################
-     def open_zarr_stores_with_thread_pool_executor(
-         self,
-         cruises: list,
-     ):
-         # 'cruises' is a list of cruises to process
-         completed_cruises = []
-         try:
-             with ThreadPoolExecutor(max_workers=32) as executor:
-                 futures = [
-                     executor.submit(
-                         self.get_geospatial_info_from_zarr_store,
-                         "Henry_B._Bigelow",  # ship_name
-                         cruise,  # cruise_name
-                     )
-                     for cruise in cruises
-                 ]
-                 for future in as_completed(futures):
-                     result = future.result()
-                     if result:
-                         completed_cruises.extend([result])
+             gps_gdf.set_index("id", inplace=True)
+             # gps_gdf.to_file(f"dataframe_{cruise_name}.geojson", driver="GeoJSON")  # , crs="epsg:4326")
+             return gps_gdf
          except Exception as err:
-             print(err)
-         print("Done opening zarr stores using thread pool.")
-         return completed_cruises  # Took ~12 minutes
+             raise RuntimeError(f"Problem parsing Zarr stores, {err}")

      #######################################################
-     # https://docs.protomaps.com/pmtiles/create
-     def aggregate_geojson_into_dataframe(
-         self
-     ):
-         """
-         iterate through cruises, threadpoolexecute geojson creation, aggregate geojson files into df,
-         """
-         gps_gdf = geopandas.GeoDataFrame(
+     @staticmethod
+     def aggregate_geojson_into_dataframe(geoms):
+         gps_gdf = gpd.GeoDataFrame(
              columns=["id", "ship", "cruise", "sensor", "geometry"],
              geometry="geometry",
-             crs="EPSG:4326"
+             crs="EPSG:4326",
+         )
+         for iii, geom in enumerate(geoms):
+             gps_gdf.loc[iii] = (
+                 iii,
+                 geom.ship_name[iii],
+                 geom.cruise_name[iii],
+                 geom.sensor_name[iii],
+                 geom.geometry[iii],
+             )
+         gps_gdf.set_index("id", inplace=True)
+         gps_gdf.to_file(
+             filename="dataset.geojson",
+             driver="GeoJSON",
+             engine="fiona",  # or "pyogrio"
+             layer_options={"ID_GENERATE": "YES"},
+             crs="EPSG:4326",
+             id_generate=True,  # required for the feature click selection
          )
-
-         file_type = 'dataframe_*.geojson'
-         geojson_files = glob.glob(file_type)
-         for jjj in range(len(geojson_files)):
-             print(jjj)
-             geom = geopandas.read_file(geojson_files[jjj])
-             gps_gdf.loc[jjj] = (jjj, geom.ship[0], geom.cruise[0], geom.sensor[0], geom.geometry[0])
-             # gps_gdf.loc[0] = (0, "Henry_B._Bigelow", cruise_name, "EK60", geom)  # (ship, cruise, sensor, geometry)
          print(gps_gdf)
-         gps_gdf.set_index('id', inplace=True)
-         gps_gdf.to_file(f"data.geojson", driver="GeoJSON", engine="pyogrio", layer_options={"ID_GENERATE": "YES"})
-         return list(gps_gdf.cruise)

-         # gps_gdf.loc[iii] = (iii, "Henry_B._Bigelow", cruise_name, "EK60", geom)  # (ship, cruise, sensor, geometry)
-         # print('writing to file')
-         # print(gps_gdf)
-         # gps_gdf.set_index('id', inplace=True)
-         # gps_gdf.to_file(f"dataframe_{cruise_name}.geojson", driver="GeoJSON", engine="pyogrio", layer_options={"ID_GENERATE": "YES"})
-         # https://gdal.org/en/latest/drivers/vector/jsonfg.html
-         # gps_gdf.to_file(
-         #     f"data.geojson",
-         #     driver="GeoJSON",
-         #     engine="pyogrio",
-         #     layer_options={"ID_FIELD": "id"}
-         # )
-         # gps_gdf.to_file(f"dataframe_{cruise_name}.geojson", driver="GeoJSON", engine="pyogrio", id_generate=True)
-
-         # print(fiona.supported_drivers)  # {'DXF': 'rw', 'CSV': 'raw', 'OpenFileGDB': 'raw', 'ESRIJSON': 'r', 'ESRI Shapefile': 'raw', 'FlatGeobuf': 'raw', 'GeoJSON': 'raw', 'GeoJSONSeq': 'raw', 'GPKG': 'raw', 'GML': 'rw', 'OGR_GMT': 'rw', 'GPX': 'rw', 'MapInfo File': 'raw', 'DGN': 'raw', 'S57': 'r', 'SQLite': 'raw', 'TopoJSON': 'r'}
-         # gps_gdf.to_file('dataframe.shp', crs="EPSG:4326", engine="fiona")
-         # Convert geojson feature collection to pmtiles
-         # gps_gdf.to_file("dataframe.geojson", driver="GeoJSON", crs="EPSG:4326", engine="fiona")
-         # print("done")
-         # ---Export Shapefile--- #
+     #######################################################
+     def create_collection_geojson(self):
+         cruises = [
+             "HB0706",
+             "HB0707",
+             "HB0710",
+             "HB0711",
+             "HB0802",
+             "HB0803",
+             "HB0805",
+             "HB0806",
+             "HB0807",
+             "HB0901",
+             "HB0902",
+             "HB0903",
+             "HB0904",
+             "HB0905",
+             "HB1002",
+             "HB1006",
+             "HB1102",
+             "HB1103",
+             "HB1105",
+             "HB1201",
+             "HB1206",
+             "HB1301",
+             "HB1303",
+             "HB1304",
+             "HB1401",
+             "HB1402",
+             "HB1403",
+             "HB1405",
+             "HB1501",
+             "HB1502",
+             "HB1503",
+             "HB1506",
+             "HB1507",
+             "HB1601",
+             "HB1603",
+             "HB1604",
+             "HB1701",
+             "HB1702",
+             "HB1801",
+             "HB1802",
+             "HB1803",
+             "HB1804",
+             "HB1805",
+             "HB1806",
+             "HB1901",
+             "HB1902",
+             "HB1903",
+             "HB1904",
+             "HB1906",
+             "HB1907",
+             "HB2001",
+             "HB2006",
+             "HB2007",
+             "HB20ORT",
+             "HB20TR",
+         ]
+         completed_cruises = self.check_all_cruises(
+             bucket_name=bucket_name, cruises=cruises
+         )  # TODO: threadpool this
+         ### create linestring ###
+         geometries = []
+         for jjj, completed_cruise in enumerate(
+             completed_cruises
+         ):  # TODO: threadpool this
+             geometries.append(
+                 self.get_cruise_geometry(cruise_name=completed_cruise, index=jjj)
+             )
+         #
+         self.aggregate_geojson_into_dataframe(geoms=geometries)
+         #
+         print(
+             'Now run this: "tippecanoe --no-feature-limit -zg -o dataset.pmtiles -l cruises dataset.geojson --force"'
+         )
+         # # water-column-sonar-id.pmtiles
+         # linux command: "tippecanoe --no-feature-limit -zg -o water-column-sonar-id.pmtiles -l cruises dataset.geojson --force"
+         # note: 'cruises' is the name of the layer
+         # size is ~3.3 MB for the pmtiles
+         # then drag-and-drop here: https://pmtiles.io/#map=6.79/39.802/-71.51

+     #######################################################
+     # TODO: copy the .pmtiles file to the s3 bucket "noaa-wcsd-pds-index"
+     #######################################################

+     #######################################################
+     # TODO: get threadpool working
+     # def open_zarr_stores_with_thread_pool_executor(
+     #     self,
+     #     cruises: list,
+     # ):
+     #     # 'cruises' is a list of cruises to process
+     #     completed_cruises = []
+     #     try:
+     #         with ThreadPoolExecutor(max_workers=32) as executor:
+     #             futures = [
+     #                 executor.submit(
+     #                     self.get_geospatial_info_from_zarr_store,
+     #                     "Henry_B._Bigelow",  # ship_name
+     #                     cruise,  # cruise_name
+     #                 )
+     #                 for cruise in cruises
+     #             ]
+     #             for future in as_completed(futures):
+     #                 result = future.result()
+     #                 if result:
+     #                     completed_cruises.extend([result])
+     #     except Exception as err:
+     #         raise RuntimeError(f"Problem, {err}")
+     #     print("Done opening zarr stores using thread pool.")
+     #     return completed_cruises  # Took ~12 minutes

-     # gps_gdf.set_geometry(col='geometry', inplace=True)
-     # gps_gdf.__geo_interface__
-     # gps_gdf.set_index('id', inplace=True)
-     # gps_gdf.to_file(f"dataframe3.geojson", driver="GeoJSON", crs="EPSG:4326", engine="fiona", index=True)
+     #######################################################

-     ### this gives the right layer id values
-     # gps_gdf.to_file(f"dataframe6.geojson", driver="GeoJSON", engine="pyogrio", layer_options={"ID_GENERATE": "YES"})
-     # jq '{"type": "FeatureCollection", "features": [.[] | .features[]]}' --slurp input*.geojson > output.geojson
-     # tippecanoe -zg --projection=EPSG:4326 -o water-column-sonar-id.pmtiles -l cruises output.geojson
-     # tippecanoe -zg --convert-stringified-ids-to-numbers --projection=EPSG:4326 -o water-column-sonar-id.pmtiles -l cruises dataframe*.geojson
-     # {
-     #     "type": "FeatureCollection",
-     #     "name": "dataframe5",
-     #     "features": [
-     #         { "type": "Feature", "id": 0, "properties": { "id": 0, "ship": "Henry_B._Bigelow", "cruise": "HB0706", "sensor": "EK60" }, "geometry": { "type": "LineString", "coordinates": [ [ -72.120498657226562, 39.659671783447266 ], [ -72.120773315429688, 39.660198211669922 ] ] } },
-     #         { "type": "Feature", "id": 1, "properties": { "id": 1, "ship": "Henry_B._Bigelow", "cruise": "HB0707", "sensor": "EK60" }, "geometry": { "type": "LineString", "coordinates": [ [ -71.797836303710938, 41.003166198730469 ], [ -71.797996520996094, 41.002998352050781 ], [ -71.798583984375, 41.002994537353516 ] ] } },
-     #         { "type": "Feature", "id": 2, "properties": { "id": 2, "ship": "Henry_B._Bigelow", "cruise": "HB0710", "sensor": "EK60" }, "geometry": { "type": "LineString", "coordinates": [ [ -72.489486694335938, 40.331901550292969 ], [ -72.490760803222656, 40.33099365234375 ] ] } }
-     #     ]
-     # }
-     """
-     # https://docs.protomaps.com/pmtiles/create
-     # ogr2ogr -t_srs EPSG:4326 data.geojson dataframe.shp
-     # Only need to do the second one here...
-     tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises dataframe.geojson
-     tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises --coalesce-densest-as-needed --extend-zooms-if-still-dropping dataframe*.geojson
-     # used this to combine all the geojson files into single pmtile file (2024-12-03):
-     tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises --coalesce-densest-as-needed --extend-zooms-if-still-dropping dataframe*.geojson

-     TODO:
-     run each one of the cruises in a separate ospool workflow.
-     each process gets own store
-     """

  ###########################################################
-
- # s3_manager = S3Manager()  # endpoint_url=endpoint_url)
- # # s3fs_manager = S3FSManager()
- # # input_bucket_name = "test_input_bucket"
- # # s3_manager.create_bucket(bucket_name=input_bucket_name)
- # ship_name = "Henry_B._Bigelow"
- # cruise_name = "HB0706"
- # sensor_name = "EK60"
- #
- # # ---Scan Bucket For All Zarr Stores--- #
- # # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html#level_2/Henry_B._Bigelow/HB0706/EK60/HB0706.zarr/
- # path_to_zarr_store = f"s3://noaa-wcsd-zarr-pds/level_2/Henry_B._Bigelow/HB0706/EK60/HB0706.zarr"
- # s3 = s3fs.S3FileSystem()
- # zarr_store = s3fs.S3Map(path_to_zarr_store, s3=s3)
- # ds_zarr = xr.open_zarr(zarr_store, consolidated=None)
- # print(ds_zarr.Sv.shape)
-
-
-
- total = [246847, 89911, 169763, 658047, 887640, 708771, 187099, 3672813, 4095002, 763268, 162727, 189454, 1925270, 3575857, 1031920, 1167590, 3737415, 4099957, 3990725, 3619996, 3573052, 2973090, 55851, 143192, 1550164, 3692819, 668400, 489735, 393260, 1311234, 242989, 4515760, 1303091, 704663, 270645, 3886437, 4204381, 1062090, 428639, 541455, 4206506, 298561, 1279329, 137416, 139836, 228947, 517949]
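Note: the new create_collection_geojson() stops after writing dataset.geojson; building the tiles and publishing them remain manual steps (the tippecanoe command is only printed, and copying the .pmtiles archive to the "noaa-wcsd-pds-index" bucket is still a TODO in the diff above). A minimal sketch of those two follow-up steps, assuming tippecanoe is installed locally and that the bucket named in the TODO is the intended destination; the output file name follows the water-column-sonar-id.pmtiles variant mentioned in the comments, and the S3 object key is illustrative, not defined by the package:

    import subprocess

    import boto3

    # Convert the exported GeoJSON to PMTiles; the flags mirror the command
    # printed by create_collection_geojson ('cruises' is the layer name).
    subprocess.run(
        [
            "tippecanoe",
            "--no-feature-limit",
            "-zg",
            "-o", "water-column-sonar-id.pmtiles",
            "-l", "cruises",
            "dataset.geojson",
            "--force",
        ],
        check=True,
    )

    # Publish the ~3.3 MB archive to the index bucket named in the TODO comment.
    # The object key here is a placeholder choice.
    boto3.client("s3").upload_file(
        Filename="water-column-sonar-id.pmtiles",
        Bucket="noaa-wcsd-pds-index",
        Key="water-column-sonar-id.pmtiles",
    )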
water_column_sonar_processing/geometry/spatiotemporal.py (new file)
@@ -0,0 +1,106 @@
+ import geopandas as gpd
+ import numpy as np
+ import pandas as pd
+ from shapely.geometry import Point
+
+ from water_column_sonar_processing.model import ZarrManager
+
+
+ # Convert "meters per second" to "knots"
+ # meters_per_second_to_knots = lambda mps_value: mps_value * 1.94384
+
+
+ class Spatiotemporal:
+     #######################################################
+     def __init__(
+         self,
+     ):
+         self.NANOSECONDS_PER_SECOND = 1e9
+         self.CUTOFF_DISTANCE_METERS = 50.0
+         self.CUTOFF_TIME_SECONDS = 10.0
+
+     #######################################################
+     @staticmethod
+     def meters_per_second_to_knots(
+         mps_value,
+     ):
+         return mps_value * 1.94384
+
+     #######################################################
+     def compute_speed_and_distance(
+         self,
+         times_ns,  #: np.ndarray[tuple[int], np.dtype[np.int64]],
+         latitudes,  #: np.ndarray,
+         longitudes,  #: np.ndarray,
+     ) -> pd.DataFrame:
+         try:
+             # fix times
+             times = np.array([np.datetime64(int(i), "ns") for i in times_ns])
+             geom = [Point(xy) for xy in zip(longitudes, latitudes)]
+             points_df = gpd.GeoDataFrame({"geometry": geom}, crs="EPSG:4326")
+             # Conversion to a rectilinear projection coordinate system where distance can be calculated with pythagorean theorem
+             # EPSG:4087, WGS 84 / World Equidistant Cylindrical
+             # https://epsg.io/4087
+             points_df.to_crs(epsg=4087, inplace=True)
+             distance_diffs = points_df.distance(points_df.geometry.shift())
+             distance_diffs[0] = distance_diffs[1]  # missing first datapoint, backfill
+             # Issue: np.max(distance_diffs) = 3397 meters
+             time_diffs_ns = np.append(0, (times[1:] - times[:-1]).astype(int))
+             time_diffs_ns[0] = time_diffs_ns[1]  # missing first datapoint, backfill
+             time_diffs_seconds = time_diffs_ns / self.NANOSECONDS_PER_SECOND
+             # Calculate the speed in knots
+             speed_meters_per_second = np.array(
+                 (distance_diffs / time_diffs_ns * self.NANOSECONDS_PER_SECOND),
+                 dtype=np.float32,
+             )
+             knots = self.meters_per_second_to_knots(speed_meters_per_second)
+             metrics_df = pd.DataFrame(
+                 {
+                     "speed_knots": knots.astype(dtype=np.float32),
+                     "distance_meters": distance_diffs.to_numpy(dtype=np.float32),
+                     "diff_seconds": time_diffs_seconds.astype(np.float32),
+                 },
+                 index=times,
+             )
+             #
+             return metrics_df
+         except Exception as err:
+             raise RuntimeError(f"Exception encountered, {err}")
+
+     #######################################################
+     def add_speed_and_distance(
+         self,
+         ship_name,
+         cruise_name,
+         sensor_name,
+         bucket_name,
+         endpoint_url=None,
+     ) -> None:
+         try:
+             zarr_manager = ZarrManager()
+             zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
+                 ship_name=ship_name,
+                 cruise_name=cruise_name,
+                 sensor_name=sensor_name,
+                 output_bucket_name=bucket_name,
+                 endpoint_url=endpoint_url,
+             )
+             longitudes = zarr_store["longitude"][:]
+             latitudes = zarr_store["latitude"][:]
+             times = zarr_store["time"][:]
+             #
+             metrics_df = self.compute_speed_and_distance(
+                 times_ns=times,
+                 latitudes=latitudes,
+                 longitudes=longitudes,
+             )
+             # Write the speed and distance to the output zarr store
+             zarr_store["speed"][:] = metrics_df.speed_knots.values
+             zarr_store["distance"][:] = metrics_df.distance_meters.values
+         except Exception as err:
+             raise RuntimeError(
+                 f"Exception encountered writing the speed and distance, {err}"
+             )
+
+
+ ###########################################################
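For context on compute_speed_and_distance above: the GPS fixes are reprojected from EPSG:4326 to EPSG:4087 (WGS 84 / World Equidistant Cylindrical) so that GeoPandas .distance() returns values in meters rather than degrees, and speed is then converted from meters per second to knots with the 1.94384 factor. A minimal standalone sketch of that calculation, using made-up coordinates and timestamps (the values are illustrative only, not cruise data):

    import geopandas as gpd
    import numpy as np
    from shapely.geometry import Point

    # Three illustrative GPS fixes, one second apart.
    longitudes = [-71.500, -71.501, -71.502]
    latitudes = [39.800, 39.8005, 39.8010]
    times = np.array(
        ["2019-01-01T00:00:00", "2019-01-01T00:00:01", "2019-01-01T00:00:02"],
        dtype="datetime64[ns]",
    )

    points = gpd.GeoDataFrame(
        {"geometry": [Point(xy) for xy in zip(longitudes, latitudes)]},
        crs="EPSG:4326",
    )
    # Reproject so .distance() yields meters instead of degrees.
    points = points.to_crs(epsg=4087)

    distance_meters = points.distance(points.geometry.shift())  # NaN for the first fix
    elapsed_seconds = np.append(np.nan, np.diff(times).astype("int64")) / 1e9
    speed_knots = (distance_meters / elapsed_seconds) * 1.94384  # m/s -> knots
    print(speed_knots)

The package code backfills the first (undefined) distance and time difference instead of leaving them NaN, and writes the resulting speed and distance arrays back into the cruise-level Zarr store.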