water-column-sonar-processing 0.0.1-py3-none-any.whl → 25.11.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

This version of water-column-sonar-processing has been flagged as a potentially problematic release.

Files changed (60)
  1. water_column_sonar_processing/__init__.py +13 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -0
  3. water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
  4. water_column_sonar_processing/aws/s3_manager.py +420 -0
  5. water_column_sonar_processing/aws/s3fs_manager.py +72 -0
  6. {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
  7. {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +191 -0
  10. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  11. water_column_sonar_processing/cruise/resample_regrid.py +339 -0
  12. water_column_sonar_processing/geometry/__init__.py +11 -0
  13. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  14. water_column_sonar_processing/geometry/geometry_manager.py +243 -0
  15. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  16. water_column_sonar_processing/geometry/pmtile_generation.py +261 -0
  17. water_column_sonar_processing/index/__init__.py +3 -0
  18. water_column_sonar_processing/index/index_manager.py +384 -0
  19. water_column_sonar_processing/model/__init__.py +3 -0
  20. water_column_sonar_processing/model/zarr_manager.py +722 -0
  21. water_column_sonar_processing/process.py +149 -0
  22. water_column_sonar_processing/processing/__init__.py +4 -0
  23. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  24. water_column_sonar_processing/processing/raw_to_zarr.py +425 -0
  25. water_column_sonar_processing/utility/__init__.py +13 -0
  26. {model → water_column_sonar_processing}/utility/cleaner.py +7 -8
  27. water_column_sonar_processing/utility/constants.py +118 -0
  28. {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
  29. water_column_sonar_processing/utility/timestamp.py +12 -0
  30. water_column_sonar_processing-25.11.1.dist-info/METADATA +182 -0
  31. water_column_sonar_processing-25.11.1.dist-info/RECORD +34 -0
  32. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info}/WHEEL +1 -1
  33. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info/licenses}/LICENSE +1 -1
  34. water_column_sonar_processing-25.11.1.dist-info/top_level.txt +1 -0
  35. __init__.py +0 -0
  36. model/__init__.py +0 -0
  37. model/aws/__init__.py +0 -0
  38. model/aws/dynamodb_manager.py +0 -149
  39. model/aws/s3_manager.py +0 -356
  40. model/aws/s3fs_manager.py +0 -74
  41. model/cruise/__init__.py +0 -0
  42. model/cruise/create_empty_zarr_store.py +0 -166
  43. model/cruise/resample_regrid.py +0 -248
  44. model/geospatial/__init__.py +0 -0
  45. model/geospatial/geometry_manager.py +0 -194
  46. model/geospatial/geometry_simplification.py +0 -81
  47. model/geospatial/pmtile_generation.py +0 -74
  48. model/index/__init__.py +0 -0
  49. model/index/index.py +0 -228
  50. model/model.py +0 -138
  51. model/utility/__init__.py +0 -0
  52. model/utility/constants.py +0 -56
  53. model/utility/timestamp.py +0 -12
  54. model/zarr/__init__.py +0 -0
  55. model/zarr/bar.py +0 -28
  56. model/zarr/foo.py +0 -11
  57. model/zarr/zarr_manager.py +0 -298
  58. water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
  59. water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
  60. water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
@@ -0,0 +1,191 @@
+ import os
+ import tempfile
+
+ import numpy as np
+
+ from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+ from water_column_sonar_processing.model import ZarrManager
+ from water_column_sonar_processing.utility import Cleaner
+
+
+ # TODO: change name to "CreateLocalEmptyZarrStore"
+ class CreateEmptyZarrStore:
+     #######################################################
+     def __init__(
+         self,
+     ):
+         self.__overwrite = True
+         # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+
+     #######################################################
+     # TODO: moved this to the s3_manager
+     # def upload_zarr_store_to_s3(
+     #     self,
+     #     output_bucket_name: str,
+     #     local_directory: str,
+     #     object_prefix: str,
+     #     cruise_name: str,
+     # ) -> None:
+     #     print("uploading model store to s3")
+     #     s3_manager = S3Manager()
+     #     #
+     #     print("Starting upload with thread pool executor.")
+     #     # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+     #     all_files = []
+     #     for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+     #         for file in files:
+     #             local_path = os.path.join(subdir, file)
+     #             # TODO: find a better method for splitting strings here:
+     #             # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+     #             s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+     #             all_files.append([local_path, s3_key])
+     #     #
+     #     # print(all_files)
+     #     s3_manager.upload_files_with_thread_pool_executor(
+     #         output_bucket_name=output_bucket_name,
+     #         all_files=all_files,
+     #     )
+     #     print("Done uploading with thread pool executor.")
+     #     # TODO: move to common place
+
+     #######################################################
+     def create_cruise_level_zarr_store(
+         self,
+         output_bucket_name: str,
+         ship_name: str,
+         cruise_name: str,
+         sensor_name: str,
+         table_name: str,
+         # override_cruise_min_epsilon=None,
+     ) -> None:
+         """
+         Initialize the Zarr store. The water_level needs to be integrated.
+         """
+         tempdir = tempfile.TemporaryDirectory()
+         try:
+             # HB0806 - 123, HB0903 - 220
+             dynamo_db_manager = DynamoDBManager()
+             s3_manager = S3Manager()
+
+             df = dynamo_db_manager.get_table_as_df(
+                 table_name=table_name,
+                 cruise_name=cruise_name,
+             )
+
+             # TODO: filter the dataframe just for enums >= LEVEL_1_PROCESSING
+             # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
+
+             # TODO: VERIFY GEOJSON EXISTS as prerequisite!!!
+
+             print(f"DataFrame shape: {df.shape}")
+             cruise_channels = list(
+                 set([i for sublist in df["CHANNELS"].dropna() for i in sublist])
+             )
+             cruise_channels.sort()
+
+             consolidated_zarr_width = np.sum(
+                 df["NUM_PING_TIME_DROPNA"].dropna().astype(int)
+             )
+
+             # [3] calculate the max/min measurement resolutions for the whole cruise
+             # cruise_min_echo_range = np.min(
+             #     (df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
+             # )
+
+             # [4] calculate the np.max(max_echo_range + water_level)
+             cruise_max_echo_range = np.max(
+                 (df["MAX_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
+             )
+
+             # TODO: set this to either 1 or 0.5 meters
+             cruise_min_epsilon = np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
+
+             print(f"cruise_max_echo_range: {cruise_max_echo_range}")
+
+             # [5] get number of channels
+             cruise_frequencies = [
+                 float(i) for i in df["FREQUENCIES"].dropna().values.flatten()[0]
+             ]
+             print(cruise_frequencies)
+
+             new_width = int(consolidated_zarr_width)
+             print(f"new_width: {new_width}")
+             #################################################################
+             store_name = f"{cruise_name}.zarr"
+             print(store_name)
+             ################################################################
+             # Delete the existing model store if it exists
+             zarr_prefix = os.path.join("level_2", ship_name, cruise_name, sensor_name)
+             child_objects = s3_manager.get_child_objects(
+                 bucket_name=output_bucket_name,
+                 sub_prefix=zarr_prefix,
+             )
+             #
+             if len(child_objects) > 0:
+                 s3_manager.delete_nodd_objects(
+                     bucket_name=output_bucket_name,
+                     objects=child_objects,
+                 )
+             ################################################################
+             # Create the new model store
+             zarr_manager = ZarrManager()
+             new_height = len(  # [0.19m down to 1001.744m] = 5272 samples, 10.3 tiles @ 512
+                 zarr_manager.get_depth_values(  # these depths should be from min_epsilon to max_range+water_level
+                     # min_echo_range=cruise_min_echo_range,
+                     max_echo_range=cruise_max_echo_range,
+                     cruise_min_epsilon=cruise_min_epsilon,
+                 )
+             )
+             print(f"new_height: {new_height}")
+
+             zarr_manager.create_zarr_store(
+                 path=tempdir.name,  # TODO: need to use .name or problem
+                 ship_name=ship_name,
+                 cruise_name=cruise_name,
+                 sensor_name=sensor_name,
+                 frequencies=cruise_frequencies,
+                 width=new_width,
+                 # min_echo_range=cruise_min_echo_range,
+                 max_echo_range=cruise_max_echo_range,
+                 cruise_min_epsilon=cruise_min_epsilon,
+                 calibration_status=True,
+             )
+             #################################################################
+             s3_manager.upload_zarr_store_to_s3(
+                 output_bucket_name=output_bucket_name,
+                 local_directory=tempdir.name,  # TODO: need to use .name or problem
+                 object_prefix=zarr_prefix,
+                 cruise_name=cruise_name,
+             )
+             # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html
+             #################################################################
+             # Verify the count of the files uploaded
+             # count = self.__get_file_count(store_name=store_name)
+             # #
+             # raw_zarr_files = self.__get_s3_files(  # TODO: just need count
+             #     bucket_name=self.__output_bucket,
+             #     sub_prefix=os.path.join(zarr_prefix, store_name),
+             # )
+             # if len(raw_zarr_files) != count:
+             #     print(f'Problem writing {store_name} with proper count {count}.')
+             #     raise Exception("File count doesn't equal number of s3 Zarr store files.")
+             # else:
+             #     print("File counts match.")
+             #################################################################
+             # Success
+             # TODO: update enum in dynamodb
+             print("Done creating cruise level zarr store.")
+             #################################################################
+         except Exception as err:
+             raise RuntimeError(
+                 f"Problem trying to create new cruise model store, {err}"
+             )
+         finally:
+             cleaner = Cleaner()
+             cleaner.delete_local_files()
+             # TODO: should delete zarr store in temp directory too?
+             print("Done creating cruise level model store")
+
+
+ ###########################################################
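
For orientation, here is a minimal invocation sketch of the new class. The ship, cruise, and sensor values come from comments in the code above; the bucket and table names are hypothetical placeholders, and the import path assumes the cruise subpackage re-exports the class, as its new __init__.py suggests.

    from water_column_sonar_processing.cruise import CreateEmptyZarrStore

    create_empty = CreateEmptyZarrStore()
    create_empty.create_cruise_level_zarr_store(
        output_bucket_name="example-output-bucket",  # placeholder bucket name
        ship_name="Henry_B._Bigelow",
        cruise_name="HB0806",
        sensor_name="EK60",
        table_name="example-pipeline-table",  # placeholder DynamoDB table name
    )

This reads the cruise's file-level metadata from DynamoDB, sizes an empty cruise-level Zarr store in a temporary directory, and uploads it to the output bucket.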
@@ -0,0 +1,21 @@
+ # ### https://xarray-datatree.readthedocs.io/en/latest/data-structures.html
+ # import xarray as xr
+ # from datatree import DataTree
+ #
+ #
+ # class DatatreeManager:
+ #     #######################################################
+ #     def __init__(
+ #         self,
+ #     ):
+ #         self.dtype = "float32"
+ #
+ #     #################################################################
+ #     def create_datatree(
+ #         self,
+ #         input_ds,
+ #     ) -> None:
+ #         ds1 = xr.Dataset({"foo": "orange"})
+ #         dt = DataTree(name="root", dataset=ds1)  # create root node
+ #         # ds2 = xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])})
+ #         return dt
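
The module above is entirely commented out, but for reference, here is a minimal sketch of the tree structure it gestures at, assuming a recent xarray release that ships xr.DataTree (the standalone datatree package imported above has since been merged into xarray):

    import xarray as xr

    # Root node wrapping a small dataset, mirroring the commented-out example.
    ds1 = xr.Dataset({"foo": "orange"})
    dt = xr.DataTree(name="root", dataset=ds1)

    # Child nodes can be grafted on under a path-like key.
    ds2 = xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])})
    dt["child"] = xr.DataTree(dataset=ds2)
    print(dt)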
@@ -0,0 +1,339 @@
+ import gc
+ import warnings
+ from pathlib import Path
+
+ import numpy as np
+ import pandas as pd
+ import xarray as xr
+
+ from water_column_sonar_processing.aws import DynamoDBManager
+ from water_column_sonar_processing.geometry import GeometryManager
+ from water_column_sonar_processing.model import ZarrManager
+
+ warnings.simplefilter("ignore", category=RuntimeWarning)
+
+
+ class ResampleRegrid:
+     #######################################################
+     def __init__(
+         self,
+     ):
+         self.__overwrite = True
+         # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+         self.dtype = "float32"
+
+     #################################################################
+     def interpolate_data(
+         self,
+         input_xr,
+         ping_times,
+         all_cruise_depth_values,  # includes water_level offset
+         water_level,  # this is the offset that will be added to each respective file
+     ) -> np.ndarray:
+         """
+         Interpolate a file-level dataset onto the cruise-level depth grid.
+         """
+         print("Interpolating dataset.")
+         try:
+             data = np.empty(
+                 (
+                     len(all_cruise_depth_values),
+                     len(ping_times),
+                     len(input_xr.frequency_nominal),
+                 ),
+                 dtype=self.dtype,
+             )
+
+             data[:] = np.nan
+
+             regrid_resample = xr.DataArray(  # where data will be written to
+                 data=data,
+                 dims=("depth", "time", "frequency"),
+                 coords={
+                     "depth": all_cruise_depth_values,
+                     "time": ping_times,
+                     "frequency": input_xr.frequency_nominal.values,
+                 },
+             )
+
+             # shift the input data by water_level
+             input_xr.echo_range.values = (
+                 input_xr.echo_range.values + water_level
+             )  # water_level  # TODO: change
+
+             channels = input_xr.channel.values
+             for channel in range(
+                 len(channels)
+             ):  # ?TODO: leaving off here, need to subset for just indices in time axis
+                 gc.collect()
+                 max_depths = np.nanmax(
+                     a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
+                     # + water_level,
+                     axis=1,
+                 )
+                 superset_of_max_depths = set(
+                     max_depths
+                 )  # HB1501, D20150503-T102035.raw, TypeError: unhashable type: 'numpy.ndarray'
+                 set_of_max_depths = list(
+                     {x for x in superset_of_max_depths if x == x}
+                 )  # removes nan's
+                 # iterate through partitions of the dataset with similar depths and resample
+                 for select_max_depth in set_of_max_depths:
+                     # TODO: for nan just skip and leave all nan's
+                     select_indices = [
+                         i
+                         for i in range(0, len(max_depths))
+                         if max_depths[i] == select_max_depth
+                     ]
+
+                     # now create a new DataArray with the proper dimension and indices
+                     # data_select = input_xr.Sv.sel(
+                     #     channel=input_xr.channel[channel]
+                     # ).values[select_indices, :].T  # TODO: don't like this transpose
+                     data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[
+                         select_indices, :
+                     ].T.values
+                     # changed from ".values[select_indices, :].T" to "[select_indices, :].values.T"
+
+                     times_select = input_xr.ping_time.values[select_indices]
+                     depths_select = input_xr.echo_range.sel(
+                         channel=input_xr.channel[channel]
+                     ).values[
+                         select_indices[0], :
+                     ]  # '0' because all others in the group should be the same
+
+                     da_select = xr.DataArray(
+                         data=data_select,
+                         dims=("depth", "time"),
+                         coords={
+                             "depth": depths_select,
+                             "time": times_select,
+                         },
+                     ).dropna(dim="depth")
+                     resampled = da_select.interp(
+                         depth=all_cruise_depth_values, method="nearest"
+                     )
+                     # write to the resample array
+                     regrid_resample.loc[
+                         dict(
+                             time=times_select,
+                             frequency=input_xr.frequency_nominal.values[channel],
+                         )
+                     ] = resampled
+                     print(f"updated {len(times_select)} ping times")
+                 gc.collect()
+         except Exception as err:
+             raise RuntimeError(f"Problem interpolating the dataset, {err}")
+         print("Done interpolating dataset.")
+         return regrid_resample.values.copy()
+
+     #################################################################
+     def resample_regrid(
+         self,
+         ship_name,
+         cruise_name,
+         sensor_name,
+         table_name,
+         bucket_name,
+         override_select_files=None,
+         # override_cruise_min_epsilon=None,
+         endpoint_url=None,
+     ) -> None:
+         """
+         The goal here is to interpolate the dataset against the depth values already populated
+         in the existing file-level model stores. We open the cruise-level store with Zarr for
+         read/write operations. We open the file-level store with Xarray to leverage its tools
+         for resampling and subsetting the dataset.
+         """
+         print("Resample Regrid, Interpolating dataset.")
+         try:
+             zarr_manager = ZarrManager()
+             geo_manager = GeometryManager()
+
+             output_zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
+                 ship_name=ship_name,
+                 cruise_name=cruise_name,
+                 sensor_name=sensor_name,
+                 output_bucket_name=bucket_name,
+                 endpoint_url=endpoint_url,
+             )
+
+             # get dynamo stuff
+             dynamo_db_manager = DynamoDBManager()
+             cruise_df = dynamo_db_manager.get_table_as_df(
+                 # ship_name=ship_name,
+                 cruise_name=cruise_name,
+                 # sensor_name=sensor_name,
+                 table_name=table_name,
+             )
+
+             #########################################################
+             #########################################################
+             all_file_names = cruise_df["FILE_NAME"]
+
+             if override_select_files is not None:
+                 all_file_names = override_select_files
+
+             # Iterate files
+             for file_name in all_file_names:
+                 gc.collect()
+                 file_name_stem = Path(file_name).stem
+                 print(f"Processing file: {file_name_stem}.")
+
+                 if f"{file_name_stem}.raw" not in list(cruise_df["FILE_NAME"]):
+                     raise Exception("Raw file stem not found in DynamoDB.")
+
+                 # status = PipelineStatus['LEVEL_1_PROCESSING']
+                 # TODO: filter rows by enum success, filter the dataframe just for enums >= LEVEL_1_PROCESSING
+                 # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
+
+                 # Get index from all cruise files. Note: should be based on which are included in the cruise.
+                 index = int(
+                     cruise_df.index[cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"][
+                         0
+                     ]
+                 )
+
+                 # Get the input store (this is unadjusted for water_level)
+                 input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
+                     ship_name=ship_name,
+                     cruise_name=cruise_name,
+                     sensor_name=sensor_name,
+                     file_name_stem=file_name_stem,
+                     input_bucket_name=bucket_name,
+                     endpoint_url=endpoint_url,
+                 )
+
+                 # This is the vertical offset of the sensor relative to the ocean surface
+                 # See https://echopype.readthedocs.io/en/stable/data-proc-additional.html
+                 if "water_level" in input_xr_zarr_store.keys():
+                     water_level = input_xr_zarr_store.water_level.values
+                 else:
+                     water_level = 0.0
+                 #########################################################################
+                 # [3] Get needed time indices (along the x-axis)
+                 # Offset from start index to insert the new dataset. Note that missing values are excluded.
+                 ping_time_cumsum = np.insert(
+                     np.cumsum(
+                         cruise_df["NUM_PING_TIME_DROPNA"].dropna().to_numpy(dtype=int)
+                     ),
+                     obj=0,
+                     values=0,
+                 )
+                 start_ping_time_index = ping_time_cumsum[index]
+                 end_ping_time_index = ping_time_cumsum[index + 1]
+
+                 max_echo_range = np.max(
+                     (cruise_df["MAX_ECHO_RANGE"] + cruise_df["WATER_LEVEL"])
+                     .dropna()
+                     .astype(float)
+                 )
+                 cruise_min_epsilon = np.min(
+                     cruise_df["MIN_ECHO_RANGE"].dropna().astype(float)
+                 )
+
+                 # Note: cruise dims (depth, time, frequency)
+                 all_cruise_depth_values = zarr_manager.get_depth_values(  # needs to integrate water_level
+                     # min_echo_range=min_echo_range,
+                     max_echo_range=max_echo_range,  # does it here
+                     cruise_min_epsilon=cruise_min_epsilon,  # remove this & integrate into min_echo_range
+                 )  # with an offset of 7.5 meters, a 0 meter measurement should now start at 7.5 meters
+
+                 print(" ".join(list(input_xr_zarr_store.Sv.dims)))
+                 if set(input_xr_zarr_store.Sv.dims) != {
+                     "channel",
+                     "ping_time",
+                     "range_sample",
+                 }:
+                     raise Exception("Xarray dimensions are not as expected.")
+
+                 indices, geospatial = geo_manager.read_s3_geo_json(
+                     ship_name=ship_name,
+                     cruise_name=cruise_name,
+                     sensor_name=sensor_name,
+                     file_name_stem=file_name_stem,
+                     input_xr_zarr_store=input_xr_zarr_store,
+                     endpoint_url=endpoint_url,
+                     output_bucket_name=bucket_name,
+                 )
+
+                 input_xr = input_xr_zarr_store.isel(
+                     ping_time=indices
+                 )  # Problem with HB200802-D20080310-T174959.zarr/
+
+                 ping_times = input_xr.ping_time.values
+                 # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
+                 epoch_seconds = [
+                     (pd.Timestamp(i) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
+                     for i in ping_times
+                 ]
+                 output_zarr_store["time"][start_ping_time_index:end_ping_time_index] = (
+                     epoch_seconds
+                 )
+
+                 # --- UPDATING --- #
+                 regrid_resample = self.interpolate_data(
+                     input_xr=input_xr,
+                     ping_times=ping_times,
+                     all_cruise_depth_values=all_cruise_depth_values,  # should accommodate the water_level already
+                     water_level=water_level,  # not applied to anything yet
+                 )
+
+                 print(
+                     f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}"
+                 )
+                 #########################################################################
+                 # write Sv values to the cruise-level model store
+                 for fff in range(regrid_resample.shape[-1]):
+                     output_zarr_store["Sv"][
+                         :, start_ping_time_index:end_ping_time_index, fff
+                     ] = regrid_resample[:, :, fff]
+                 #########################################################################
+                 # TODO: add the "detected_seafloor_depth/" to the L2 cruise dataarrays
+                 # TODO: make bottom optional
+                 # TODO: Only checking the first channel for now. Need to average across all channels
+                 #  in the future. See https://github.com/CI-CMG/water-column-sonar-processing/issues/11
+                 if "detected_seafloor_depth" in input_xr.variables:
+                     print(
+                         "Found detected_seafloor_depth, adding dataset to output store."
+                     )
+                     detected_seafloor_depth = input_xr.detected_seafloor_depth.values
+                     detected_seafloor_depth[detected_seafloor_depth == 0.0] = np.nan
+                     # TODO: problem here: Processing file: D20070711-T210709.
+
+                     # Use the lowest frequency to determine the bottom
+                     detected_seafloor_depths = detected_seafloor_depth[0, :]
+
+                     detected_seafloor_depths[detected_seafloor_depths == 0.0] = np.nan
+                     print(f"min depth measured: {np.nanmin(detected_seafloor_depths)}")
+                     print(f"max depth measured: {np.nanmax(detected_seafloor_depths)}")
+                     # available_indices = np.argwhere(np.isnan(geospatial['latitude'].values))
+                     output_zarr_store["bottom"][
+                         start_ping_time_index:end_ping_time_index
+                     ] = detected_seafloor_depths
+                 #
+                 #########################################################################
+                 # [5] write the subset of latitude/longitude
+                 output_zarr_store["latitude"][
+                     start_ping_time_index:end_ping_time_index
+                 ] = geospatial.dropna()[
+                     "latitude"
+                 ].values  # TODO: get from ds_sv directly, don't need geojson anymore
+                 output_zarr_store["longitude"][
+                     start_ping_time_index:end_ping_time_index
+                 ] = geospatial.dropna()["longitude"].values
+                 #########################################################################
+                 #########################################################################
+         except Exception as err:
+             raise RuntimeError(f"Problem with resample_regrid, {err}")
+         finally:
+             print("Exiting resample_regrid.")
+             # TODO: read across times and verify the dataset was written?
+
+     #######################################################
+
+
+ ###########################################################
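
One detail worth pulling out of the loop above: the cruise-level time coordinate stores floating-point epoch seconds, not datetime64 values. A self-contained sketch of that conversion, using the example timestamp from the comment in the code:

    import numpy as np
    import pandas as pd

    # numpy.datetime64 ping times -> seconds since the Unix epoch (float).
    ping_times = np.array(["2007-07-20T02:10:25.845073920"], dtype="datetime64[ns]")
    epoch_seconds = [
        (pd.Timestamp(t) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
        for t in ping_times
    ]
    print(epoch_seconds)  # [1184897425.845074]

Dividing one pd.Timedelta by pd.Timedelta("1s") yields a float with sub-second precision preserved, which is why the code converts per element rather than with a plain dtype cast.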
@@ -0,0 +1,11 @@
+ from .elevation_manager import ElevationManager
+ from .geometry_manager import GeometryManager
+ from .line_simplification import LineSimplification
+ from .pmtile_generation import PMTileGeneration
+
+ __all__ = [
+     "ElevationManager",
+     "GeometryManager",
+     "LineSimplification",
+     "PMTileGeneration",
+ ]
@@ -0,0 +1,111 @@
+ """
+ https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry=-31.70235%2C13.03332&geometryType=esriGeometryPoint&returnGeometry=false&returnCatalogItems=false&f=json
+
+ https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/
+     identify?
+     geometry=-31.70235%2C13.03332
+     &geometryType=esriGeometryPoint
+     &returnGeometry=false
+     &returnCatalogItems=false
+     &f=json
+ {"objectId":0,"name":"Pixel","value":"-5733","location":{"x":-31.702349999999999,"y":13.03332,"spatialReference":{"wkid":4326,"latestWkid":4326}},"properties":null,"catalogItems":null,"catalogItemVisibilities":[]}
+ -5733
+
+ (base) rudy:deleteME rudy$ curl https://api.opentopodata.org/v1/gebco2020?locations=13.03332,-31.70235
+ {
+     "results": [
+         {
+             "dataset": "gebco2020",
+             "elevation": -5729.0,
+             "location": {
+                 "lat": 13.03332,
+                 "lng": -31.70235
+             }
+         }
+     ],
+     "status": "OK"
+ }
+ """
+
+ import json
+ import time
+ from collections.abc import Generator
+
+ import requests
+
+
+ def chunked(ll: list, n: int) -> Generator:
+     # Yields successive n-sized chunks from ll.
+     for i in range(0, len(ll), n):
+         yield ll[i : i + n]
+
+
+ class ElevationManager:
+     #######################################################
+     def __init__(
+         self,
+     ):
+         self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
+         self.TIMEOUT_SECONDS = 10
+
+     #######################################################
+     def get_arcgis_elevation(
+         self,
+         lngs: list,
+         lats: list,
+         chunk_size: int = 500,  # this appears to be the API limit
+     ) -> list:
+         # Reference: https://developers.arcgis.com/rest/services-reference/enterprise/map-to-image/
+         # Info: https://www.arcgis.com/home/item.html?id=c876e3c96a8642ab8557646a3b4fa0ff
+         ### 'https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={"points":[[-31.70235,13.03332],[-32.70235,14.03332]]}&geometryType=esriGeometryMultipoint&returnGeometry=false&returnCatalogItems=false&f=json'
+         if len(lngs) != len(lats):
+             raise ValueError("lngs and lats must have the same length")
+
+         geometryType = "esriGeometryMultipoint"  # TODO: allow single point?
+
+         depths = []
+
+         list_of_points = [list(elem) for elem in list(zip(lngs, lats))]
+         for chunk in chunked(list_of_points, chunk_size):
+             time.sleep(0.1)
+             # order: (lng, lat)
+             geometry = f'{{"points":{str(chunk)}}}'
+             url = f"https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={geometry}&geometryType={geometryType}&returnGeometry=false&returnCatalogItems=false&f=json"
+             result = requests.get(url, timeout=self.TIMEOUT_SECONDS)
+             res = json.loads(result.content.decode("utf8"))
+             if "results" in res:
+                 for element in res["results"]:
+                     depths.append(float(element["value"]))
+             elif "value" in res:
+                 depths.append(float(res["value"]))
+
+         return depths
+
+     # def get_gebco_bathymetry_elevation(self) -> int:
+     #     # Documentation: https://www.opentopodata.org/datasets/gebco2020/
+     #     latitude = 13.03332
+     #     longitude = -31.70235
+     #     dataset = "gebco2020"
+     #     url = f"https://api.opentopodata.org/v1/{dataset}?locations={latitude},{longitude}"
+     #     pass
+
+     # def get_elevation(
+     #     self,
+     #     df,
+     #     lat_column,
+     #     lon_column,
+     # ) -> int:
+     #     """Query the service using lat, lon, and add the elevation values as a new column."""
+     #     url = r'https://epqs.nationalmap.gov/v1/json?'
+     #     elevations = []
+     #     for lat, lon in zip(df[lat_column], df[lon_column]):
+     #         # define REST query params
+     #         params = {
+     #             'output': 'json',
+     #             'x': lon,
+     #             'y': lat,
+     #             'units': 'Meters'
+     #         }
+     #         result = requests.get((url + urllib.parse.urlencode(params)))
+     #         elevations.append(result.json()['value'])
+     #     return elevations
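
A short usage sketch for the class above, using the coordinates from the module docstring (the first point should return the -5733 m pixel value shown there); the import path matches the geometry subpackage __init__.py earlier in this diff:

    from water_column_sonar_processing.geometry import ElevationManager

    # Query the NOAA NCEI DEM global mosaic for depths at two points.
    elevation_manager = ElevationManager()
    depths = elevation_manager.get_arcgis_elevation(
        lngs=[-31.70235, -32.70235],
        lats=[13.03332, 14.03332],
    )
    print(depths)  # e.g. [-5733.0, ...]

Points are sent 500 per request to stay under the service's apparent limit, with a short sleep between requests to be polite to the API.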