water-column-sonar-processing 25.11.1__py3-none-any.whl → 26.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.

Files changed (22)
  1. water_column_sonar_processing/aws/s3_manager.py +2 -4
  2. water_column_sonar_processing/aws/s3fs_manager.py +1 -9
  3. water_column_sonar_processing/cruise/create_empty_zarr_store.py +19 -81
  4. water_column_sonar_processing/cruise/resample_regrid.py +88 -104
  5. water_column_sonar_processing/geometry/__init__.py +2 -0
  6. water_column_sonar_processing/geometry/elevation_manager.py +2 -2
  7. water_column_sonar_processing/geometry/geometry_manager.py +11 -13
  8. water_column_sonar_processing/geometry/line_simplification.py +10 -10
  9. water_column_sonar_processing/geometry/pmtile_generation.py +8 -3
  10. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  11. water_column_sonar_processing/index/index_manager.py +43 -46
  12. water_column_sonar_processing/model/zarr_manager.py +533 -514
  13. water_column_sonar_processing/processing/raw_to_zarr.py +45 -139
  14. water_column_sonar_processing/utility/cleaner.py +2 -1
  15. water_column_sonar_processing/utility/constants.py +29 -29
  16. water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
  17. {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/RECORD +20 -20
  18. water_column_sonar_processing/process.py +0 -149
  19. water_column_sonar_processing-25.11.1.dist-info/METADATA +0 -182
  20. {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +0 -0
  21. {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/licenses/LICENSE +0 -0
  22. {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/s3_manager.py
@@ -31,8 +31,6 @@ class S3Manager:
         endpoint_url: Optional[str] = None,
     ):
         self.endpoint_url = endpoint_url
-        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
         self.s3_transfer_config = TransferConfig(
@@ -56,6 +54,7 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
         )
         self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
             aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
@@ -76,6 +75,7 @@ class S3Manager:
                 endpoint_url=self.endpoint_url,
             )
         )
+        #
         self.paginator = self.s3_client.get_paginator("list_objects_v2")
         self.paginator_noaa_wcsd_zarr_pds = (
             self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
@@ -117,7 +117,6 @@ class S3Manager:
         return client.list_buckets()

     #####################################################################
-    # tested
     def upload_nodd_file(
         self,
         file_name: str,
@@ -133,7 +132,6 @@ class S3Manager:
         return key

     #####################################################################
-    # tested
     def upload_files_with_thread_pool_executor(
         self,
         output_bucket_name: str,
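
For context, threading `endpoint_url` through to the boto3 client (the main change in this file) is the standard hook for pointing S3 calls at a local emulator during tests. A minimal sketch of the pattern, assuming a local moto/localstack server; the `MOTO_URL` value and pool size are illustrative, not from this package:

```python
import boto3
from botocore.config import Config

MOTO_URL = "http://127.0.0.1:5000"  # hypothetical local S3 emulator

session = boto3.Session()
s3_client = session.client(
    service_name="s3",
    config=Config(max_pool_connections=64),
    region_name="us-east-1",
    endpoint_url=MOTO_URL,  # leave as None to target real AWS S3
)
print(s3_client.list_buckets())
```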
water_column_sonar_processing/aws/s3fs_manager.py
@@ -3,6 +3,7 @@ from typing import Optional

 import s3fs

+
 # TODO: S3FS_LOGGING_LEVEL=DEBUG
 # S3FS_LOGGING_LEVEL=DEBUG

@@ -21,17 +22,8 @@ class S3FSManager:
             endpoint_url=endpoint_url,
             key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
-            # asynchronous=True,
         )
-        # self.s3fs.ls("")

-    # s3_fs = s3fs.S3FileSystem( # TODO: use s3fs_manager?
-    #     anon=True,
-    #     client_kwargs={
-    #         "endpoint_url": moto_server,
-    #         "region_name": "us-east-1",
-    #     },
-    # )
     #####################################################################
     def s3_map(
         self,
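
The deleted commented-out block hinted at the same endpoint-override idea for s3fs. A sketch of how an `S3FileSystem` is typically mapped into a Zarr-readable store, assuming the public `noaa-wcsd-zarr-pds` bucket and the store path quoted in this package's comments:

```python
import s3fs
import xarray as xr

fs = s3fs.S3FileSystem(
    anon=True,  # the public NODD bucket requires no credentials
    endpoint_url=None,  # or a local emulator URL during tests
)
store = s3fs.S3Map(
    root="s3://noaa-wcsd-zarr-pds/level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr",
    s3=fs,
    check=False,
)
ds = xr.open_zarr(store)  # lazily opens the cruise-level store
```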
water_column_sonar_processing/cruise/create_empty_zarr_store.py
@@ -6,6 +6,7 @@ import numpy as np
 from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
 from water_column_sonar_processing.model import ZarrManager
 from water_column_sonar_processing.utility import Cleaner
+from water_column_sonar_processing.utility import Constants


 # TODO: change name to "CreateLocalEmptyZarrStore"
@@ -19,52 +20,21 @@ class CreateEmptyZarrStore:
         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")

     #######################################################
-    # TODO: moved this to the s3_manager
-    # def upload_zarr_store_to_s3(
-    #     self,
-    #     output_bucket_name: str,
-    #     local_directory: str,
-    #     object_prefix: str,
-    #     cruise_name: str,
-    # ) -> None:
-    #     print("uploading model store to s3")
-    #     s3_manager = S3Manager()
-    #     #
-    #     print("Starting upload with thread pool executor.")
-    #     # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
-    #     all_files = []
-    #     for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
-    #         for file in files:
-    #             local_path = os.path.join(subdir, file)
-    #             # TODO: find a better method for splitting strings here:
-    #             # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
-    #             s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
-    #             all_files.append([local_path, s3_key])
-    #     #
-    #     # print(all_files)
-    #     s3_manager.upload_files_with_thread_pool_executor(
-    #         output_bucket_name=output_bucket_name,
-    #         all_files=all_files,
-    #     )
-    #     print("Done uploading with thread pool executor.")
-    #     # TODO: move to common place
-
-    #######################################################
+    @staticmethod
     def create_cruise_level_zarr_store(
-        self,
         output_bucket_name: str,
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
         table_name: str,
-        # override_cruise_min_epsilon=None,
     ) -> None:
         """
-        Initialize zarr store. The water_level needs to be integrated.
+        Initialize zarr store for the entire cruise which aggregates all the raw data.
+        All cruises will be resampled at 20 cm depth.
+        # tempdir="/tmp", # TODO: create better tmp directory for testing
         """
         tempdir = tempfile.TemporaryDirectory()
         try:
-            # HB0806 - 123, HB0903 - 220
             dynamo_db_manager = DynamoDBManager()
             s3_manager = S3Manager()

@@ -76,7 +46,7 @@ class CreateEmptyZarrStore:
             # TODO: filter the dataframe just for enums >= LEVEL_1_PROCESSING
             # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan

-            # TODO: VERIFY GEOJSON EXISTS as prerequisite!!!
+            # TODO: VERIFY GEOJSON EXISTS as prerequisite!!! ...no more geojson needed

             print(f"DataFrame shape: {df.shape}")
             cruise_channels = list(
@@ -88,18 +58,11 @@ class CreateEmptyZarrStore:
                 df["NUM_PING_TIME_DROPNA"].dropna().astype(int)
             )

-            # [3] calculate the max/min measurement resolutions for the whole cruise
-            # cruise_min_echo_range = np.min(
-            #     (df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
-            # )
-
-            # [4] calculate the np.max(max_echo_range + water_level)
+            # [4] max measurement resolution for the whole cruise
+            # Each max-echo-range is paired with water-level and then find the max of that
             cruise_max_echo_range = np.max(
                 (df["MAX_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
-            )
-
-            # TODO: set this to either 1 or 0.5 meters
-            cruise_min_epsilon = np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
+            )  # max_echo_range now includes water_level

             print(f"cruise_max_echo_range: {cruise_max_echo_range}")

@@ -107,21 +70,18 @@ class CreateEmptyZarrStore:
             cruise_frequencies = [
                 float(i) for i in df["FREQUENCIES"].dropna().values.flatten()[0]
             ]
-            print(cruise_frequencies)

             new_width = int(consolidated_zarr_width)
-            print(f"new_width: {new_width}")
-            #################################################################
-            store_name = f"{cruise_name}.zarr"
-            print(store_name)
             ################################################################
-            # Delete existing model store if it exists
-            zarr_prefix = os.path.join("level_2", ship_name, cruise_name, sensor_name)
+            # Delete any existing stores
+            zarr_prefix = os.path.join(
+                str(Constants.LEVEL_2.value), ship_name, cruise_name, sensor_name
+            )
             child_objects = s3_manager.get_child_objects(
                 bucket_name=output_bucket_name,
                 sub_prefix=zarr_prefix,
             )
-            #
+
             if len(child_objects) > 0:
                 s3_manager.delete_nodd_objects(
                     bucket_name=output_bucket_name,
@@ -130,50 +90,28 @@ class CreateEmptyZarrStore:
             ################################################################
             # Create new model store
             zarr_manager = ZarrManager()
-            new_height = len(  # [0.19m down to 1001.744m] = 5272 samples, 10.3 tiles @ 512
-                zarr_manager.get_depth_values(  # these depths should be from min_epsilon to max_range+water_level
-                    # min_echo_range=cruise_min_echo_range,
-                    max_echo_range=cruise_max_echo_range,
-                    cruise_min_epsilon=cruise_min_epsilon,
-                )
-            )
-            print(f"new_height: {new_height}")
-
             zarr_manager.create_zarr_store(
-                path=tempdir.name,  # TODO: need to use .name or problem
+                path=tempdir.name,
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
                 frequencies=cruise_frequencies,
                 width=new_width,
-                # min_echo_range=cruise_min_echo_range,
                 max_echo_range=cruise_max_echo_range,
-                cruise_min_epsilon=cruise_min_epsilon,
+                # cruise_min_epsilon=cruise_min_epsilon,
                 calibration_status=True,
             )
             #################################################################
+            # TODO: would be more elegant to create directly into s3 bucket
             s3_manager.upload_zarr_store_to_s3(
                 output_bucket_name=output_bucket_name,
-                local_directory=tempdir.name,  # TODO: need to use .name or problem
+                local_directory=tempdir.name,
                 object_prefix=zarr_prefix,
                 cruise_name=cruise_name,
             )
-            # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html
             #################################################################
-            # Verify count of the files uploaded
-            # count = self.__get_file_count(store_name=store_name)
-            # #
-            # raw_zarr_files = self.__get_s3_files(  # TODO: just need count
-            #     bucket_name=self.__output_bucket,
-            #     sub_prefix=os.path.join(zarr_prefix, store_name),
-            # )
-            # if len(raw_zarr_files) != count:
-            #     print(f'Problem writing {store_name} with proper count {count}.')
-            #     raise Exception("File count doesnt equal number of s3 Zarr store files.")
-            # else:
-            #     print("File counts match.")
+            # TODO: verify count of the files uploaded
             #################################################################
-            # Success
             # TODO: update enum in dynamodb
             print("Done creating cruise level zarr store.")
             #################################################################
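
The new docstring pins the cruise grid at 20 cm depth resolution, with `cruise_max_echo_range` taken as the maximum of `MAX_ECHO_RANGE + WATER_LEVEL` over all files. A sketch of the depth axis that implies; `ZarrManager.get_depth_values` is the real implementation and may differ in detail:

```python
import numpy as np

def depth_values(max_echo_range: float, resolution: float = 0.20) -> np.ndarray:
    # One bin per 20 cm, from the first bin below the surface down to
    # the deepest (echo range + water level) observed on the cruise.
    n_bins = int(np.ceil(max_echo_range / resolution))
    return np.linspace(resolution, n_bins * resolution, num=n_bins)

print(depth_values(10.0))  # [0.2, 0.4, ..., 10.0] -> 50 bins
```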
water_column_sonar_processing/cruise/resample_regrid.py
@@ -3,11 +3,9 @@ import warnings
 from pathlib import Path

 import numpy as np
-import pandas as pd
 import xarray as xr

 from water_column_sonar_processing.aws import DynamoDBManager
-from water_column_sonar_processing.geometry import GeometryManager
 from water_column_sonar_processing.model import ZarrManager

 warnings.simplefilter("ignore", category=RuntimeWarning)
@@ -19,28 +17,29 @@ class ResampleRegrid:
         self,
     ):
         self.__overwrite = True
-        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.dtype = "float32"

     #################################################################
     def interpolate_data(
         self,
-        input_xr,
-        ping_times,
-        all_cruise_depth_values,  # includes water_level offset
-        water_level,  # this is the offset that will be added to each respective file
+        input_xr: xr.Dataset,
+        ping_times: np.ndarray,
+        all_cruise_depth_values: np.ndarray,  # includes water_level offset
+        water_level: float = 0.0,
     ) -> np.ndarray:
         """
-        What gets passed into interpolate data
+        Input dataset is passed in along with times and depth values to regrid to.
         """
         print("Interpolating dataset.")
         try:
+            # add offset for the water level to the whole input xarray
+            input_xr.depth.values = input_xr.depth.values + water_level
+
             data = np.empty(
-                (
+                (  # Depth / Time / Frequency
                     len(all_cruise_depth_values),
                     len(ping_times),
-                    len(input_xr.frequency_nominal),
+                    len(input_xr.frequency_nominal.values),
                 ),
                 dtype=self.dtype,
             )
@@ -49,36 +48,27 @@ class ResampleRegrid:

             regrid_resample = xr.DataArray(  # where data will be written to
                 data=data,
-                dims=("depth", "time", "frequency"),
                 coords={
                     "depth": all_cruise_depth_values,
                     "time": ping_times,
                     "frequency": input_xr.frequency_nominal.values,
                 },
+                dims=("depth", "time", "frequency"),
+                name="Sv",
             )

-            # shift the input data by water_level
-            input_xr.echo_range.values = (
-                input_xr.echo_range.values + water_level
-            )  # water_level # TODO: change
-
             channels = input_xr.channel.values
-            for channel in range(
-                len(channels)
-            ):  # ?TODO: leaving off here, need to subset for just indices in time axis
+            for channel in range(len(channels)):
                 gc.collect()
                 max_depths = np.nanmax(
-                    a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
+                    a=input_xr.depth.sel(channel=input_xr.channel[channel]).values,
                     # + water_level,
                     axis=1,
                 )
-                superset_of_max_depths = set(
-                    max_depths
-                )  # HB1501, D20150503-T102035.raw, TypeError: unhashable type: 'numpy.ndarray'
+                superset_of_max_depths = set(max_depths)
                 set_of_max_depths = list(
                     {x for x in superset_of_max_depths if x == x}
-                )  # removes nan's
-                # iterate through partitions of dataset with similar depths and resample
+                )  # To speed things up resample in groups denoted by max_depth -- so samples might no longer be adjacent
                 for select_max_depth in set_of_max_depths:
                     # TODO: for nan just skip and leave all nan's
                     select_indices = [
@@ -87,46 +77,50 @@ class ResampleRegrid:
                         if max_depths[i] == select_max_depth
                     ]

-                    # now create new DataArray with proper dimension and indices
-                    # data_select = input_xr.Sv.sel(
-                    #     channel=input_xr.channel[channel]
-                    # ).values[select_indices, :].T # TODO: dont like this transpose
                     data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[
                         select_indices, :
                     ].T.values
-                    # change from ".values[select_indices, :].T" to "[select_indices, :].values.T"

                     times_select = input_xr.ping_time.values[select_indices]
-                    depths_select = input_xr.echo_range.sel(
-                        channel=input_xr.channel[channel]
-                    ).values[
-                        select_indices[0], :
-                    ]  # '0' because all others in group should be same
-
+                    # input_xr.depth[0][0] -> [0., 499.9] before
+                    # input_xr.depth.values = input_xr.depth.values + water_level # issue here!! overwritting all the data
+                    # input_xr.depth[0][0] -> [7.5, 507.40] after
+                    depths_all = input_xr.depth.sel(
+                        channel=input_xr.channel[channel],
+                        ping_time=input_xr.ping_time[select_indices[0]],
+                    ).values
+                    depths_select = depths_all[~np.isnan(depths_all)]
+                    #
                     da_select = xr.DataArray(
-                        data=data_select,
+                        data=data_select[: len(depths_select), :],
                         dims=("depth", "time"),
                         coords={
                             "depth": depths_select,
                             "time": times_select,
                         },
-                    ).dropna(dim="depth")
-                    resampled = da_select.interp(
-                        depth=all_cruise_depth_values, method="nearest"
                     )
-                    # write to the resample array
-                    regrid_resample.loc[
+                    # 'resampled' is now the interpolated superset of new dimensions
+                    resampled = da_select.interp(  # need to define the data with water level (domain)
+                        depth=all_cruise_depth_values,  # and need to interpolate over the (range)
+                        method="nearest",
+                        assume_sorted=True,
+                    )  # good through here, @27 is -3.11 which is 5.4 m depth
+
+                    ### write to outptut ###
+                    regrid_resample.loc[  # ~150 MB for 5001x7706x4
                         dict(
                             time=times_select,
                             frequency=input_xr.frequency_nominal.values[channel],
                         )
                     ] = resampled
-                    print(f"updated {len(times_select)} ping times")
+                    # print(f"updated {len(times_select)} ping times")
                     gc.collect()
+            return regrid_resample.values.copy()
         except Exception as err:
             raise RuntimeError(f"Problem finding the dynamodb table, {err}")
-        print("Done interpolating dataset.")
-        return regrid_resample.values.copy()
+        finally:
+            gc.collect()
+            print("Done interpolating dataset.")

     #################################################################
     def resample_regrid(
137
131
  table_name,
138
132
  bucket_name,
139
133
  override_select_files=None,
140
- # override_cruise_min_epsilon=None,
141
134
  endpoint_url=None,
142
135
  ) -> None:
143
136
  """
@@ -149,7 +142,6 @@ class ResampleRegrid:
149
142
  print("Resample Regrid, Interpolating dataset.")
150
143
  try:
151
144
  zarr_manager = ZarrManager()
152
- geo_manager = GeometryManager()
153
145
 
154
146
  output_zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
155
147
  ship_name=ship_name,
@@ -159,12 +151,9 @@ class ResampleRegrid:
159
151
  endpoint_url=endpoint_url,
160
152
  )
161
153
 
162
- # get dynamo stuff
163
154
  dynamo_db_manager = DynamoDBManager()
164
155
  cruise_df = dynamo_db_manager.get_table_as_df(
165
- # ship_name=ship_name,
166
156
  cruise_name=cruise_name,
167
- # sensor_name=sensor_name,
168
157
  table_name=table_name,
169
158
  )
170
159
 
@@ -182,6 +171,7 @@ class ResampleRegrid:
182
171
  print(f"Processing file: {file_name_stem}.")
183
172
 
184
173
  if f"{file_name_stem}.raw" not in list(cruise_df["FILE_NAME"]):
174
+ print("Raw file file_stem not found in dynamodb.")
185
175
  raise Exception("Raw file file_stem not found in dynamodb.")
186
176
 
187
177
  # status = PipelineStatus['LEVEL_1_PROCESSING']
@@ -195,20 +185,21 @@ class ResampleRegrid:
195
185
  ]
196
186
  )
197
187
 
198
- # Get input store — this is unadjusted for water_level
188
+ # Get input store
199
189
  input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
200
190
  ship_name=ship_name,
201
191
  cruise_name=cruise_name,
202
192
  sensor_name=sensor_name,
203
193
  file_name_stem=file_name_stem,
204
- input_bucket_name=bucket_name,
194
+ bucket_name=bucket_name,
205
195
  endpoint_url=endpoint_url,
206
196
  )
207
197
 
198
+ #########################################################################
208
199
  # This is the vertical offset of the sensor related to the ocean surface
209
200
  # See https://echopype.readthedocs.io/en/stable/data-proc-additional.html
210
201
  if "water_level" in input_xr_zarr_store.keys():
211
- water_level = input_xr_zarr_store.water_level.values
202
+ water_level = float(input_xr_zarr_store.water_level.values)
212
203
  else:
213
204
  water_level = 0.0
214
205
  #########################################################################
@@ -224,60 +215,52 @@ class ResampleRegrid:
224
215
  start_ping_time_index = ping_time_cumsum[index]
225
216
  end_ping_time_index = ping_time_cumsum[index + 1]
226
217
 
227
- max_echo_range = np.max(
218
+ max_echo_range = np.max( # Should water level go in here?
228
219
  (cruise_df["MAX_ECHO_RANGE"] + cruise_df["WATER_LEVEL"])
229
220
  .dropna()
230
- .astype(float)
231
- )
232
- cruise_min_epsilon = np.min(
233
- cruise_df["MIN_ECHO_RANGE"].dropna().astype(float)
221
+ .astype(np.float32)
234
222
  )
223
+ # cruise_min_epsilon = np.min(
224
+ # cruise_df["MIN_ECHO_RANGE"].dropna().astype(float)
225
+ # ) # TODO: currently overwriting to 0.25 m
235
226
 
236
- # Note: cruise dims (depth, time, frequency)
237
- all_cruise_depth_values = zarr_manager.get_depth_values( # needs to integrate water_level
238
- # min_echo_range=min_echo_range,
239
- max_echo_range=max_echo_range, # does it here
240
- cruise_min_epsilon=cruise_min_epsilon, # remove this & integrate into min_echo_range
241
- ) # with offset of 7.5 meters, 0 meter measurement should now start at 7.5 meters
227
+ all_cruise_depth_values = zarr_manager.get_depth_values(
228
+ max_echo_range=max_echo_range,
229
+ # cruise_min_epsilon=cruise_min_epsilon,
230
+ )
242
231
 
243
- print(" ".join(list(input_xr_zarr_store.Sv.dims)))
244
- if set(input_xr_zarr_store.Sv.dims) != {
232
+ if set(
233
+ input_xr_zarr_store.Sv.dims
234
+ ) != { # Cruise dimensions are: (depth, time, frequency)
245
235
  "channel",
246
236
  "ping_time",
247
237
  "range_sample",
248
238
  }:
249
239
  raise Exception("Xarray dimensions are not as expected.")
250
240
 
251
- indices, geospatial = geo_manager.read_s3_geo_json(
252
- ship_name=ship_name,
253
- cruise_name=cruise_name,
254
- sensor_name=sensor_name,
255
- file_name_stem=file_name_stem,
256
- input_xr_zarr_store=input_xr_zarr_store,
257
- endpoint_url=endpoint_url,
258
- output_bucket_name=bucket_name,
259
- )
241
+ # indices, geospatial = geo_manager.read_s3_geo_json( # TODO: remove this!!!!
242
+ # ship_name=ship_name,
243
+ # cruise_name=cruise_name,
244
+ # sensor_name=sensor_name,
245
+ # file_name_stem=file_name_stem,
246
+ # input_xr_zarr_store=input_xr_zarr_store,
247
+ # endpoint_url=endpoint_url,
248
+ # output_bucket_name=bucket_name,
249
+ # )
260
250
 
261
- input_xr = input_xr_zarr_store.isel(
262
- ping_time=indices
263
- ) # Problem with HB200802-D20080310-T174959.zarr/
251
+ input_xr = input_xr_zarr_store # .isel(ping_time=indices)
264
252
 
265
253
  ping_times = input_xr.ping_time.values
266
- # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
267
- epoch_seconds = [
268
- (pd.Timestamp(i) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
269
- for i in ping_times
270
- ]
271
254
  output_zarr_store["time"][start_ping_time_index:end_ping_time_index] = (
272
- epoch_seconds
255
+ input_xr.ping_time.data
273
256
  )
274
257
 
275
- # --- UPDATING --- #
258
+ # --- UPDATING --- # # TODO: problem, this returns dimensionless array
276
259
  regrid_resample = self.interpolate_data(
277
260
  input_xr=input_xr,
278
261
  ping_times=ping_times,
279
262
  all_cruise_depth_values=all_cruise_depth_values, # should accommodate the water_level already
280
- water_level=water_level, # not applied to anything yet
263
+ water_level=water_level,
281
264
  )
282
265
 
283
266
  print(
@@ -288,50 +271,51 @@ class ResampleRegrid:
288
271
 
289
272
  for fff in range(regrid_resample.shape[-1]):
290
273
  output_zarr_store["Sv"][
291
- :, start_ping_time_index:end_ping_time_index, fff
274
+ : regrid_resample[:, :, fff].shape[0],
275
+ start_ping_time_index:end_ping_time_index,
276
+ fff,
292
277
  ] = regrid_resample[:, :, fff]
293
278
  #########################################################################
294
- # TODO: add the "detected_seafloor_depth/" to the
295
- # L2 cruise dataarrays
296
- # TODO: make bottom optional
297
- # TODO: Only checking the first channel for now. Need to average across all channels
298
279
  # in the future. See https://github.com/CI-CMG/water-column-sonar-processing/issues/11
299
- if "detected_seafloor_depth" in input_xr.variables:
300
- print(
301
- "Found detected_seafloor_depth, adding dataset to output store."
302
- )
280
+ if "detected_seafloor_depth" in list(input_xr.variables):
281
+ print("Adding detected_seafloor_depth to output")
303
282
  detected_seafloor_depth = input_xr.detected_seafloor_depth.values
304
283
  detected_seafloor_depth[detected_seafloor_depth == 0.0] = np.nan
305
- # TODO: problem here: Processing file: D20070711-T210709.
306
284
 
307
- # Use the lowest frequencies to determine bottom
285
+ # As requested, use the lowest frequencies to determine bottom
308
286
  detected_seafloor_depths = detected_seafloor_depth[0, :]
309
287
 
310
288
  detected_seafloor_depths[detected_seafloor_depths == 0.0] = np.nan
311
289
  print(f"min depth measured: {np.nanmin(detected_seafloor_depths)}")
312
290
  print(f"max depth measured: {np.nanmax(detected_seafloor_depths)}")
313
- # available_indices = np.argwhere(np.isnan(geospatial['latitude'].values))
314
291
  output_zarr_store["bottom"][
315
292
  start_ping_time_index:end_ping_time_index
316
293
  ] = detected_seafloor_depths
317
294
  #
318
295
  #########################################################################
319
296
  # [5] write subset of latitude/longitude
297
+ # output_zarr_store["latitude"][
298
+ # start_ping_time_index:end_ping_time_index
299
+ # ] = geospatial.dropna()[
300
+ # "latitude"
301
+ # ].values # TODO: get from ds_sv directly, dont need geojson anymore
302
+ # output_zarr_store["longitude"][
303
+ # start_ping_time_index:end_ping_time_index
304
+ # ] = geospatial.dropna()["longitude"].values
305
+ #########################################################################
320
306
  output_zarr_store["latitude"][
321
307
  start_ping_time_index:end_ping_time_index
322
- ] = geospatial.dropna()[
323
- "latitude"
324
- ].values # TODO: get from ds_sv directly, dont need geojson anymore
308
+ ] = input_xr_zarr_store.latitude.dropna(dim="ping_time").values
325
309
  output_zarr_store["longitude"][
326
310
  start_ping_time_index:end_ping_time_index
327
- ] = geospatial.dropna()["longitude"].values
328
- #########################################################################
311
+ ] = input_xr_zarr_store.longitude.dropna(dim="ping_time").values
329
312
  #########################################################################
330
313
  except Exception as err:
331
314
  raise RuntimeError(f"Problem with resample_regrid, {err}")
332
315
  finally:
333
316
  print("Exiting resample_regrid.")
334
317
  # TODO: read across times and verify dataset was written?
318
+ gc.collect()
335
319
 
336
320
  #######################################################
337
321
 
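The Sv write above now clips to the regridded block's depth extent instead of assuming it fills the whole axis. A standalone sketch of that slab-write pattern against a throwaway local store (all names and shapes hypothetical, zarr v2-style API):

```python
import numpy as np
import zarr

root = zarr.open("demo_cruise.zarr", mode="w")
sv = root.create_dataset(
    "Sv", shape=(100, 256, 4), chunks=(100, 64, 4), dtype="float32", fill_value=np.nan
)

block = np.random.rand(80, 128, 4).astype("float32")  # depth x time x frequency
start, end = 0, 128
for fff in range(block.shape[-1]):
    # Write only the depth rows the block covers, one frequency at a time.
    sv[: block.shape[0], start:end, fff] = block[:, :, fff]
```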
water_column_sonar_processing/geometry/__init__.py
@@ -2,10 +2,12 @@ from .elevation_manager import ElevationManager
 from .geometry_manager import GeometryManager
 from .line_simplification import LineSimplification
 from .pmtile_generation import PMTileGeneration
+from .spatiotemporal import Spatiotemporal

 __all__ = [
     "ElevationManager",
     "GeometryManager",
     "LineSimplification",
     "PMTileGeneration",
+    "Spatiotemporal",
 ]
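
With the new `spatiotemporal.py` module (106 added lines, per the file list) exported here, downstream code can import it from the subpackage. A hypothetical usage line; the class's constructor and methods are not shown in this diff:

```python
from water_column_sonar_processing.geometry import Spatiotemporal
```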
water_column_sonar_processing/geometry/elevation_manager.py
@@ -46,7 +46,7 @@ class ElevationManager:
         self,
     ):
         self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
-        self.TIMOUT_SECONDS = 10
+        self.TIMEOUT_SECONDS = 10

     #######################################################
     def get_arcgis_elevation(
@@ -71,7 +71,7 @@ class ElevationManager:
             # order: (lng, lat)
             geometry = f'{{"points":{str(chunk)}}}'
             url = f"https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={geometry}&geometryType={geometryType}&returnGeometry=false&returnCatalogItems=false&f=json"
-            result = requests.get(url, timeout=self.TIMOUT_SECONDS)
+            result = requests.get(url, timeout=self.TIMEOUT_SECONDS)
             res = json.loads(result.content.decode("utf8"))
             if "results" in res:
                 for element in res["results"]:
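
Beyond the `TIMOUT_SECONDS` → `TIMEOUT_SECONDS` rename, the DEM lookup itself is unchanged. A standalone sketch of the same query, with an invented two-point chunk and `esriGeometryMultipoint` assumed for the `geometryType` variable not shown in this hunk:

```python
import json
import requests

points = [[-70.0, 42.0], [-69.5, 41.8]]  # (lng, lat) pairs
geometry = f'{{"points":{str(points)}}}'
url = (
    "https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic"
    f"/ImageServer/identify?geometry={geometry}&geometryType=esriGeometryMultipoint"
    "&returnGeometry=false&returnCatalogItems=false&f=json"
)
result = requests.get(url, timeout=10)
res = json.loads(result.content.decode("utf8"))
for element in res.get("results", []):
    print(element)  # each element carries the sampled DEM value for a point
```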