water-column-sonar-processing 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- water_column_sonar_processing/__init__.py +16 -0
- water_column_sonar_processing/aws/__init__.py +7 -4
- water_column_sonar_processing/aws/dynamodb_manager.py +70 -49
- water_column_sonar_processing/aws/s3_manager.py +112 -122
- water_column_sonar_processing/aws/s3fs_manager.py +13 -19
- water_column_sonar_processing/aws/sns_manager.py +10 -21
- water_column_sonar_processing/aws/sqs_manager.py +10 -18
- water_column_sonar_processing/cruise/__init__.py +4 -0
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +51 -33
- water_column_sonar_processing/cruise/resample_regrid.py +109 -58
- water_column_sonar_processing/geometry/__init__.py +5 -0
- water_column_sonar_processing/geometry/geometry_manager.py +79 -48
- water_column_sonar_processing/geometry/geometry_simplification.py +13 -12
- water_column_sonar_processing/geometry/pmtile_generation.py +24 -23
- water_column_sonar_processing/index/__init__.py +3 -0
- water_column_sonar_processing/index/index_manager.py +104 -80
- water_column_sonar_processing/model/__init__.py +3 -0
- water_column_sonar_processing/model/zarr_manager.py +113 -75
- water_column_sonar_processing/process.py +76 -69
- water_column_sonar_processing/utility/__init__.py +6 -0
- water_column_sonar_processing/utility/cleaner.py +6 -7
- water_column_sonar_processing/utility/constants.py +42 -35
- water_column_sonar_processing/utility/pipeline_status.py +37 -10
- water_column_sonar_processing/utility/timestamp.py +3 -2
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/METADATA +31 -1
- water_column_sonar_processing-0.0.6.dist-info/RECORD +29 -0
- water_column_sonar_processing-0.0.5.dist-info/RECORD +0 -29
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/WHEEL +0 -0
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/cruise/resample_regrid.py

@@ -1,15 +1,15 @@
 import gc
 import os
 from pathlib import Path
+
 import numcodecs
 import numpy as np
-import xarray as xr
 import pandas as pd
+import xarray as xr

 from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
-from water_column_sonar_processing.model.zarr_manager import ZarrManager
 from water_column_sonar_processing.geometry.geometry_manager import GeometryManager
-
+from water_column_sonar_processing.model.zarr_manager import ZarrManager

 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
@@ -19,30 +19,34 @@ numcodecs.blosc.set_nthreads(1)
 # ZARR_V3_EXPERIMENTAL_API = 1
 # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)

+
 class ResampleRegrid:
     #######################################################
     def __init__(
-            self
+        self,
     ):
         self.__overwrite = True
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-        self.dtype = 'float32'
+        self.dtype = "float32"

     #################################################################
     def interpolate_data(
-            self,
-            input_xr,
-            ping_times,
-            all_cruise_depth_values
+        self,
+        input_xr,
+        ping_times,
+        all_cruise_depth_values,
     ) -> np.ndarray:
         print("Interpolating data.")
         try:
-            data = np.empty(
-                (len(all_cruise_depth_values),
-                 len(ping_times),
-                 len(input_xr.frequency_nominal)),
-                dtype=self.dtype)
+            data = np.empty(
+                (
+                    len(all_cruise_depth_values),
+                    len(ping_times),
+                    len(input_xr.frequency_nominal),
+                ),
+                dtype=self.dtype,
+            )

             data[:] = np.nan

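The constructor change above pins self.dtype to "float32", and interpolate_data allocates its (depth, time, frequency) output buffer with that dtype before filling it with NaN. A minimal standalone sketch of that allocation pattern (the shape here is made up for illustration):

    import numpy as np

    # Allocate the (depth, time, frequency) buffer and mark every cell missing.
    # float32 halves the memory of float64 for large cruise-level Sv grids.
    data = np.empty((4, 3, 2), dtype="float32")
    data[:] = np.nan
    print(data.dtype, bool(np.isnan(data).all()))  # float32 True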
@@ -53,37 +57,60 @@ class ResampleRegrid:
                     "depth": all_cruise_depth_values,
                     "time": ping_times,
                     "frequency": input_xr.frequency_nominal.values,
-                }
+                },
             )

             channels = input_xr.channel.values
-            for channel in range(
-                    len(channels)):  # TODO: leaving off here, need to subset for just indices in time axis
+            for channel in range(
+                len(channels)
+            ):  # TODO: leaving off here, need to subset for just indices in time axis
+                print(
+                    np.nanmax(
+                        input_xr.echo_range.sel(
+                            channel=input_xr.channel[channel]
+                        ).values
+                    )
+                )
                 #
                 max_depths = np.nanmax(
                     a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
-                    axis=1
+                    axis=1,
                 )
                 superset_of_max_depths = set(
-                    np.nanmax(input_xr.echo_range.sel(channel=input_xr.channel[channel]).values, 1)
+                    np.nanmax(
+                        input_xr.echo_range.sel(
+                            channel=input_xr.channel[channel]
+                        ).values,
+                        1,
+                    )
                 )
-                set_of_max_depths = list({x for x in superset_of_max_depths if x == x})  # removes nan's
+                set_of_max_depths = list(
+                    {x for x in superset_of_max_depths if x == x}
+                )  # removes nan's
                 # iterate through partitions of data with similar depths and resample
                 for select_max_depth in set_of_max_depths:
                     # TODO: for nan just skip and leave all nan's
-                    select_indices = [i for i in range(0, len(max_depths)) if max_depths[i] == select_max_depth]
+                    select_indices = [
+                        i
+                        for i in range(0, len(max_depths))
+                        if max_depths[i] == select_max_depth
+                    ]

                     # now create new DataArray with proper dimension and indices
                     # data_select = input_xr.Sv.sel(
                     #     channel=input_xr.channel[channel]
                     # ).values[select_indices, :].T # TODO: dont like this transpose
-                    data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[select_indices, :].T.values
+                    data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[
+                        select_indices, :
+                    ].T.values
                     # change from ".values[select_indices, :].T" to "[select_indices, :].values.T"

                     times_select = input_xr.ping_time.values[select_indices]
                     depths_select = input_xr.echo_range.sel(
                         channel=input_xr.channel[channel]
-                    ).values[select_indices[0], :]  # '0' because all others in group should be same
+                    ).values[
+                        select_indices[0], :
+                    ]  # '0' because all others in group should be same

                     da_select = xr.DataArray(
                         data=data_select,
@@ -91,27 +118,32 @@ class ResampleRegrid:
                         coords={
                             "depth": depths_select,
                             "time": times_select,
-                        }
-                    ).dropna(dim='depth')
-                    resampled = da_select.interp(depth=all_cruise_depth_values, method='nearest')
+                        },
+                    ).dropna(dim="depth")
+                    resampled = da_select.interp(
+                        depth=all_cruise_depth_values, method="nearest"
+                    )
                     # write to the resample array
                     regrid_resample.loc[
-                        dict(time=times_select, frequency=input_xr.frequency_nominal.values[channel])
+                        dict(
+                            time=times_select,
+                            frequency=input_xr.frequency_nominal.values[channel],
+                        )
                     ] = resampled
                     print(f"updated {len(times_select)} ping times")
         except Exception as err:
-            print(f'Problem finding the dynamodb table: {err}')
+            print(f"Problem finding the dynamodb table: {err}")
             raise err
         print("Done interpolating data.")
         return regrid_resample

     #################################################################
     def resample_regrid(
-            self,
-            ship_name,
-            cruise_name,
-            sensor_name,
-            table_name
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        table_name,
     ) -> None:
         """
         The goal here is to interpolate the data against the depth values already populated
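For context, the hunks above carry the core regridding step: each group of pings that shares a maximum depth is wrapped in an xr.DataArray indexed by its native echo_range depths, NaN depths are dropped, and the array is interpolated onto the cruise-wide depth grid with method="nearest". A toy sketch of that step with invented values (none of these numbers come from the package; xarray's interp needs scipy installed):

    import numpy as np
    import xarray as xr

    # One channel's Sv on its native depth grid, two ping times (illustrative).
    native_depths = np.array([0.0, 5.0, 10.0])
    ping_times = np.array(
        ["2007-07-20T02:10:25", "2007-07-20T02:10:26"], dtype="datetime64[ns]"
    )
    sv = np.array([[-70.0, -71.0], [-80.0, -81.0], [-90.0, -91.0]])  # (depth, time)

    da = xr.DataArray(
        data=sv,
        dims=("depth", "time"),
        coords={"depth": native_depths, "time": ping_times},
    )

    # Regrid onto a finer cruise-wide depth grid with nearest-neighbor lookup.
    cruise_depths = np.arange(0.0, 12.5, 2.5)  # [0.0, 2.5, 5.0, 7.5, 10.0]
    resampled = da.interp(depth=cruise_depths, method="nearest")
    print(resampled.shape)  # (5, 2)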
@@ -144,7 +176,7 @@ class ResampleRegrid:
         #########################################################
         #########################################################
         # TODO: iterate files here
-        all_file_names = cruise_df['FILE_NAME']
+        all_file_names = cruise_df["FILE_NAME"]
         for file_name in all_file_names:
             gc.collect()
             file_name_stem = Path(file_name).stem
@@ -158,7 +190,9 @@ class ResampleRegrid:
             # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan

             # Get index from all cruise files. Note: should be based on which are included in cruise.
-            index = cruise_df.index[cruise_df['FILE_NAME'] == f"{file_name_stem}.raw"][0]
+            index = cruise_df.index[
+                cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"
+            ][0]

             # get input store
             input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
@@ -171,29 +205,38 @@ class ResampleRegrid:
             # [3] Get needed indices
             # Offset from start index to insert new data. Note that missing values are excluded.
             ping_time_cumsum = np.insert(
-                np.cumsum(cruise_df['NUM_PING_TIME_DROPNA'].dropna().to_numpy(dtype=int)),
+                np.cumsum(
+                    cruise_df["NUM_PING_TIME_DROPNA"].dropna().to_numpy(dtype=int)
+                ),
                 obj=0,
-                values=0
+                values=0,
             )
             start_ping_time_index = ping_time_cumsum[index]
             end_ping_time_index = ping_time_cumsum[index + 1]

-            min_echo_range = np.nanmin(np.float32(cruise_df['MIN_ECHO_RANGE']))
-            max_echo_range = np.nanmax(np.float32(cruise_df['MAX_ECHO_RANGE']))
+            min_echo_range = np.nanmin(np.float32(cruise_df["MIN_ECHO_RANGE"]))
+            max_echo_range = np.nanmax(np.float32(cruise_df["MAX_ECHO_RANGE"]))

-            print('Creating empty ndarray for Sv data.')  # Note: cruise_zarr dimensions are (depth, time, frequency)
+            print(
+                "Creating empty ndarray for Sv data."
+            )  # Note: cruise_zarr dimensions are (depth, time, frequency)
             cruise_sv_subset = np.empty(
-                shape=output_zarr_store.Sv[:, start_ping_time_index:end_ping_time_index, :].shape
+                shape=output_zarr_store.Sv[
+                    :, start_ping_time_index:end_ping_time_index, :
+                ].shape
             )
             cruise_sv_subset[:, :, :] = np.nan  # (5208, 9778, 4)

             all_cruise_depth_values = zarr_manager.get_depth_values(
-                min_echo_range=min_echo_range,
-                max_echo_range=max_echo_range
+                min_echo_range=min_echo_range, max_echo_range=max_echo_range
             )

             print(" ".join(list(input_xr_zarr_store.Sv.dims)))
-            if set(input_xr_zarr_store.Sv.dims) != {'channel', 'ping_time', 'range_sample'}:
+            if set(input_xr_zarr_store.Sv.dims) != {
+                "channel",
+                "ping_time",
+                "range_sample",
+            }:
                 raise Exception("Xarray dimensions are not as expected.")

             # get geojson
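The ping_time_cumsum block above turns per-file ping counts into write offsets into the cruise-level store: prepending a zero to the running total makes ping_time_cumsum[i] the start index of file i and ping_time_cumsum[i + 1] its end index. A small sketch with made-up counts:

    import numpy as np

    num_ping_times = np.array([100, 250, 175])  # e.g. NUM_PING_TIME_DROPNA per raw file

    # Prepend 0 to the cumulative sum: offsets become [0, 100, 350, 525].
    ping_time_cumsum = np.insert(np.cumsum(num_ping_times), obj=0, values=0)

    index = 1  # second file in the cruise
    start_ping_time_index = ping_time_cumsum[index]  # 100
    end_ping_time_index = ping_time_cumsum[index + 1]  # 350
    print(start_ping_time_index, end_ping_time_index)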
@@ -209,8 +252,13 @@ class ResampleRegrid:

             ping_times = input_xr.ping_time.values
             # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
-            epoch_seconds = [(pd.Timestamp(i) - pd.Timestamp('1970-01-01')) / pd.Timedelta('1s') for i in ping_times]
-            output_zarr_store.time[start_ping_time_index:end_ping_time_index] = epoch_seconds
+            epoch_seconds = [
+                (pd.Timestamp(i) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
+                for i in ping_times
+            ]
+            output_zarr_store.time[start_ping_time_index:end_ping_time_index] = (
+                epoch_seconds
+            )

             # --- UPDATING --- #

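The epoch_seconds hunk above only rewraps the arithmetic into black's layout; the conversion itself is unchanged, and the worked example in the comment can be checked directly:

    import numpy as np
    import pandas as pd

    ping_times = np.array(["2007-07-20T02:10:25.845073920"], dtype="datetime64[ns]")

    # Seconds since the Unix epoch: subtract the epoch and divide by one second.
    epoch_seconds = [
        (pd.Timestamp(t) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
        for t in ping_times
    ]
    print(epoch_seconds)  # [1184897425.845074]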
@@ -220,30 +268,33 @@ class ResampleRegrid:
                 all_cruise_depth_values=all_cruise_depth_values,
             )

-            print(f'start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}')
+            print(
+                f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}"
+            )

             #########################################################################
             # write Sv values to cruise-level-model-store
-            for channel in range(len(input_xr.channel.values)):  # doesn't like being written in one fell swoop :(
+            for channel in range(
+                len(input_xr.channel.values)
+            ):  # doesn't like being written in one fell swoop :(
                 output_zarr_store.Sv[
-                    :,
-                    start_ping_time_index:end_ping_time_index,
-                    channel
+                    :, start_ping_time_index:end_ping_time_index, channel
                 ] = regrid_resample[:, :, channel]

             #########################################################################
             # [5] write subset of latitude/longitude
-            output_zarr_store.latitude[start_ping_time_index:end_ping_time_index] = geospatial.dropna()[
-                'latitude'
-            ].values
-            output_zarr_store.longitude[start_ping_time_index:end_ping_time_index] = geospatial.dropna()[
-                'longitude'
-            ].values
+            output_zarr_store.latitude[
+                start_ping_time_index:end_ping_time_index
+            ] = geospatial.dropna()["latitude"].values
+            output_zarr_store.longitude[
+                start_ping_time_index:end_ping_time_index
+            ] = geospatial.dropna()["longitude"].values
         except Exception as err:
-            print(f'Problem interpolating the data: {err}')
+            print(f"Problem interpolating the data: {err}")
             raise err
         print("Done interpolating data.")

     #######################################################

+
     ###########################################################
water_column_sonar_processing/geometry/geometry_manager.py

@@ -1,10 +1,11 @@
 from pathlib import Path
-
+
 import geopandas
+import numpy as np
 import pandas as pd

-from water_column_sonar_processing.utility.cleaner import Cleaner
 from water_column_sonar_processing.aws.s3_manager import S3Manager
+from water_column_sonar_processing.utility.cleaner import Cleaner

 """
 // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
@@ -22,28 +23,32 @@ from water_column_sonar_processing.aws.s3_manager import S3Manager
 class GeometryManager:
     #######################################################
     def __init__(
-            self
+        self,
     ):
         self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
         self.SIMPLIFICATION_TOLERANCE = 0.0001  # RDP simplification to street level

     #######################################################
     def read_echodata_gps_data(
-            self,
-            echodata,
-            ship_name,
-            cruise_name,
-            sensor_name,
-            file_name,
-            write_geojson=True
+        self,
+        echodata,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        file_name,
+        write_geojson=True,
     ) -> tuple:
         file_name_stem = Path(file_name).stem
         geo_json_name = f"{file_name_stem}.json"

-        print('Getting GPS data from echopype object.')
+        print("Getting GPS data from echopype object.")
         try:
-            latitude = np.round(echodata.platform.latitude.values, self.DECIMAL_PRECISION)
-            longitude = np.round(echodata.platform.longitude.values, self.DECIMAL_PRECISION)
+            latitude = np.round(
+                echodata.platform.latitude.values, self.DECIMAL_PRECISION
+            )
+            longitude = np.round(
+                echodata.platform.longitude.values, self.DECIMAL_PRECISION
+            )

             # RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
             # 'nmea_times' are times from the nmea datalogger associated with GPS
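DECIMAL_PRECISION = 5 in the constructor above means coordinates are rounded to five decimal places, roughly one meter of longitude at the equator (see the scale table quoted in the module docstring). A quick check of what np.round does at that precision (the coordinate values here are invented):

    import numpy as np

    DECIMAL_PRECISION = 5  # ~1.1 m of longitude at the equator

    latitude = np.round(np.array([42.3500149, 42.3500151]), DECIMAL_PRECISION)
    print(latitude)  # [42.35001 42.35002]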
@@ -54,10 +59,15 @@ class GeometryManager:
             time1 = echodata.environment.time1.values

             if len(nmea_times) < len(time1):
-                raise Exception('Problem: Not enough NMEA times available to extrapolate time1.')
+                raise Exception(
+                    "Problem: Not enough NMEA times available to extrapolate time1."
+                )

             # Align 'sv_times' to 'nmea_times'
-            if not (np.all(time1[:-1] <= time1[1:]) and np.all(nmea_times[:-1] <= nmea_times[1:])):
+            if not (
+                np.all(time1[:-1] <= time1[1:])
+                and np.all(nmea_times[:-1] <= nmea_times[1:])
+            ):
                 raise Exception("Problem: NMEA times are not sorted.")

             # Finds the indices where 'v' can be inserted just to the right of 'a'
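Both sortedness checks above guard the np.searchsorted alignment that follows: each Sv ping time is matched to an NMEA GPS fix by binary search, which is only valid on sorted inputs. A sketch of one common pairing pattern, consistent with the "inserted just to the right" comment and the indices < 0 guard in the next hunk (the times here are invented, and the exact search arguments in the package may differ):

    import numpy as np

    nmea_times = np.array(
        ["2007-07-20T02:10:00", "2007-07-20T02:10:10", "2007-07-20T02:10:20"],
        dtype="datetime64[ns]",
    )
    sv_times = np.array(
        ["2007-07-20T02:10:05", "2007-07-20T02:10:15"], dtype="datetime64[ns]"
    )

    # Binary search needs sorted inputs; the method raises if either array is not.
    assert np.all(nmea_times[:-1] <= nmea_times[1:])

    # side="right" minus 1 pairs each ping with the latest fix at or before it;
    # a ping earlier than every fix yields -1, which the caller masks to NaN.
    indices = np.searchsorted(nmea_times, sv_times, side="right") - 1
    print(indices)  # [0 1]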
@@ -67,65 +77,83 @@ class GeometryManager:
             lon = longitude[indices]
             lon[indices < 0] = np.nan

-            if not (np.all(lat[~np.isnan(lat)] >= -90.0) and np.all(lat[~np.isnan(lat)] <= 90.0) and np.all(lon[~np.isnan(lon)] >= -180.0) and np.all(lon[~np.isnan(lon)] <= 180.0)):
+            if not (
+                np.all(lat[~np.isnan(lat)] >= -90.0)
+                and np.all(lat[~np.isnan(lat)] <= 90.0)
+                and np.all(lon[~np.isnan(lon)] >= -180.0)
+                and np.all(lon[~np.isnan(lon)] <= 180.0)
+            ):
                 raise Exception("Problem: GPS Data falls outside allowed bounds.")

             # check for visits to null island
             null_island_indices = list(
-                set.intersection(set(np.where(np.abs(lat) < 1e-3)[0]), set(np.where(np.abs(lon) < 1e-3)[0]))
+                set.intersection(
+                    set(np.where(np.abs(lat) < 1e-3)[0]),
+                    set(np.where(np.abs(lon) < 1e-3)[0]),
+                )
             )
             lat[null_island_indices] = np.nan
             lon[null_island_indices] = np.nan

             # create requirement for minimum linestring size
-            MIN_ALLOWED_SIZE = 4  # don't want to process files with less than 4 data points
-            if len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE:
+            MIN_ALLOWED_SIZE = (
+                4  # don't want to process files with less than 4 data points
+            )
+            if (
+                len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE
+                or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE
+            ):
                 raise Exception(
                     f"There was not enough data in lat or lon to create geojson, {len(lat[~np.isnan(lat)])} found, less than {MIN_ALLOWED_SIZE}."
                 )

             # https://osoceanacoustics.github.io/echopype-examples/echopype_tour.html
-            gps_df = pd.DataFrame({
-                'latitude': lat,
-                'longitude': lon,
-                'time': time1
-            }).set_index(['time']).fillna(0)
+            gps_df = (
+                pd.DataFrame({"latitude": lat, "longitude": lon, "time": time1})
+                .set_index(["time"])
+                .fillna(0)
+            )

             # Note: We set np.nan to 0,0 so downstream missing values can be omitted
             gps_gdf = geopandas.GeoDataFrame(
                 gps_df,
                 geometry=geopandas.points_from_xy(
-                    gps_df['longitude'],
-                    gps_df['latitude']
+                    gps_df["longitude"], gps_df["latitude"]
                 ),
-                crs="epsg:4326"
+                crs="epsg:4326",
             )
             # Note: We set np.nan to 0,0 so downstream missing values can be omitted

             geo_json_line = gps_gdf.to_json()
             if write_geojson:
-                print('Creating local copy of geojson file.')
+                print("Creating local copy of geojson file.")
                 with open(geo_json_name, "w") as write_file:
                     write_file.write(geo_json_line)

-            geo_json_prefix = f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
+            geo_json_prefix = (
+                f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
+            )

-            print('Checking s3 and deleting any existing GeoJSON file.')
+            print("Checking s3 and deleting any existing GeoJSON file.")
             s3_manager = S3Manager()
-            s3_objects = s3_manager.list_nodd_objects(prefix=f"{geo_json_prefix}/{geo_json_name}")
+            s3_objects = s3_manager.list_nodd_objects(
+                prefix=f"{geo_json_prefix}/{geo_json_name}"
+            )
             if len(s3_objects) > 0:
-                print('GeoJSON already exists in s3, deleting existing and continuing.')
+                print(
+                    "GeoJSON already exists in s3, deleting existing and continuing."
+                )
                 s3_manager.delete_nodd_objects(objects=s3_objects)

-            print('Upload GeoJSON to s3.')
+            print("Upload GeoJSON to s3.")
             s3_manager.upload_nodd_file(
                 file_name=geo_json_name,  # file_name
-                key=f"{geo_json_prefix}/{geo_json_name}"  # key
+                key=f"{geo_json_prefix}/{geo_json_name}",  # key
             )

             # TODO: delete geo_json file
             cleaner = Cleaner()
-            cleaner.delete_local_files(file_types=['*.json'])
+            cleaner.delete_local_files(file_types=["*.json"])

             #################################################################
             # TODO: simplify with shapely
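The null-island block above is unchanged apart from layout: any fix where both |lat| and |lon| fall below 1e-3 degrees is treated as a bogus (0, 0) reading and masked to NaN before the GeoDataFrame is built. The same logic in isolation, on an invented three-point track:

    import numpy as np

    lat = np.array([42.35001, 0.0, 42.35003])
    lon = np.array([-70.05001, 0.0, -70.05003])

    # Indices where both coordinates sit within 1e-3 degrees of null island.
    null_island_indices = list(
        set.intersection(
            set(np.where(np.abs(lat) < 1e-3)[0]),
            set(np.where(np.abs(lon) < 1e-3)[0]),
        )
    )
    lat[null_island_indices] = np.nan
    lon[null_island_indices] = np.nan
    print(lat)  # [42.35001      nan 42.35003]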
@@ -144,7 +172,9 @@ class GeometryManager:
             #################################################################
             # GeoJSON FeatureCollection with IDs as "time"
         except Exception as err:
-            print(f'Exception encountered extracting gps coordinates creating geojson: {err}')
+            print(
+                f"Exception encountered extracting gps coordinates creating geojson: {err}"
+            )
             raise
         # Note: returned lat/lon values can include np.nan because they need to be aligned with
         # the Sv data! GeoJSON needs simplification but has been filtered.
@@ -154,12 +184,12 @@ class GeometryManager:

     #######################################################
     def read_s3_geo_json(
-            self,
-            ship_name,
-            cruise_name,
-            sensor_name,
-            file_name_stem,
-            input_xr_zarr_store
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        file_name_stem,
+        input_xr_zarr_store,
     ):
         try:
             s3_manager = S3Manager()
@@ -170,25 +200,26 @@ class GeometryManager:
                 file_name_stem=file_name_stem,
             )
             ###
-            geospatial = geopandas.GeoDataFrame.from_features(
-                geo_json['features']
-            )
+            geospatial = geopandas.GeoDataFrame.from_features(
+                geo_json["features"]
+            ).set_index(pd.json_normalize(geo_json["features"])["id"].values)
             null_island_indices = list(
                 set.intersection(
                     set(np.where(np.abs(geospatial.latitude.values) < 1e-3)[0]),
-                    set(np.where(np.abs(geospatial.longitude.values) < 1e-3)[0])
+                    set(np.where(np.abs(geospatial.longitude.values) < 1e-3)[0]),
                 )
             )
             geospatial.iloc[null_island_indices] = np.nan
             ###
-            geospatial_index = geospatial.dropna().index.values.astype('datetime64[ns]')
+            geospatial_index = geospatial.dropna().index.values.astype("datetime64[ns]")
             aa = input_xr_zarr_store.ping_time.values.tolist()
             vv = geospatial_index.tolist()
             indices = np.searchsorted(a=aa, v=vv)

             return indices, geospatial
         except Exception as err:  # Failure
-            print(f'Exception encountered reading s3 GeoJSON: {err}')
+            print(f"Exception encountered reading s3 GeoJSON: {err}")
             raise

+
     ###########################################################
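The substantive change in the hunk above is the new .set_index(...) call: in 0.0.6 the GeoDataFrame built from the GeoJSON features is re-indexed by each feature's "id", so the index can be cast to datetime64[ns] and aligned against ping_time with searchsorted. A self-contained sketch with a one-feature FeatureCollection (the feature itself is invented, shaped like what read_echodata_gps_data writes):

    import geopandas
    import pandas as pd

    geo_json = {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "id": "2007-07-20T02:10:25.845073920",
                "geometry": {"type": "Point", "coordinates": [-70.05, 42.35]},
                "properties": {"latitude": 42.35, "longitude": -70.05},
            }
        ],
    }

    # New in 0.0.6: index the frame by feature id (a timestamp string).
    geospatial = geopandas.GeoDataFrame.from_features(
        geo_json["features"]
    ).set_index(pd.json_normalize(geo_json["features"])["id"].values)

    geospatial_index = geospatial.dropna().index.values.astype("datetime64[ns]")
    print(geospatial_index)  # ['2007-07-20T02:10:25.845073920']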
water_column_sonar_processing/geometry/geometry_simplification.py

@@ -31,51 +31,52 @@


 """
-class GeometrySimplification:
+
+
+class GeometrySimplification:
     # TODO: in the future move to standalone library
     #######################################################
     def __init__(
-            self
+        self,
     ):
         pass

     #######################################################
     def speed_check(
-            self,
-            speed_knots=50
+        self,
+        speed_knots=50,
     ) -> None:
         print(speed_knots)
         pass

     def remove_null_island_values(
-            self,
-            epsilon=1e-5
+        self,
+        epsilon=1e-5,
     ) -> None:
         print(epsilon)
         pass

     def stream_geometry(
-            self
+        self,
     ) -> None:
         pass

     def break_linestring_into_multi_linestring(
-            self
+        self,
     ) -> None:
         # For any line-strings across the antimeridian, break into multilinestring
         pass

     def simplify(
-            self
+        self,
     ) -> None:
         pass

-    def kalman_filter(
-            self
-    ):
+    def kalman_filter(self):
         # for cruises with bad signal, filter so that
         pass

     #######################################################

+
     ###########################################################