water-column-sonar-processing 0.0.1__py3-none-any.whl → 26.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.

Files changed (60)
  1. water_column_sonar_processing/__init__.py +13 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -0
  3. water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
  4. water_column_sonar_processing/aws/s3_manager.py +418 -0
  5. water_column_sonar_processing/aws/s3fs_manager.py +64 -0
  6. {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
  7. {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +129 -0
  10. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  11. water_column_sonar_processing/cruise/resample_regrid.py +323 -0
  12. water_column_sonar_processing/geometry/__init__.py +13 -0
  13. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  14. water_column_sonar_processing/geometry/geometry_manager.py +241 -0
  15. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  16. water_column_sonar_processing/geometry/pmtile_generation.py +266 -0
  17. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  18. water_column_sonar_processing/index/__init__.py +3 -0
  19. water_column_sonar_processing/index/index_manager.py +381 -0
  20. water_column_sonar_processing/model/__init__.py +3 -0
  21. water_column_sonar_processing/model/zarr_manager.py +741 -0
  22. water_column_sonar_processing/processing/__init__.py +4 -0
  23. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  24. water_column_sonar_processing/processing/raw_to_zarr.py +331 -0
  25. water_column_sonar_processing/utility/__init__.py +13 -0
  26. {model → water_column_sonar_processing}/utility/cleaner.py +7 -7
  27. water_column_sonar_processing/utility/constants.py +118 -0
  28. {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
  29. water_column_sonar_processing/utility/timestamp.py +12 -0
  30. water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
  31. water_column_sonar_processing-26.1.14.dist-info/RECORD +34 -0
  32. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +1 -1
  33. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info/licenses}/LICENSE +1 -1
  34. water_column_sonar_processing-26.1.14.dist-info/top_level.txt +1 -0
  35. __init__.py +0 -0
  36. model/__init__.py +0 -0
  37. model/aws/__init__.py +0 -0
  38. model/aws/dynamodb_manager.py +0 -149
  39. model/aws/s3_manager.py +0 -356
  40. model/aws/s3fs_manager.py +0 -74
  41. model/cruise/__init__.py +0 -0
  42. model/cruise/create_empty_zarr_store.py +0 -166
  43. model/cruise/resample_regrid.py +0 -248
  44. model/geospatial/__init__.py +0 -0
  45. model/geospatial/geometry_manager.py +0 -194
  46. model/geospatial/geometry_simplification.py +0 -81
  47. model/geospatial/pmtile_generation.py +0 -74
  48. model/index/__init__.py +0 -0
  49. model/index/index.py +0 -228
  50. model/model.py +0 -138
  51. model/utility/__init__.py +0 -0
  52. model/utility/constants.py +0 -56
  53. model/utility/timestamp.py +0 -12
  54. model/zarr/__init__.py +0 -0
  55. model/zarr/bar.py +0 -28
  56. model/zarr/foo.py +0 -11
  57. model/zarr/zarr_manager.py +0 -298
  58. water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
  59. water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
  60. water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
water_column_sonar_processing/geometry/geometry_manager.py
@@ -0,0 +1,241 @@
+ from pathlib import Path
+
+ import geopandas
+ import numpy as np
+ import pandas as pd
+
+ from water_column_sonar_processing.aws import S3Manager
+ from water_column_sonar_processing.utility import Cleaner
+
+
+ # Decimal places / degrees / object recognizable at that scale / N-S or E-W extent at the equator, then E-W at 23N/S, 45N/S, 67N/S:
+ # 0  1.0        1° 00′ 0″        country or large region                  111.32 km  102.47 km  78.71 km  43.496 km
+ # 1  0.1        0° 06′ 0″        large city or district                   11.132 km  10.247 km  7.871 km  4.3496 km
+ # 2  0.01       0° 00′ 36″       town or village                          1.1132 km  1.0247 km  787.1 m   434.96 m
+ # 3  0.001      0° 00′ 3.6″      neighborhood, street                     111.32 m   102.47 m  78.71 m   43.496 m
+ # 4  0.0001     0° 00′ 0.36″     individual street, land parcel           11.132 m   10.247 m  7.871 m   4.3496 m
+ # 5  0.00001    0° 00′ 0.036″    individual trees, door entrance          1.1132 m   1.0247 m  787.1 mm  434.96 mm
+ # 6  0.000001   0° 00′ 0.0036″   individual humans                        111.32 mm  102.47 mm 78.71 mm  43.496 mm
+ # 7  0.0000001  0° 00′ 0.00036″  practical limit of commercial surveying  11.132 mm  10.247 mm 7.871 mm  4.3496 mm
+
+
+ class GeometryManager:
+     #######################################################
+     def __init__(
+         self,
+     ):
+         self.DECIMAL_PRECISION = 6  # precision for GPS coordinates
+         self.SIMPLIFICATION_TOLERANCE = 0.0001  # RDP simplification to "street level"
+
+     #######################################################
+     def read_echodata_gps_data(
+         self,
+         echodata,
+         output_bucket_name,
+         ship_name,
+         cruise_name,
+         sensor_name,
+         file_name,
+         endpoint_url=None,
+         write_geojson=True,
+     ) -> tuple:
+         file_name_stem = Path(file_name).stem
+         geo_json_name = f"{file_name_stem}.json"
+
+         print("Getting GPS data from the echopype object.")
+         try:
+             latitude = (
+                 echodata.platform.latitude.values
+             )  # TODO: don't get values from here!
+             longitude = echodata.platform.longitude.values
+
+             # RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
+             # 'nmea_times' are times from the NMEA datalogger associated with the GPS;
+             # note that nmea_times, unlike time1, can safely be sorted
+             nmea_times = np.sort(echodata.platform.time1.values)
+
+             # 'time1' are times from the echosounder associated with the transducer measurements
+             time1 = echodata.environment.time1.values
+
+             if len(nmea_times) < len(time1):
+                 raise Exception(
+                     "Problem: Not enough NMEA times available to extrapolate time1."
+                 )  # TODO: explore this logic further...
+
+             # Align Sv times ('time1') to 'nmea_times'
+             if not (
+                 np.all(time1[:-1] <= time1[1:])
+                 and np.all(nmea_times[:-1] <= nmea_times[1:])
+             ):
+                 raise Exception("Problem: NMEA times are not sorted.")
+
+             # Find the indices where 'v' can be inserted just to the right of 'a'
+             indices = np.searchsorted(a=nmea_times, v=time1, side="right") - 1
+             lat = latitude[indices]
+             lat[indices < 0] = np.nan  # values recorded before the first fix are set to nan
+             lon = longitude[indices]
+             lon[indices < 0] = np.nan
+
+             if not (
+                 np.all(lat[~np.isnan(lat)] >= -90.0)
+                 and np.all(lat[~np.isnan(lat)] <= 90.0)
+                 and np.all(lon[~np.isnan(lon)] >= -180.0)
+                 and np.all(lon[~np.isnan(lon)] <= 180.0)
+             ):
+                 raise Exception("Problem: GPS data falls outside allowed bounds.")
+
+             # check for visits to null island
+             null_island_indices = list(
+                 set.intersection(
+                     set(np.where(np.abs(lat) < 1e-3)[0]),
+                     set(np.where(np.abs(lon) < 1e-3)[0]),
+                 )
+             )
+             lat[null_island_indices] = np.nan
+             lon[null_island_indices] = np.nan
+
+             # enforce a minimum linestring size
+             MIN_ALLOWED_SIZE = 4  # don't process files with fewer than 4 data points
+             if (
+                 len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE
+                 or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE
+             ):
+                 raise Exception(
+                     f"Not enough data in lat or lon to create GeoJSON: {len(lat[~np.isnan(lat)])} points found, fewer than {MIN_ALLOWED_SIZE}."
+                 )
+
+             # https://osoceanacoustics.github.io/echopype-examples/echopype_tour.html
+             gps_df = (
+                 pd.DataFrame({"latitude": lat, "longitude": lon, "time": time1})
+                 .set_index(["time"])
+                 .fillna(0)
+             )
+
+             # Note: np.nan is set to (0, 0) so downstream missing values can be omitted
+             gps_gdf = geopandas.GeoDataFrame(
+                 gps_df,
+                 geometry=geopandas.points_from_xy(
+                     gps_df["longitude"], gps_df["latitude"]
+                 ),
+                 crs="epsg:4326",
+             )
+             # TODO: what ends up here is data with corruption at null island!
+             geo_json_line = gps_gdf.to_json()
+             if write_geojson:
+                 print("Creating local copy of GeoJSON file.")
+                 with open(geo_json_name, "w") as write_file:
+                     write_file.write(
+                         geo_json_line
+                     )  # NOTE: this file can include zeros for lat-lon
+
+                 geo_json_prefix = (
+                     f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
+                 )
+
+                 print("Checking s3 and deleting any existing GeoJSON file.")
+                 s3_manager = S3Manager(endpoint_url=endpoint_url)
+                 geojson_object_exists = s3_manager.check_if_object_exists(
+                     bucket_name=output_bucket_name,
+                     key_name=f"{geo_json_prefix}/{geo_json_name}",
+                 )
+                 if geojson_object_exists:
+                     print(
+                         "GeoJSON already exists in s3, deleting existing and continuing."
+                     )
+                     s3_manager.delete_nodd_object(
+                         bucket_name=output_bucket_name,
+                         key_name=f"{geo_json_prefix}/{geo_json_name}",
+                     )
+
+                 print("Uploading GeoJSON to s3.")
+                 s3_manager.upload_nodd_file(
+                     file_name=geo_json_name,
+                     key=f"{geo_json_prefix}/{geo_json_name}",
+                     output_bucket_name=output_bucket_name,
+                 )
+
+                 # clean up the local GeoJSON file
+                 cleaner = Cleaner()
+                 cleaner.delete_local_files(file_types=["*.json"])
+
+             #################################################################
+             # TODO: simplify with shapely
+             # linestring = shapely.geometry.LineString(
+             #     [xy for xy in zip(gps_gdf.longitude, gps_gdf.latitude)]
+             # )
+             # len(linestring.coords)
+             # line_simplified = linestring.simplify(
+             #     tolerance=self.SIMPLIFICATION_TOLERANCE,
+             #     preserve_topology=True,
+             # )
+             # print(f"Total number of points for original linestring: {len(linestring.coords)}")
+             # print(f"Total number of points needed for the simplified linestring: {len(line_simplified.coords)}")
+             # print(line_simplified)
+             # geo_json_line_simplified = shapely.to_geojson(line_simplified)
+             #################################################################
+             # GeoJSON FeatureCollection with IDs as "time"
+         except Exception as err:
+             raise RuntimeError(
+                 f"Exception encountered extracting GPS coordinates and creating GeoJSON, {err}"
+             )
+
+         # Note: returned lat/lon values can include np.nan because they need to be aligned
+         # with the Sv data! The GeoJSON still needs simplification but has been filtered.
+         # return gps_df.index.values, gps_df.latitude.values, gps_df.longitude.values
+         return gps_df.index.values, lat, lon
+         # TODO: if the GeoJSON is already returned with (0, 0), can the return here
+         #  include np.nan values?
+
+     #######################################################
+     @staticmethod
+     def read_s3_geo_json(
+         ship_name,
+         cruise_name,
+         sensor_name,
+         file_name_stem,
+         input_xr_zarr_store,
+         endpoint_url,
+         output_bucket_name,
+     ):
+         try:
+             s3_manager = S3Manager(endpoint_url=endpoint_url)
+             geo_json = s3_manager.read_s3_json(
+                 ship_name=ship_name,
+                 cruise_name=cruise_name,
+                 sensor_name=sensor_name,
+                 file_name_stem=file_name_stem,
+                 output_bucket_name=output_bucket_name,
+             )
+             ###
+             geospatial = geopandas.GeoDataFrame.from_features(
+                 geo_json["features"]
+             ).set_index(pd.json_normalize(geo_json["features"])["id"].values)
+             null_island_indices = list(
+                 set.intersection(
+                     set(np.where(np.abs(geospatial.latitude.values) < 1e-3)[0]),
+                     set(np.where(np.abs(geospatial.longitude.values) < 1e-3)[0]),
+                 )
+             )
+             geospatial.iloc[null_island_indices] = np.nan
+             ###
+             geospatial_index = geospatial.dropna().index.values.astype("datetime64[ns]")
+             aa = input_xr_zarr_store.ping_time.values.tolist()
+             vv = geospatial_index.tolist()
+             indices = np.searchsorted(a=aa, v=vv)
+
+             return indices, geospatial
+         except Exception as err:
+             raise RuntimeError(f"Exception encountered reading s3 GeoJSON, {err}")
+
+     ############################################################################
+     # COMES from the raw-to-zarr conversion
+     # def __write_geojson_to_file(self, store_name, data) -> None:
+     #     print("Writing GeoJSON to file.")
+     #     with open(os.path.join(store_name, "geo.json"), "w") as outfile:
+     #         outfile.write(data)
+
+
+ ###########################################################
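
The hunk above aligns each echosounder ping to the most recent GPS fix by searching the sorted NMEA times with np.searchsorted(..., side="right") - 1. A minimal sketch of that alignment technique, using made-up timestamps and latitudes (none of these values come from the package):

    import numpy as np

    # Hypothetical sorted NMEA fix times and unaligned ping times.
    nmea_times = np.array([0, 10, 20, 30], dtype="datetime64[s]")
    ping_times = np.array([5, 10, 29], dtype="datetime64[s]")
    latitudes = np.array([40.1, 40.2, 40.3, 40.4])

    # Index of the last NMEA fix at or before each ping; -1 means "before any fix".
    indices = np.searchsorted(a=nmea_times, v=ping_times, side="right") - 1
    lat = latitudes[indices]
    lat[indices < 0] = np.nan  # pings before the first fix get no position

    print(lat)  # [40.1 40.2 40.3]
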
water_column_sonar_processing/geometry/line_simplification.py
@@ -0,0 +1,176 @@
+ # import json
+ import geopandas as gpd
+ import numpy as np
+ from pykalman import KalmanFilter
+ from shapely.geometry import Point
+
+ # import hvplot.pandas
+ # from holoviews import opts
+ # hv.extension('bokeh')
+
+ # import matplotlib.pyplot as plt
+
+
+ # lambda for a timestamp in the form "yyyy-MM-ddTHH:mm:ssZ"
+ # dt = lambda: datetime.now().isoformat(timespec="seconds") + "Z"
+
+ # TODO: get line for example HB1906 ...save linestring to array for testing
+
+ MAX_SPEED_KNOTS = 50
+
+
+ # Lambert's formula ==> better accuracy than haversine.
+ # Lambert's formula is the method used to calculate the shortest distance along the
+ # surface of an ellipsoid. When used to approximate the Earth, it has an accuracy on
+ # the order of 10 meters over thousands of kilometers, more precise than the haversine formula.
+
+
+ def mph_to_knots(mph_value):
+     """Convert miles per hour to knots."""
+     # 1 mile per hour === 0.868976 knots
+     return mph_value * 0.868976
+
+
+ def mps_to_knots(mps_value):
+     """Convert meters per second to knots."""
+     # 1 meter per second === 1.94384 knots
+     return mps_value * 1.94384
+
+
+ ###############################################################################
+ # Colab Notebook:
+ # https://colab.research.google.com/drive/1Ihb1x0EeYRNwGJ4Bqi4RqQQHu9-40oDk?usp=sharing#scrollTo=hIPziqVO48Xg
+ ###############################################################################
+
+
+ # https://shapely.readthedocs.io/en/stable/reference/shapely.MultiLineString.html#shapely.MultiLineString
+ class LineSimplification:
+     """
+     Decimal places / degrees / object recognizable at that scale / N-S or E-W extent
+     at the equator, then E-W at 23N/S, 45N/S, 67N/S:
+     0  1.0        1° 00′ 0″        country or large region                  111.32 km  102.47 km  78.71 km  43.496 km
+     1  0.1        0° 06′ 0″        large city or district                   11.132 km  10.247 km  7.871 km  4.3496 km
+     2  0.01       0° 00′ 36″       town or village                          1.1132 km  1.0247 km  787.1 m   434.96 m
+     3  0.001      0° 00′ 3.6″      neighborhood, street                     111.32 m   102.47 m  78.71 m   43.496 m
+     4  0.0001     0° 00′ 0.36″     individual street, land parcel           11.132 m   10.247 m  7.871 m   4.3496 m
+     5  0.00001    0° 00′ 0.036″    individual trees, door entrance          1.1132 m   1.0247 m  787.1 mm  434.96 mm
+     6  0.000001   0° 00′ 0.0036″   individual humans                        111.32 mm  102.47 mm 78.71 mm  43.496 mm
+     7  0.0000001  0° 00′ 0.00036″  practical limit of commercial surveying  11.132 mm  10.247 mm 7.871 mm  4.3496 mm
+
+     Reference constants from a prior Java implementation:
+     private static final int SRID = 8307;
+     private static final double simplificationTolerance = 0.0001;
+     private static final long splitGeometryMs = 900000L;
+     private static final int batchSize = 10000;
+     private static final int geoJsonPrecision = 5;
+     final int geoJsonPrecision = 5;
+     final double simplificationTolerance = 0.0001;
+     final int simplifierBatchSize = 3000;
+     final long maxCount = 0;
+     private static final double maxAllowedSpeedKnts = 60D;
+     """
+
+     # TODO: in the future, move to a standalone library
+     #######################################################
+     def __init__(
+         self,
+     ):
+         pass
+
+     #######################################################
+     @staticmethod
+     def kalman_filter(
+         longitudes,
+         latitudes,
+     ):
+         """
+         TODO: use a masked array to get the right number of values
+         """
+         # https://github.com/pykalman/pykalman
+         # https://stackoverflow.com/questions/43377626/how-to-use-kalman-filter-in-python-for-location-data
+         measurements = np.asarray([list(elem) for elem in zip(longitudes, latitudes)])
+         initial_state_mean = [measurements[0, 0], 0, measurements[0, 1], 0]
+         transition_matrix = [[1, 1, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]]
+         observation_matrix = [[1, 0, 0, 0], [0, 0, 1, 0]]
+
+         kf = KalmanFilter(
+             transition_matrices=transition_matrix,
+             observation_matrices=observation_matrix,
+             initial_state_mean=initial_state_mean,
+         )
+         kf = kf.em(measurements, n_iter=2)  # TODO: 5
+         (smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements)
+
+         # plt.plot(longitudes, latitudes, label="original")
+         # plt.plot(smoothed_state_means[:, 0], smoothed_state_means[:, 2], label="smoothed")
+         # plt.legend()
+         # plt.show()
+
+         return smoothed_state_means[:, [0, 2]]
+
+     #######################################################
+     @staticmethod
+     def get_speeds(
+         times: np.ndarray,  # don't really need time, but the data does need to be segmented first
+         latitudes: np.ndarray,
+         longitudes: np.ndarray,
+     ) -> np.ndarray:
+         print(MAX_SPEED_KNOTS)  # TODO: too high
+         print(times[0], latitudes[0], longitudes[0])
+         # TODO: distance/time ==> need position2 - position1 to get speed
+
+         # get distance differences
+         geom = [Point(xy) for xy in zip(longitudes, latitudes)]
+         points_df = gpd.GeoDataFrame({"geometry": geom}, crs="EPSG:4326")
+         # Convert to a projected coordinate system where distance can be calculated with
+         # the Pythagorean theorem; an alternative could be EPSG:32663.
+         # https://gis.stackexchange.com/questions/293310/finding-distance-between-two-points-with-geoseries-distance
+         points_df.to_crs(epsg=3310, inplace=True)
+         distance_diffs = points_df.distance(points_df.shift())
+         # distance_diffs_sorted = distance_diffs.sort_values(
+         #     ascending=False
+         # )  # TODO: get avg cutoff time
+
+         time_diffs_ns = np.append(0, (times[1:] - times[:-1]).astype(int))
+         # time_diffs_ns_sorted = np.sort(time_diffs_ns)
+         # largest time diffs for HB0707 (s): 17, 17.9, 21.1, 54.8, 85.1, 113.6, 204.9, 216, 440.7, 544.8
+         # largest time diffs for HB1906 (s): mostly ~3.01 to ~4.51, then outliers of
+         #   22.5, 139.9, 154.5, 1.6e3, 1.7e5, 4.3e5, 4.4e5, and 8.0e5
+         nanoseconds_per_second = 1e9
+         speed_meters_per_second = (
+             distance_diffs / time_diffs_ns * nanoseconds_per_second
+         )
+         # returns the speed in meters per second  # TODO: return speed in knots
+         return speed_meters_per_second.to_numpy(dtype="float32")  # includes nan
+
+     # def remove_null_island_values(
+     #     self,
+     #     epsilon=1e-5,
+     # ) -> None:
+     #     # TODO: low priority
+     #     print(epsilon)
+     #     pass
+
+     def break_linestring_into_multi_linestring(
+         self,
+     ) -> None:
+         # TODO: medium priority
+         # For any linestring crossing the antimeridian, break it into a multilinestring.
+         # The average cadence is one measurement per second; break when a gap exceeds 1 minute.
+         pass
+
+     def simplify(
+         self,
+     ) -> None:
+         # TODO: medium-high priority
+         pass
+
+     #######################################################
+
+
+ # [(-72.2001724243164, 40.51750183105469),    # latBB
+ #  (-72.20023345947266, 40.51749038696289),
+ #  (-72.20033264160156, 40.51750183105469),   # lonAA, latBB
+ #  (-72.20030212402344, 40.517391204833984),
+ #  (-72.20033264160156, 40.517330169677734),  # lonAA, latCC
+ #  (-72.2003402709961, 40.51729965209961),
+ #  (-72.20033264160156, 40.517330169677734),  # lonAA, latCC
+ #  (-72.20040130615234, 40.5172004699707),
+ #  (-72.20050048828125, 40.51716995239258),
+ #  (-72.2004623413086, 40.51710891723633)]
+
+ ###########################################################
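
The kalman_filter method above smooths a GPS track with a constant-velocity state-space model, where the state is [lon, lon_velocity, lat, lat_velocity]. A minimal, self-contained sketch of the same pykalman pattern on a synthetic noisy track (the track, noise level, and n_iter are illustrative assumptions, not package values):

    import numpy as np
    from pykalman import KalmanFilter

    # Synthetic straight-line track with GPS-like jitter.
    rng = np.random.default_rng(0)
    lons = np.linspace(-72.20, -72.10, 50) + rng.normal(0.0, 1e-4, 50)
    lats = np.linspace(40.51, 40.60, 50) + rng.normal(0.0, 1e-4, 50)
    measurements = np.column_stack([lons, lats])

    kf = KalmanFilter(
        # Each position integrates its velocity once per step.
        transition_matrices=[[1, 1, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]],
        # Only the two positions are observed, never the velocities.
        observation_matrices=[[1, 0, 0, 0], [0, 0, 1, 0]],
        initial_state_mean=[measurements[0, 0], 0, measurements[0, 1], 0],
    )
    kf = kf.em(measurements, n_iter=2)  # learn noise covariances from the data
    smoothed_means, _ = kf.smooth(measurements)
    smoothed_track = smoothed_means[:, [0, 2]]  # smoothed (lon, lat) pairs
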
water_column_sonar_processing/geometry/pmtile_generation.py
@@ -0,0 +1,266 @@
+ import fiona
+ import geopandas as gpd
+ import numpy as np
+ import pandas as pd
+ import xarray as xr
+ from shapely.geometry import LineString
+
+ MAX_POOL_CONNECTIONS = 64
+ MAX_CONCURRENCY = 64
+ MAX_WORKERS = 64
+ GB = 1024**3
+
+ bucket_name = "noaa-wcsd-zarr-pds"
+ ship_name = "Henry_B._Bigelow"
+ sensor_name = "EK60"
+
+ # TODO: get pmtiles of all the evr points
+
+
+ class PMTileGeneration(object):
+     """
+     - iterate through the Zarr stores for all cruises
+     - generate GeoJSON in a geopandas df, simplify linestrings
+     - consolidate into a single df, one cruise per row
+     - export as GeoJSON
+     - using tippecanoe, convert GeoJSON --> PMTiles with a Linux command
+     - upload to s3
+     """
+
+     #######################################################
+     def __init__(
+         self,
+     ):
+         self.bucket_name = "noaa-wcsd-zarr-pds"
+         self.ship_name = "Henry_B._Bigelow"
+         self.sensor_name = "EK60"
+
+     #######################################################
+     @staticmethod
+     def check_all_cruises(bucket_name, cruises):
+         completed = []
+         for cruise_name in cruises:
+             print(cruise_name)
+             try:
+                 zarr_store = f"{cruise_name}.zarr"
+                 s3_zarr_store_path = f"{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{zarr_store}"
+                 kwargs = {"consolidated": False}
+                 cruise = xr.open_dataset(
+                     filename_or_obj=f"s3://{s3_zarr_store_path}",
+                     engine="zarr",
+                     storage_options={"anon": True},
+                     **kwargs,
+                 )
+                 width = cruise.Sv.shape[1]
+                 height = cruise.Sv.shape[0]
+                 depth = cruise.Sv.shape[2]
+                 print(
+                     f"height: {height}, width: {width}, depth: {depth} = {width * height * depth}"
+                 )
+                 lats = cruise.latitude.to_numpy()
+                 percent_done = np.count_nonzero(~np.isnan(lats)) / width
+                 if percent_done != 1.0:
+                     print(
+                         f"percent done: {np.round(percent_done, 2)}, {np.count_nonzero(~np.isnan(cruise.latitude.values))}, {width}"
+                     )
+                 else:
+                     completed.append(cruise_name)
+             except Exception as err:
+                 raise RuntimeError(f"Problem parsing Zarr stores, {err}")
+         return completed
+
+     #######################################################
+     @staticmethod
+     def get_cruise_geometry(cruise_name, index):
+         print(cruise_name)
+         try:
+             pieces = []
+             zarr_store = f"{cruise_name}.zarr"
+             s3_zarr_store_path = f"{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{zarr_store}"
+             cruise = xr.open_dataset(
+                 filename_or_obj=f"s3://{s3_zarr_store_path}",
+                 engine="zarr",
+                 storage_options={"anon": True},
+                 chunks={},
+                 cache=True,
+             )
+             latitude_array = cruise.latitude.to_numpy()
+             longitude_array = cruise.longitude.to_numpy()
+             if np.isnan(latitude_array).any() or np.isnan(longitude_array).any():
+                 raise RuntimeError(
+                     f"There was missing lat-lon data for {cruise_name}"
+                 )
+             geom = LineString(list(zip(longitude_array, latitude_array))).simplify(
+                 tolerance=0.001,  # preserve_topology=True  # 113
+             )  # TODO: do a speed check; convert linestrings to multilinestrings
+             print(len(geom.coords))
+             pieces.append(
+                 {
+                     "id": index,
+                     "ship_name": ship_name,
+                     "cruise_name": cruise_name,
+                     "sensor_name": sensor_name,
+                     "geom": geom,
+                 }
+             )
+             df = pd.DataFrame(pieces)
+             gps_gdf = gpd.GeoDataFrame(
+                 data=df[["id", "ship_name", "cruise_name", "sensor_name"]],
+                 geometry=df["geom"],
+                 crs="EPSG:4326",
+             )
+             print(gps_gdf)
+             # {'DXF': 'rw', 'CSV': 'raw', 'OpenFileGDB': 'raw', 'ESRIJSON': 'r', 'ESRI Shapefile': 'raw', 'FlatGeobuf': 'raw', 'GeoJSON': 'raw', 'GeoJSONSeq': 'raw', 'GPKG': 'raw', 'GML': 'rw', 'OGR_GMT': 'rw', 'GPX': 'rw', 'MapInfo File': 'raw', 'DGN': 'raw', 'S57': 'r', 'SQLite': 'raw', 'TopoJSON': 'r'}
+             if "GeoJSON" not in fiona.supported_drivers.keys():
+                 raise RuntimeError("Missing GeoJSON driver")
+
+             gps_gdf.set_index("id", inplace=True)
+             # gps_gdf.to_file(f"dataframe_{cruise_name}.geojson", driver="GeoJSON")  # , crs="epsg:4326")
+             return gps_gdf
+         except Exception as err:
+             raise RuntimeError(f"Problem parsing Zarr stores, {err}")
+
+     #######################################################
+     @staticmethod
+     def aggregate_geojson_into_dataframe(geoms):
+         gps_gdf = gpd.GeoDataFrame(
+             columns=["id", "ship", "cruise", "sensor", "geometry"],
+             geometry="geometry",
+             crs="EPSG:4326",
+         )
+         for iii, geom in enumerate(geoms):
+             gps_gdf.loc[iii] = (
+                 iii,
+                 geom.ship_name[iii],
+                 geom.cruise_name[iii],
+                 geom.sensor_name[iii],
+                 geom.geometry[iii],
+             )
+         gps_gdf.set_index("id", inplace=True)
+         gps_gdf.to_file(
+             filename="dataset.geojson",
+             driver="GeoJSON",
+             engine="fiona",  # or "pyogrio"
+             layer_options={"ID_GENERATE": "YES"},
+             crs="EPSG:4326",
+             id_generate=True,  # required for the feature click selection
+         )
+         print(gps_gdf)
+
+     #######################################################
+     def create_collection_geojson(self):
+         cruises = [
+             "HB0706",
+             "HB0707",
+             "HB0710",
+             "HB0711",
+             "HB0802",
+             "HB0803",
+             "HB0805",
+             "HB0806",
+             "HB0807",
+             "HB0901",
+             "HB0902",
+             "HB0903",
+             "HB0904",
+             "HB0905",
+             "HB1002",
+             "HB1006",
+             "HB1102",
+             "HB1103",
+             "HB1105",
+             "HB1201",
+             "HB1206",
+             "HB1301",
+             "HB1303",
+             "HB1304",
+             "HB1401",
+             "HB1402",
+             "HB1403",
+             "HB1405",
+             "HB1501",
+             "HB1502",
+             "HB1503",
+             "HB1506",
+             "HB1507",
+             "HB1601",
+             "HB1603",
+             "HB1604",
+             "HB1701",
+             "HB1702",
+             "HB1801",
+             "HB1802",
+             "HB1803",
+             "HB1804",
+             "HB1805",
+             "HB1806",
+             "HB1901",
+             "HB1902",
+             "HB1903",
+             "HB1904",
+             "HB1906",
+             "HB1907",
+             "HB2001",
+             "HB2006",
+             "HB2007",
+             "HB20ORT",
+             "HB20TR",
+         ]
+         completed_cruises = self.check_all_cruises(
+             bucket_name=bucket_name, cruises=cruises
+         )  # TODO: threadpool this
+         ### create linestrings ###
+         geometries = []
+         for jjj, completed_cruise in enumerate(
+             completed_cruises
+         ):  # TODO: threadpool this
+             geometries.append(
+                 self.get_cruise_geometry(cruise_name=completed_cruise, index=jjj)
+             )
+         #
+         self.aggregate_geojson_into_dataframe(geoms=geometries)
+         #
+         print(
+             'Now run this: "tippecanoe --no-feature-limit -zg -o dataset.pmtiles -l cruises dataset.geojson --force"'
+         )
+         # # water-column-sonar-id.pmtiles
+         # Linux command: "tippecanoe --no-feature-limit -zg -o water-column-sonar-id.pmtiles -l cruises dataset.geojson --force"
+         # note: 'cruises' is the name of the layer
+         # size is ~3.3 MB for the pmtiles
+         # then drag-and-drop here: https://pmtiles.io/#map=6.79/39.802/-71.51
+
+     #######################################################
+     # TODO: copy the .pmtiles file to the s3 bucket "noaa-wcsd-pds-index"
+     #######################################################
+
+     #######################################################
+     # TODO: get threadpool working
+     # def open_zarr_stores_with_thread_pool_executor(
+     #     self,
+     #     cruises: list,
+     # ):
+     #     # 'cruises' is a list of cruises to process
+     #     completed_cruises = []
+     #     try:
+     #         with ThreadPoolExecutor(max_workers=32) as executor:
+     #             futures = [
+     #                 executor.submit(
+     #                     self.get_geospatial_info_from_zarr_store,
+     #                     "Henry_B._Bigelow",  # ship_name
+     #                     cruise,  # cruise_name
+     #                 )
+     #                 for cruise in cruises
+     #             ]
+     #             for future in as_completed(futures):
+     #                 result = future.result()
+     #                 if result:
+     #                     completed_cruises.extend([result])
+     #     except Exception as err:
+     #         raise RuntimeError(f"Problem, {err}")
+     #     print("Done opening zarr stores using thread pool.")
+     #     return completed_cruises  # Took ~12 minutes
+
+     #######################################################
+
+
+ ###########################################################
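
The hunk above thins each cruise track with shapely's Douglas-Peucker simplify before writing GeoJSON for tippecanoe. A minimal sketch of that simplify-then-export step, with a hypothetical three-point track and output path (only the tolerance and driver mirror the hunk):

    import geopandas as gpd
    from shapely.geometry import LineString

    # Hypothetical cruise track; tolerance=0.001 degrees is roughly "street level".
    track = LineString([(-72.200, 40.517), (-72.201, 40.518), (-72.210, 40.530)])
    simplified = track.simplify(tolerance=0.001, preserve_topology=True)

    gdf = gpd.GeoDataFrame(
        {"cruise_name": ["HB1906"]}, geometry=[simplified], crs="EPSG:4326"
    )
    gdf.to_file("dataset.geojson", driver="GeoJSON")
    # Then: tippecanoe --no-feature-limit -zg -o dataset.pmtiles -l cruises dataset.geojson --force
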