water-column-sonar-processing 25.11.1__py3-none-any.whl → 26.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of water-column-sonar-processing might be problematic. Click here for more details.
- water_column_sonar_processing/aws/s3_manager.py +2 -4
- water_column_sonar_processing/aws/s3fs_manager.py +1 -9
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +19 -81
- water_column_sonar_processing/cruise/resample_regrid.py +88 -104
- water_column_sonar_processing/geometry/__init__.py +2 -0
- water_column_sonar_processing/geometry/elevation_manager.py +2 -2
- water_column_sonar_processing/geometry/geometry_manager.py +11 -13
- water_column_sonar_processing/geometry/line_simplification.py +10 -10
- water_column_sonar_processing/geometry/pmtile_generation.py +8 -3
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +43 -46
- water_column_sonar_processing/model/zarr_manager.py +533 -514
- water_column_sonar_processing/processing/raw_to_zarr.py +45 -139
- water_column_sonar_processing/utility/cleaner.py +2 -1
- water_column_sonar_processing/utility/constants.py +29 -29
- water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/RECORD +20 -20
- water_column_sonar_processing/process.py +0 -149
- water_column_sonar_processing-25.11.1.dist-info/METADATA +0 -182
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +0 -0
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/licenses/LICENSE +0 -0
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import os
|
|
2
1
|
from pathlib import Path
|
|
3
2
|
|
|
4
3
|
import geopandas
|
|
@@ -8,6 +7,7 @@ import pandas as pd
|
|
|
8
7
|
from water_column_sonar_processing.aws import S3Manager
|
|
9
8
|
from water_column_sonar_processing.utility import Cleaner
|
|
10
9
|
|
|
10
|
+
|
|
11
11
|
# // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
|
|
12
12
|
# // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
|
|
13
13
|
# // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
|
|
@@ -24,7 +24,7 @@ class GeometryManager:
|
|
|
24
24
|
def __init__(
|
|
25
25
|
self,
|
|
26
26
|
):
|
|
27
|
-
self.DECIMAL_PRECISION =
|
|
27
|
+
self.DECIMAL_PRECISION = 6 # precision for GPS coordinates
|
|
28
28
|
self.SIMPLIFICATION_TOLERANCE = 0.0001 # RDP simplification to "street level"
|
|
29
29
|
|
|
30
30
|
#######################################################
|
|
@@ -44,12 +44,10 @@ class GeometryManager:
|
|
|
44
44
|
|
|
45
45
|
print("Getting GPS dataset from echopype object.")
|
|
46
46
|
try:
|
|
47
|
-
latitude =
|
|
48
|
-
echodata.platform.latitude.values
|
|
49
|
-
)
|
|
50
|
-
longitude =
|
|
51
|
-
echodata.platform.longitude.values, self.DECIMAL_PRECISION
|
|
52
|
-
)
|
|
47
|
+
latitude = (
|
|
48
|
+
echodata.platform.latitude.values
|
|
49
|
+
) # TODO: DONT get values from here!
|
|
50
|
+
longitude = echodata.platform.longitude.values
|
|
53
51
|
|
|
54
52
|
# RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
|
|
55
53
|
# 'nmea_times' are times from the nmea datalogger associated with GPS
|
|
@@ -192,8 +190,8 @@ class GeometryManager:
|
|
|
192
190
|
# can include np.nan values?
|
|
193
191
|
|
|
194
192
|
#######################################################
|
|
193
|
+
@staticmethod
|
|
195
194
|
def read_s3_geo_json(
|
|
196
|
-
self,
|
|
197
195
|
ship_name,
|
|
198
196
|
cruise_name,
|
|
199
197
|
sensor_name,
|
|
@@ -234,10 +232,10 @@ class GeometryManager:
|
|
|
234
232
|
|
|
235
233
|
############################################################################
|
|
236
234
|
# COMES from the raw-to-zarr conversion
|
|
237
|
-
def __write_geojson_to_file(self, store_name, data) -> None:
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
235
|
+
# def __write_geojson_to_file(self, store_name, data) -> None:
|
|
236
|
+
# print("Writing GeoJSON to file.")
|
|
237
|
+
# with open(os.path.join(store_name, "geo.json"), "w") as outfile:
|
|
238
|
+
# outfile.write(data)
|
|
241
239
|
|
|
242
240
|
|
|
243
241
|
###########################################################
|
|
@@ -71,11 +71,11 @@ class LineSimplification:
|
|
|
71
71
|
pass
|
|
72
72
|
|
|
73
73
|
#######################################################
|
|
74
|
+
@staticmethod
|
|
74
75
|
def kalman_filter(
|
|
75
|
-
self,
|
|
76
76
|
longitudes,
|
|
77
77
|
latitudes,
|
|
78
|
-
)
|
|
78
|
+
):
|
|
79
79
|
"""
|
|
80
80
|
# TODO: need to use masked array to get the right number of values
|
|
81
81
|
"""
|
|
@@ -102,8 +102,8 @@ class LineSimplification:
|
|
|
102
102
|
return smoothed_state_means[:, [0, 2]]
|
|
103
103
|
|
|
104
104
|
#######################################################
|
|
105
|
+
@staticmethod
|
|
105
106
|
def get_speeds(
|
|
106
|
-
self,
|
|
107
107
|
times: np.ndarray, # don't really need time, do need to segment the dataset first
|
|
108
108
|
latitudes: np.ndarray,
|
|
109
109
|
longitudes: np.ndarray,
|
|
@@ -136,13 +136,13 @@ class LineSimplification:
|
|
|
136
136
|
# returns the speed in meters per second #TODO: get speed in knots
|
|
137
137
|
return speed_meters_per_second.to_numpy(dtype="float32") # includes nan
|
|
138
138
|
|
|
139
|
-
def remove_null_island_values(
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
) -> None:
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
139
|
+
# def remove_null_island_values(
|
|
140
|
+
# self,
|
|
141
|
+
# epsilon=1e-5,
|
|
142
|
+
# ) -> None:
|
|
143
|
+
# # TODO: low priority
|
|
144
|
+
# print(epsilon)
|
|
145
|
+
# pass
|
|
146
146
|
|
|
147
147
|
def break_linestring_into_multi_linestring(
|
|
148
148
|
self,
|
|
@@ -36,17 +36,20 @@ class PMTileGeneration(object):
|
|
|
36
36
|
self.sensor_name = "EK60"
|
|
37
37
|
|
|
38
38
|
#######################################################
|
|
39
|
-
|
|
39
|
+
@staticmethod
|
|
40
|
+
def check_all_cruises(bucket_name, cruises):
|
|
40
41
|
completed = []
|
|
41
42
|
for cruise_name in cruises:
|
|
42
43
|
print(cruise_name)
|
|
43
44
|
try:
|
|
44
45
|
zarr_store = f"{cruise_name}.zarr"
|
|
45
46
|
s3_zarr_store_path = f"{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{zarr_store}"
|
|
47
|
+
kwargs = {"consolidated": False}
|
|
46
48
|
cruise = xr.open_dataset(
|
|
47
49
|
filename_or_obj=f"s3://{s3_zarr_store_path}",
|
|
48
50
|
engine="zarr",
|
|
49
51
|
storage_options={"anon": True},
|
|
52
|
+
**kwargs,
|
|
50
53
|
)
|
|
51
54
|
width = cruise.Sv.shape[1]
|
|
52
55
|
height = cruise.Sv.shape[0]
|
|
@@ -67,7 +70,8 @@ class PMTileGeneration(object):
|
|
|
67
70
|
return completed
|
|
68
71
|
|
|
69
72
|
#######################################################
|
|
70
|
-
|
|
73
|
+
@staticmethod
|
|
74
|
+
def get_cruise_geometry(cruise_name, index):
|
|
71
75
|
print(cruise_name)
|
|
72
76
|
try:
|
|
73
77
|
pieces = []
|
|
@@ -117,7 +121,8 @@ class PMTileGeneration(object):
|
|
|
117
121
|
raise RuntimeError(f"Problem parsing Zarr stores, {err}")
|
|
118
122
|
|
|
119
123
|
#######################################################
|
|
120
|
-
|
|
124
|
+
@staticmethod
|
|
125
|
+
def aggregate_geojson_into_dataframe(geoms):
|
|
121
126
|
gps_gdf = gpd.GeoDataFrame(
|
|
122
127
|
columns=["id", "ship", "cruise", "sensor", "geometry"],
|
|
123
128
|
geometry="geometry",
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import geopandas as gpd
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from shapely.geometry import Point
|
|
5
|
+
|
|
6
|
+
from water_column_sonar_processing.model import ZarrManager
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Convert "meters per second" to "knots"
|
|
10
|
+
# meters_per_second_to_knots = lambda mps_value: mps_value * 1.94384
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Spatiotemporal:
    """Compute per-sample speed and distance from a time-stamped GPS track.

    The track is given as parallel sequences of timestamps (integer
    nanoseconds), latitudes and longitudes. Metrics are differences between
    each sample and the one before it.
    """

    #######################################################
    def __init__(
        self,
    ):
        # Used to convert nanosecond time differences to seconds.
        self.NANOSECONDS_PER_SECOND = 1e9
        # Cutoff thresholds; no method of this class reads them yet.
        self.CUTOFF_DISTANCE_METERS = 50.0
        self.CUTOFF_TIME_SECONDS = 10.0

    #######################################################
    @staticmethod
    def meters_per_second_to_knots(
        mps_value,
    ):
        """Convert a speed (scalar or array) from meters per second to knots."""
        return mps_value * 1.94384

    #######################################################
    def compute_speed_and_distance(
        self,
        times_ns,  #: np.ndarray[tuple[int], np.dtype[np.int64]],
        latitudes,  #: np.ndarray,
        longitudes,  #: np.ndarray,
    ) -> pd.DataFrame:
        """Return speed, distance and elapsed time between consecutive samples.

        Parameters
        ----------
        times_ns
            Sample timestamps; each element is passed through ``int`` and
            interpreted as a ``datetime64[ns]`` value.
        latitudes, longitudes
            Coordinates of each sample, read as EPSG:4326 and paired
            element-wise with ``times_ns``.

        Returns
        -------
        pd.DataFrame
            Indexed by the converted timestamps, with float32 columns
            ``speed_knots``, ``distance_meters`` and ``diff_seconds``. The
            first row has no predecessor, so its distance and time difference
            are copied from the second row when one exists.

        Raises
        ------
        RuntimeError
            If any step fails; the original exception is chained as the cause.
        """
        try:
            # fix times
            times = np.array([np.datetime64(int(i), "ns") for i in times_ns])
            geom = [Point(xy) for xy in zip(longitudes, latitudes)]
            points_df = gpd.GeoDataFrame({"geometry": geom}, crs="EPSG:4326")
            # Conversion to a rectilinear projection coordinate system where distance can be calculated with pythagorean theorem
            # EPSG:4087, WGS 84 / World Equidistant Cylindrical
            # https://epsg.io/4087
            points_df.to_crs(epsg=4087, inplace=True)
            distance_diffs = points_df.distance(points_df.geometry.shift())
            # Issue: np.max(distance_diffs) = 3397 meters
            # Explicit int64: a bare ``int`` cast is platform-dependent on
            # older NumPy and could overflow nanosecond differences.
            time_diffs_ns = np.append(0, np.diff(times).astype(np.int64))
            if len(times) > 1:
                # missing first datapoint, backfill from the second sample
                distance_diffs.iloc[0] = distance_diffs.iloc[1]
                time_diffs_ns[0] = time_diffs_ns[1]
            time_diffs_seconds = time_diffs_ns / self.NANOSECONDS_PER_SECOND
            # Speed in meters per second, converted to knots below
            speed_meters_per_second = np.array(
                (distance_diffs / time_diffs_ns * self.NANOSECONDS_PER_SECOND),
                dtype=np.float32,
            )
            knots = self.meters_per_second_to_knots(speed_meters_per_second)
            metrics_df = pd.DataFrame(
                {
                    "speed_knots": knots.astype(dtype=np.float32),
                    "distance_meters": distance_diffs.to_numpy(dtype=np.float32),
                    "diff_seconds": time_diffs_seconds.astype(np.float32),
                },
                index=times,
            )
            #
            return metrics_df
        except Exception as err:
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"Exception encountered, {err}") from err

    #######################################################
    def add_speed_and_distance(
        self,
        ship_name,
        cruise_name,
        sensor_name,
        bucket_name,
        endpoint_url=None,
    ) -> None:
        """Compute speed and distance for a cruise and write them to its Zarr store.

        Opens the store through ``ZarrManager.open_s3_zarr_store_with_zarr``,
        reads its ``longitude``, ``latitude`` and ``time`` arrays, and writes
        the results into the existing ``speed`` (knots) and ``distance``
        (meters) arrays.

        Raises
        ------
        RuntimeError
            If opening, computing or writing fails; the original exception is
            chained as the cause.
        """
        try:
            zarr_manager = ZarrManager()
            zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
                ship_name=ship_name,
                cruise_name=cruise_name,
                sensor_name=sensor_name,
                output_bucket_name=bucket_name,
                endpoint_url=endpoint_url,
            )
            longitudes = zarr_store["longitude"][:]
            latitudes = zarr_store["latitude"][:]
            times = zarr_store["time"][:]
            #
            metrics_df = self.compute_speed_and_distance(
                times_ns=times,
                latitudes=latitudes,
                longitudes=longitudes,
            )
            # Write the speed and distance to the output zarr store
            zarr_store["speed"][:] = metrics_df.speed_knots.values
            zarr_store["distance"][:] = metrics_df.distance_meters.values
        except Exception as err:
            raise RuntimeError(
                f"Exception encountered writing the speed and distance, {err}"
            ) from err
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
###########################################################
|
|
@@ -2,10 +2,8 @@ import os
|
|
|
2
2
|
import re
|
|
3
3
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
4
4
|
from datetime import datetime
|
|
5
|
-
from hashlib import sha256
|
|
6
5
|
|
|
7
|
-
import networkx as nx
|
|
8
|
-
import numpy as np
|
|
6
|
+
# import networkx as nx
|
|
9
7
|
import pandas as pd
|
|
10
8
|
|
|
11
9
|
from water_column_sonar_processing.aws import S3Manager
|
|
@@ -120,6 +118,7 @@ class IndexManager:
|
|
|
120
118
|
for res in page_iterator:
|
|
121
119
|
if "Key" in res:
|
|
122
120
|
return res["Key"]
|
|
121
|
+
return None
|
|
123
122
|
# else raise exception?
|
|
124
123
|
|
|
125
124
|
# DSJ0604-D20060406-T050022.bot 2kB == 2152 'Size'
|
|
@@ -190,9 +189,8 @@ class IndexManager:
|
|
|
190
189
|
return files_list
|
|
191
190
|
|
|
192
191
|
#################################################################
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
) -> pd.DataFrame: # TODO: is this used?
|
|
192
|
+
@staticmethod
|
|
193
|
+
def get_subset_ek60_prefix(df: pd.DataFrame) -> pd.DataFrame: # TODO: is this used?
|
|
196
194
|
# Returns all objects with 'EK60' in prefix of file path
|
|
197
195
|
# Note that this can include 'EK80' dataset that are false-positives
|
|
198
196
|
# in dataframe with ['key', 'filename', 'ship', 'cruise', 'sensor', 'size', 'date', 'datagram']
|
|
@@ -274,9 +272,8 @@ class IndexManager:
|
|
|
274
272
|
return all_datagrams
|
|
275
273
|
|
|
276
274
|
#################################################################
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
) -> pd.DataFrame:
|
|
275
|
+
@staticmethod
|
|
276
|
+
def get_ek60_objects(df: pd.DataFrame, subset_datagrams: list) -> pd.DataFrame:
|
|
280
277
|
# for each key write datagram value to all other files in same cruise
|
|
281
278
|
for subset_datagram in subset_datagrams:
|
|
282
279
|
if subset_datagram["DATAGRAM"] == "CON0":
|
|
@@ -345,40 +342,40 @@ class IndexManager:
|
|
|
345
342
|
# print(end_time)
|
|
346
343
|
|
|
347
344
|
# TODO: wip
|
|
348
|
-
def build_merkle_tree(self):
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
345
|
+
# def build_merkle_tree(self):
|
|
346
|
+
# G = nx.DiGraph()
|
|
347
|
+
# # https://noaa-wcsd-pds.s3.amazonaws.com/index.html#data/raw/Henry_B._Bigelow/HB0707/
|
|
348
|
+
# ship_name = "Henry_B._Bigelow"
|
|
349
|
+
# cruise_name = "HB0707"
|
|
350
|
+
# # cruise_name = "HB0805"
|
|
351
|
+
# prefix = f"data/raw/{ship_name}/{cruise_name}/"
|
|
352
|
+
# # prefix = f"data/raw/{ship_name}/"
|
|
353
|
+
# page_iterator = self.s3_manager.paginator.paginate(
|
|
354
|
+
# Bucket=self.input_bucket_name,
|
|
355
|
+
# Prefix=prefix,
|
|
356
|
+
# )
|
|
357
|
+
# for page in page_iterator:
|
|
358
|
+
# for contents in page["Contents"]:
|
|
359
|
+
# obj_key = contents["Key"]
|
|
360
|
+
# # https://datatracker.ietf.org/doc/html/rfc7232#section-2.3
|
|
361
|
+
# obj_etag = contents["ETag"].split('"')[1] # properties
|
|
362
|
+
# obj_size = contents["Size"]
|
|
363
|
+
# basename = os.path.basename(obj_key)
|
|
364
|
+
# G.add_node(
|
|
365
|
+
# node_for_adding=basename, ETag=obj_etag, Size=obj_size, Key=obj_key
|
|
366
|
+
# ) # TODO: add parent hash
|
|
367
|
+
# split_path = os.path.normpath(obj_key).split(os.path.sep)
|
|
368
|
+
# # split_path: ['dataset', 'raw', 'Henry_B._Bigelow', 'HB0707', 'EK60', 'D20070712-T004447.raw']
|
|
369
|
+
# for previous, current in zip(split_path, split_path[1:]):
|
|
370
|
+
# if not G.has_edge(previous, current):
|
|
371
|
+
# G.add_edge(previous, current)
|
|
372
|
+
# # print(G)
|
|
373
|
+
# etag_set = frozenset(
|
|
374
|
+
# [k for j, k in list(G.nodes.data("ETag")) if k is not None]
|
|
375
|
+
# )
|
|
376
|
+
# new_hash = sha256(str(etag_set.__hash__()).encode("utf-8")).hexdigest()
|
|
377
|
+
# total_size = [k for j, k in list(G.nodes.data("Size")) if k is not None]
|
|
378
|
+
# print(np.sum(total_size)) # 22.24 Terabytes in Henry_B._Bigelow cruises
|
|
379
|
+
# print(" ")
|
|
380
|
+
# print(new_hash)
|
|
381
|
+
# return new_hash
|