water-column-sonar-processing 0.0.1__py3-none-any.whl → 26.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of water-column-sonar-processing might be problematic.
- water_column_sonar_processing/__init__.py +13 -0
- water_column_sonar_processing/aws/__init__.py +7 -0
- water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
- water_column_sonar_processing/aws/s3_manager.py +418 -0
- water_column_sonar_processing/aws/s3fs_manager.py +64 -0
- {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
- {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
- water_column_sonar_processing/cruise/__init__.py +4 -0
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +129 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +323 -0
- water_column_sonar_processing/geometry/__init__.py +13 -0
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +241 -0
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +266 -0
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/__init__.py +3 -0
- water_column_sonar_processing/index/index_manager.py +381 -0
- water_column_sonar_processing/model/__init__.py +3 -0
- water_column_sonar_processing/model/zarr_manager.py +741 -0
- water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +331 -0
- water_column_sonar_processing/utility/__init__.py +13 -0
- {model → water_column_sonar_processing}/utility/cleaner.py +7 -7
- water_column_sonar_processing/utility/constants.py +118 -0
- {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
- water_column_sonar_processing/utility/timestamp.py +12 -0
- water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
- water_column_sonar_processing-26.1.14.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing-26.1.14.dist-info/top_level.txt +1 -0
- __init__.py +0 -0
- model/__init__.py +0 -0
- model/aws/__init__.py +0 -0
- model/aws/dynamodb_manager.py +0 -149
- model/aws/s3_manager.py +0 -356
- model/aws/s3fs_manager.py +0 -74
- model/cruise/__init__.py +0 -0
- model/cruise/create_empty_zarr_store.py +0 -166
- model/cruise/resample_regrid.py +0 -248
- model/geospatial/__init__.py +0 -0
- model/geospatial/geometry_manager.py +0 -194
- model/geospatial/geometry_simplification.py +0 -81
- model/geospatial/pmtile_generation.py +0 -74
- model/index/__init__.py +0 -0
- model/index/index.py +0 -228
- model/model.py +0 -138
- model/utility/__init__.py +0 -0
- model/utility/constants.py +0 -56
- model/utility/timestamp.py +0 -12
- model/zarr/__init__.py +0 -0
- model/zarr/bar.py +0 -28
- model/zarr/foo.py +0 -11
- model/zarr/zarr_manager.py +0 -298
- water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
- water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
- water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
model/geospatial/geometry_simplification.py
DELETED
@@ -1,81 +0,0 @@
-# import json
-
-
-# lambda for timestamp in form "yyyy-MM-ddTHH:mm:ssZ"
-# dt = lambda: datetime.now().isoformat(timespec="seconds") + "Z"
-
-# https://shapely.readthedocs.io/en/stable/reference/shapely.MultiLineString.html#shapely.MultiLineString
-"""
-// [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
-// 0  1.0        1° 00′ 0″       country or large region                  111.32 km  102.47 km  78.71 km  43.496 km
-// 1  0.1        0° 06′ 0″       large city or district                   11.132 km  10.247 km  7.871 km  4.3496 km
-// 2  0.01       0° 00′ 36″      town or village                          1.1132 km  1.0247 km  787.1 m   434.96 m
-// 3  0.001      0° 00′ 3.6″     neighborhood, street                     111.32 m   102.47 m   78.71 m   43.496 m
-// 4  0.0001     0° 00′ 0.36″    individual street, land parcel           11.132 m   10.247 m   7.871 m   4.3496 m
-// 5  0.00001    0° 00′ 0.036″   individual trees, door entrance          1.1132 m   1.0247 m   787.1 mm  434.96 mm
-// 6  0.000001   0° 00′ 0.0036″  individual humans                        111.32 mm  102.47 mm  78.71 mm  43.496 mm
-// 7  0.0000001  0° 00′ 0.00036″ practical limit of commercial surveying  11.132 mm  10.247 mm  7.871 mm  4.3496 mm
-"""
-
-"""
-private static final int SRID = 8307;
-private static final double simplificationTolerance = 0.0001;
-private static final long splitGeometryMs = 900000L;
-private static final int batchSize = 10000;
-private static final int geoJsonPrecision = 5;
-final int geoJsonPrecision = 5;
-final double simplificationTolerance = 0.0001;
-final int simplifierBatchSize = 3000;
-final long maxCount = 0;
-private static final double maxAllowedSpeedKnts = 60D;
-
-
-"""
-class GeometrySimplification(object):
-    # TODO: in the future move to standalone library
-    #######################################################
-    def __init__(
-        self,
-    ):
-        pass
-
-    #######################################################
-    def speed_check(
-        self,
-        speed_knots=50,
-    ) -> None:
-        print(speed_knots)
-        pass
-
-    def remove_null_island_values(
-        self,
-        epsilon=1e-5,
-    ) -> None:
-        print(epsilon)
-        pass
-
-    def stream_geometry(
-        self,
-    ) -> None:
-        pass
-
-    def break_linestring_into_multi_linestring(
-        self,
-    ) -> None:
-        # For any line-strings across the antimeridian, break into multilinestring
-        pass
-
-    def simplify(
-        self,
-    ) -> None:
-        pass
-
-    def kalman_filter(
-        self
-    ):
-        # for cruises with bad signal, filter so that
-        pass
-
-    #######################################################
-
-###########################################################
model/geospatial/pmtile_generation.py
DELETED
@@ -1,74 +0,0 @@
-# import json
-# This is a sample Python script.
-import pandas as pd
-# import numpy as np
-import os
-# from glob import glob
-from pathlib import Path
-import geopandas
-# import shapely
-from shapely.geometry import LineString
-# from shapely import wkt
-# import json
-# from shapely.geometry import shape, GeometryCollection
-import fiona
-
-
-class PMTileGeneration(object):
-    #######################################################
-    def __init__(
-        self,
-    ):
-        pass
-
-    #######################################################
-    def generate_geojson_feature_collection(self):
-        # This was used to read from noaa-wcsd-zarr-pds bucket geojson files and then to
-        # generate the geopandas dataframe which could be exported to another comprehensive
-        # geojson file. That
-        result = list(Path("/Users/r2d2/Documents/echofish/geojson").rglob("*.json"))
-        # result = result[:100]
-        iii = 0
-        pieces = []
-        for iii in range(len(result)):
-            file_name = os.path.normpath(result[iii]).split(os.sep)[-1]
-            file_stem = os.path.splitext(os.path.basename(file_name))[0]
-            geom = geopandas.read_file(result[iii]).iloc[0]['geometry']
-            # TODO: Filter (0,0) coordinates
-            if len(geom.coords.xy[0]) < 2:
-                continue
-            geom = LineString(list(zip(geom.coords.xy[1], geom.coords.xy[0])))
-            pieces.append({
-                'ship_name': os.path.normpath(result[iii]).split(os.sep)[-4],
-                'cruise_name': os.path.normpath(result[iii]).split(os.sep)[-3],
-                'file_stem': file_stem,
-                'file_path': result[iii],
-                'geom': geom,
-            })
-        df = pd.DataFrame(pieces)
-        print(df)
-        gps_gdf = geopandas.GeoDataFrame(
-            data=df[['ship_name', 'cruise_name', 'file_stem']],  # try again with file_stem
-            geometry=df['geom'],
-            crs='EPSG:4326'
-        )
-        print(fiona.supported_drivers)
-        # gps_gdf.to_file('dataframe.shp', crs='epsg:4326')
-        # Convert geojson feature collection to pmtiles
-        gps_gdf.to_file('dataframe.geojson', driver='GeoJSON', crs='epsg:4326')
-        print('done')
-        """
-        # need to eliminate visits to null island
-        tippecanoe --no-feature-limit -zg --projection=EPSG:4326 -o dataframe.pmtiles -l cruises dataframe.geojson
-
-        https://docs.protomaps.com/pmtiles/create
-        PMTiles
-        https://drive.google.com/file/d/17Bi-UIXB9IJkIz30BHpiKHXYpCOgRFge/view?usp=sharing
-
-        Viewer
-        https://protomaps.github.io/PMTiles/#map=8.91/56.0234/-166.6346
-        """
-
-    #######################################################
-
-###########################################################
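The docstring above leaves the GeoJSON-to-PMTiles conversion as a shell command. A hedged sketch of driving that same tippecanoe invocation from Python, assuming tippecanoe is installed and on PATH; the flags are copied verbatim from the docstring:

    # Glue-code sketch; tippecanoe is an external tool, not part of this package.
    import subprocess

    def geojson_to_pmtiles(geojson_path="dataframe.geojson", pmtiles_path="dataframe.pmtiles"):
        subprocess.run(
            [
                "tippecanoe",
                "--no-feature-limit",
                "-zg",                     # let tippecanoe guess an appropriate max zoom
                "--projection=EPSG:4326",  # input coordinates are lon/lat
                "-o", pmtiles_path,
                "-l", "cruises",           # layer name referenced by the viewer
                geojson_path,
            ],
            check=True,  # raise CalledProcessError if tippecanoe exits non-zero
        )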
model/index/__init__.py
DELETED
File without changes

model/index/index.py
DELETED
@@ -1,228 +0,0 @@
-import os
-import re
-import pandas as pd
-from datetime import datetime
-from concurrent.futures import ThreadPoolExecutor
-from concurrent.futures import as_completed
-from ..aws.s3_manager import S3Manager
-
-
-class IndexManager:
-
-    def __init__(
-        self,
-        input_bucket_name,
-        calibration_bucket,
-        calibration_key
-    ):
-        self.input_bucket_name = input_bucket_name
-        self.calibration_bucket = calibration_bucket
-        self.calibration_key = calibration_key
-        self.s3_manager = S3Manager()
-
-    #################################################################
-    def list_ships(
-        self,
-        prefix='data/raw/',
-    ):
-        # s3_client = self.s3_manager.s3_client
-        page_iterator = self.s3_manager.paginator.paginate(Bucket=self.input_bucket_name, Prefix=prefix, Delimiter="/")
-        # common_prefixes = s3_client.list_objects(Bucket=self.input_bucket_name, Prefix=prefix, Delimiter='/')
-        # print(common_prefixes)
-        ships = []
-        for page in page_iterator:
-            if 'Contents' in page.keys():
-                ships.extend([k['Prefix'] for k in page['CommonPrefixes']])
-        return ships  # ~76 ships
-
-    #################################################################
-    def list_cruises(
-        self,
-        ship_prefixes,  # e.g. 'data/raw/Alaska_Knight/'
-    ):
-        cruises = []
-        for ship_prefix in ship_prefixes:
-            page_iterator = self.s3_manager.paginator.paginate(Bucket=self.input_bucket_name, Prefix=ship_prefix, Delimiter="/")
-            for page in page_iterator:
-                cruises.extend([k['Prefix'] for k in page['CommonPrefixes']])
-        return cruises  # ~1204 cruises
-
-    #################################################################
-    def list_ek60_cruises(
-        self,
-        cruise_prefixes,
-    ):
-        cruise_sensors = []  # includes all sensor types
-        for cruise_prefix in cruise_prefixes:
-            page_iterator = self.s3_manager.paginator.paginate(Bucket=self.input_bucket_name, Prefix=cruise_prefix, Delimiter="/")
-            for page in page_iterator:
-                cruise_sensors.extend([k['Prefix'] for k in page['CommonPrefixes']])
-        # Note: these are "EK60" by prefix. They still need to be verified by scanning the datagram.
-        return [i for i in cruise_sensors if '/EK60/' in i]  # ~447 different cruises
-
-    #################################################################
-    def get_raw_files(
-        self,
-        ship_name,
-        cruise_name,
-        sensor_name,
-    ):
-        prefix = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/"  # Note no forward slash at beginning
-        page_iterator = self.s3_manager.paginator.paginate(Bucket=self.input_bucket_name, Prefix=prefix, Delimiter="/")
-        all_files = []
-        for page in page_iterator:
-            if 'Contents' in page.keys():
-                all_files.extend([i['Key'] for i in page['Contents']])
-        return [i for i in all_files if i.endswith('.raw')]
-
-    def get_raw_files_csv(
-        self,
-        ship_name,
-        cruise_name,
-        sensor_name,
-    ):
-        raw_files = self.get_raw_files(ship_name=ship_name, cruise_name=cruise_name, sensor_name=sensor_name)
-        files_list = [
-            {
-                'ship_name': ship_name,
-                'cruise_name': cruise_name,
-                'sensor_name': sensor_name,
-                'file_name': os.path.basename(raw_file)
-            } for raw_file in raw_files
-        ]
-        df = pd.DataFrame(files_list)
-        df.to_csv(f'{ship_name}_{cruise_name}.csv', index=False, header=False, sep=' ')
-        print('done')
-
-
-    #################################################################
-    def get_subset_ek60_prefix(
-        self,
-        df: pd.DataFrame
-    ) -> pd.DataFrame:
-        # Returns all objects with 'EK60' in prefix of file path
-        # Note that this can include 'EK80' data that are false-positives
-        # in dataframe with ['key', 'filename', 'ship', 'cruise', 'sensor', 'size', 'date', 'datagram']
-        print("getting subset of ek60 data by prefix")
-        objects = []
-        for row in df.itertuples():
-            row_split = row[1].split(os.sep)
-            if len(row_split) == 6:
-                filename = os.path.basename(row[1])  # 'EX1608_EK60-D20161205-T040300.raw'
-                if filename.endswith(".raw"):
-                    ship_name, cruise_name, sensor_name = row_split[2:5]  # 'Okeanos_Explorer', 'EX1608', 'EK60'
-                    if re.search("[D](\d{8})", filename) is not None and re.search("[T](\d{6})", filename) is not None:
-                        # Parse date if possible e.g.: 'data/raw/Henry_B._Bigelow/HB1006/EK60/HBB-D20100723-T025105.raw'
-                        # and 'data/raw/Henry_B._Bigelow/HB1802/EK60/D20180513-T150250.raw'
-                        date_substring = re.search("[D](\d{8})", filename).group(1)
-                        time_substring = re.search("[T](\d{6})", filename).group(1)
-                        date_string = datetime.strptime(f'{date_substring}{time_substring}', '%Y%m%d%H%M%S')
-                    else:  # otherwise use current date
-                        date_string = f"{datetime.utcnow().isoformat()[:19]}Z"
-                    objects.append(
-                        {
-                            'KEY': row[1],
-                            'FILENAME': filename,
-                            'SHIP': ship_name,
-                            'CRUISE': cruise_name,
-                            'SENSOR': sensor_name,
-                            'SIZE': row[2],
-                            'DATE': date_string,
-                            'DATAGRAM': None
-                        }
-                    )
-        return pd.DataFrame(objects)
-
-    #################################################################
-    def scan_datagram(
-        self,
-        select_key: str
-    ) -> list:
-        # Reads the first 8 bytes of S3 file. Used to determine if ek60 or ek80
-        # Note: uses boto3 session instead of boto3 client: https://github.com/boto/boto3/issues/801
-        # select_key = 'data/raw/Albatross_Iv/AL0403/EK60/L0005-D20040302-T200108-EK60.raw'
-        s3_resource = self.s3_manager.s3_resource
-        obj = s3_resource.Object(bucket_name=self.input_bucket_name, key=select_key)  # XML0
-        first_datagram = obj.get(Range='bytes=3-7')['Body'].read().decode().strip('\x00')
-        # return [{'KEY': select_key, 'DATAGRAM': first_datagram}]
-        ### EK60 data are denoted by 'CON0' ###
-        return first_datagram
-
-    #################################################################
-    def get_subset_datagrams(
-        self,
-        df: pd.DataFrame
-    ) -> list:
-        print("getting subset of datagrams")
-        select_keys = list(df[['KEY', 'CRUISE']].drop_duplicates(subset='CRUISE')['KEY'].values)
-        all_datagrams = []
-        with ThreadPoolExecutor(max_workers=self.max_pool_connections) as executor:
-            futures = [executor.submit(self.scan_datagram, select_key) for select_key in select_keys]
-            for future in as_completed(futures):
-                result = future.result()
-                if result:
-                    all_datagrams.extend(result)
-        return all_datagrams
-
-    #################################################################
-    def get_ek60_objects(
-        self,
-        df: pd.DataFrame,
-        subset_datagrams: list
-    ) -> pd.DataFrame:
-        # for each key write datagram value to all other files in same cruise
-        for subset_datagram in subset_datagrams:
-            if subset_datagram['DATAGRAM'] == 'CON0':
-                select_cruise = df.loc[df['KEY'] == subset_datagram['KEY']]['CRUISE'].iloc[0]
-                df.loc[df['CRUISE'] == select_cruise, ['DATAGRAM']] = subset_datagram['DATAGRAM']
-        return df.loc[df['DATAGRAM'] == 'CON0']
-
-    #################################################################
-    def get_calibration_information(  # tested
-        self,
-    ) -> pd.DataFrame:
-        # Calibration data generated by data manager currently located here:
-        # https://noaa-wcsd-pds-index.s3.amazonaws.com/calibrated_crusies.csv
-        # Note: Data are either:
-        # [1] Calibrated w/ calibration data
-        # [2] Calibrated w/o calibration data
-        # [3] uncalibrated
-        response = self.s3_manager.get_object(bucket_name=self.calibration_bucket, key_name=self.calibration_key)
-        calibration_statuses = pd.read_csv(response.get("Body"))
-        calibration_statuses['DATASET_NAME'] = calibration_statuses['DATASET_NAME'].apply(lambda x: x.split('_EK60')[0])
-        calibration_statuses['CAL_STATE'] = calibration_statuses['CAL_STATE'].apply(lambda x: x.find('Calibrated') >= 0)
-        return calibration_statuses
-
-    #################################################################
-    # def index(  # TODO: get rid of this?
-    #     self
-    # ):
-    #     start_time = datetime.now()  # used for benchmarking
-    #     # Get all object in public dataset bucket
-    #     all_objects = self.get_all_objects()
-    #     #
-    #     subset_ek60_by_prefix = self.get_subset_ek60_prefix(
-    #         df=all_objects[all_objects['Key'].str.contains('EK60')][['Key', 'Size']]
-    #     )
-    #     #
-    #     subset_datagrams = self.get_subset_datagrams(df=subset_ek60_by_prefix)
-    #     print("done getting subset of datagrams")
-    #     ek60_objects = self.get_ek60_objects(subset_ek60_by_prefix, subset_datagrams)
-    #     print("done getting ek60_objects")
-    #     print(start_time)
-    #     calibration_status = self.get_calibration_information(s3)
-    #     cruise_names = list(set(ek60_objects['CRUISE']))
-    #     cruise_names.sort()
-    #     for cruise_name in cruise_names:  # ~322 cruises
-    #         cruise_data = ek60_objects.groupby('CRUISE').get_group(cruise_name)
-    #         ship = cruise_data['SHIP'].iloc[0]
-    #         sensor = cruise_data['SENSOR'].iloc[0]
-    #         datagram = cruise_data['DATAGRAM'].iloc[0]
-    #         file_count = cruise_data.shape[0]
-    #         total_size = np.sum(cruise_data['SIZE'])
-    #         calibrated = cruise_name in calibration_status['DATASET_NAME'].unique()  # ~276 entries
-    #         start_date = np.min(cruise_data['DATE']).isoformat(timespec="seconds") + "Z"
-    #         end_date = np.max(cruise_data['DATE']).isoformat(timespec="seconds") + "Z"
-    #     end_time = datetime.now()  # used for benchmarking
-    #     print(start_time)
-    #     print(end_time)
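The deleted scan_datagram distinguishes EK60 from EK80 by fetching only a 5-byte range of the RAW file header: 'CON0' marks an EK60 configuration datagram, while the inline comment points to 'XML0' for EK80. The same check against a local file, for illustration; the file path is hypothetical:

    # Local-file equivalent of scan_datagram's S3 Range request; sketch only.
    def sniff_datagram_type(raw_path: str) -> str:
        with open(raw_path, "rb") as f:
            f.seek(3)           # same offset as Range='bytes=3-7'
            header = f.read(5)  # e.g. b'\x00CON0' or b'\x00XML0'
        return header.decode().strip("\x00")

    # print(sniff_datagram_type("L0005-D20040302-T200108-EK60.raw"))  # expect 'CON0' for EK60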
model/model.py
DELETED
@@ -1,138 +0,0 @@
-import os
-import json
-import numpy as np
-from src.model.aws.s3_manager import S3Manager
-from src.model.aws.sns_manager import SNSManager
-from src.model.aws.dynamodb_manager import DynamoDBManager
-
-
-###########################################################
-class Model:
-    #######################################################
-    def __init__(
-        self,
-    ):
-        self.input_bucket_name = os.environ['INPUT_BUCKET_NAME']
-        self.output_bucket_name = os.environ['OUTPUT_BUCKET_NAME']
-        self.table_name = os.environ['TABLE_NAME']
-        self.topic_arn = os.environ['TOPIC_ARN']
-        # self.output_bucket_access_key = ?
-        # self.output_bucket_secret_access_key = ?
-
-    def execute(self):
-        input_s3_manager = S3Manager()  # TODO: Need to allow passing in of credentials when writing to protected bucket
-        output_s3_manager = S3Manager()
-        # TODO: s3fs?
-        sns_manager = SNSManager()
-        ddb_manager = DynamoDBManager()
-
-        # [1 of 5] Update Pipeline Status in DynamoDB
-        # self.dynamodb.update_status()
-
-        # [2 of 5] Download Object From Input Bucket
-        return_value = input_s3_manager.get(
-            bucket_name=self.input_bucket_name,
-            key='the_input_key'
-        )
-        print(return_value)
-
-        # [3 of 5] Update Entry in DynamoDB
-        ship_name = 'David_Starr_Jordan'  # TODO: get this from input sns message
-        cruise_name = 'DS0604'
-        sensor_name = 'EK60'
-        file_name = "DSJ0604-D20060406-T113407.raw"
-
-        test_channels = [
-            "GPT 38 kHz 009072055a7f 2 ES38B",
-            "GPT 70 kHz 00907203400a 3 ES70-7C",
-            "GPT 120 kHz 009072034d52 1 ES120-7",
-            "GPT 200 kHz 0090720564e4 4 ES200-7C"
-        ]
-        test_frequencies = [38_000, 70_000, 120_000, 200_000]
-        ddb_manager.update_item(
-            table_name=self.table_name,
-            key={
-                'FILE_NAME': {'S': file_name},  # Partition Key
-                'CRUISE_NAME': {'S': cruise_name},  # Sort Key
-            },
-            expression_attribute_names={
-                '#CH': 'CHANNELS',
-                '#ET': 'END_TIME',
-                '#ED': 'ERROR_DETAIL',
-                '#FR': 'FREQUENCIES',
-                '#MA': 'MAX_ECHO_RANGE',
-                '#MI': 'MIN_ECHO_RANGE',
-                '#ND': 'NUM_PING_TIME_DROPNA',
-                '#PS': 'PIPELINE_STATUS',  # testing this updated
-                '#PT': 'PIPELINE_TIME',  # testing this updated
-                '#SE': 'SENSOR_NAME',
-                '#SH': 'SHIP_NAME',
-                '#ST': 'START_TIME',
-                '#ZB': 'ZARR_BUCKET',
-                '#ZP': 'ZARR_PATH',
-            },
-            expression_attribute_values={
-                ':ch': {'L': [{'S': i} for i in test_channels]},
-                ':et': {'S': '2006-04-06T13:35:28.688Z'},
-                ':ed': {'S': ''},
-                ':fr': {'L': [{'N': str(i)} for i in test_frequencies]},
-                ':ma': {'N': str(np.round(499.7653, 4))},
-                ':mi': {'N': str(np.round(0.25, 4))},
-                ':nd': {'N': str(2458)},
-                ':ps': {'S': 'SUCCESS_AGGREGATOR'},
-                ':pt': {'S': '2023-10-02T08:54:43Z'},
-                ':se': {'S': sensor_name},
-                ':sh': {'S': ship_name},
-                ':st': {'S': '2006-04-06T11:34:07.288Z'},
-                ':zb': {'S': 'r2d2-dev-echofish2-118234403147-echofish-dev-output'},
-                ':zp': {'S': 'level_1/David_Starr_Jordan/DS0604/EK60/DSJ0604-D20060406-T113407.zarr'},
-            },
-            update_expression=(
-                'SET '
-                '#CH = :ch, '
-                '#ET = :et, '
-                '#ED = :ed, '
-                '#FR = :fr, '
-                '#MA = :ma, '
-                '#MI = :mi, '
-                '#ND = :nd, '
-                '#PS = :ps, '
-                '#PT = :pt, '
-                '#SE = :se, '
-                '#SH = :sh, '
-                '#ST = :st, '
-                '#ZB = :zb, '
-                '#ZP = :zp'
-            )
-        )
-
-        # [4 of 5] Write Object to Output Bucket
-        output_s3_manager.put(
-            bucket_name=self.output_bucket_name,
-            key='123',
-            body='456'
-        )
-
-        # [_ of _] Read file-level Zarr store from bucket, Create GeoJSON, Write to bucket
-        # [_ of _] Create empty cruise-level Zarr store
-        # [_ of _] Resample and write to cruise-level Zarr Store
-
-        # [5 of 5] Publish Done Message
-        success_message = {
-            "default": {
-                "shipName": ship_name,
-                "cruiseName": cruise_name,
-                "sensorName": sensor_name,
-                "fileName": file_name,
-            }
-        }
-        sns_manager.publish(
-            topic_arn=self.topic_arn,
-            message=json.dumps(success_message),
-        )
-        print("done...")
-
-    #######################################################
-
-###########################################################
-###########################################################
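DynamoDBManager.update_item is not shown in this diff; assuming it is a thin wrapper, the call above maps onto boto3's native update_item roughly as follows (a sketch, not the package's code):

    # Hypothetical wrapper illustrating what the update_item call above forwards to boto3.
    import boto3

    def update_item(table_name, key, expression_attribute_names,
                    expression_attribute_values, update_expression):
        client = boto3.client("dynamodb")
        return client.update_item(
            TableName=table_name,
            Key=key,  # e.g. {'FILE_NAME': {'S': ...}, 'CRUISE_NAME': {'S': ...}}
            UpdateExpression=update_expression,  # 'SET #CH = :ch, ...'
            ExpressionAttributeNames=expression_attribute_names,    # '#CH' -> 'CHANNELS', ...
            ExpressionAttributeValues=expression_attribute_values,  # ':ch' -> {'L': [...]}, ...
        )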
model/utility/__init__.py
DELETED
File without changes

model/utility/constants.py
DELETED
@@ -1,56 +0,0 @@
-from enum import Enum, Flag, unique
-
-
-@unique
-class Constants(Flag):
-    TILE_SIZE = 512
-
-
-class Coordinates(Enum):
-    """
-    Should try to specify
-        dtype
-        units
-        long_name — most readable description of variable
-        standard_name — name in lowercase and snake_case
-    """
-    PROJECT_NAME = 'echofish'
-
-    DEPTH = 'depth'
-    DEPTH_DTYPE = 'float32'
-    DEPTH_UNITS = 'm'  # TODO: Pint? <https://pint.readthedocs.io/en/stable/>
-    DEPTH_LONG_NAME = 'Depth below surface'
-    DEPTH_STANDARD_NAME = 'depth'
-
-    TIME = 'time'
-    TIME_DTYPE = 'float64'
-    # Note: units and calendar are used downstream by Xarray
-    TIME_UNITS = 'seconds since 1970-01-01 00:00:00'
-    TIME_LONG_NAME = 'Timestamp of each ping'
-    TIME_STANDARD_NAME = 'time'
-    TIME_CALENDAR = 'proleptic_gregorian'
-    # TODO: create test for reading out timestamps in Xarray
-
-    FREQUENCY = 'frequency'
-    FREQUENCY_DTYPE = 'int'
-    FREQUENCY_UNITS = 'Hz'
-    FREQUENCY_LONG_NAME = 'Transducer frequency'
-    FREQUENCY_STANDARD_NAME = 'sound_frequency'
-
-    LATITUDE = 'latitude'
-    LATITUDE_DTYPE = 'float32'
-    LATITUDE_UNITS = 'degrees_north'
-    LATITUDE_LONG_NAME = 'Latitude'
-    LATITUDE_STANDARD_NAME = 'latitude'
-
-    LONGITUDE = 'longitude'
-    LONGITUDE_DTYPE = 'float32'
-    LONGITUDE_UNITS = 'degrees_east'
-    LONGITUDE_LONG_NAME = 'Longitude'
-    LONGITUDE_STANDARD_NAME = 'longitude'
-
-    SV = 'Sv'
-    SV_DTYPE = 'float32'  # TODO: experiment with dtype of int
-    SV_UNITS = 'dB'
-    SV_LONG_NAME = 'Volume backscattering strength (Sv re 1 m-1)'
-    SV_STANDARD_NAME = 'volume_backscattering_strength'
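The comment in the deleted constants notes that the time units and calendar are consumed downstream by Xarray. Concretely, CF-style attributes like TIME_UNITS and TIME_CALENDAR let xarray decode raw float seconds into datetimes; a small illustration with made-up data values:

    # Shows how xarray's CF decoding interprets the attributes above; values are fabricated.
    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"Sv": ("time", np.array([-70.2, -69.8], dtype="float32"))},
        coords={"time": ("time", np.array([0.0, 1.5], dtype="float64"))},
    )
    ds["time"].attrs = {
        "units": "seconds since 1970-01-01 00:00:00",
        "calendar": "proleptic_gregorian",
    }
    decoded = xr.decode_cf(ds)     # 'time' becomes datetime64[ns]
    print(decoded["time"].values)  # 1970-01-01T00:00:00 and 1970-01-01T00:00:01.5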
model/utility/timestamp.py
DELETED
@@ -1,12 +0,0 @@
-from datetime import datetime
-
-
-###########################################################
-class Timestamp:
-    @staticmethod
-    def get_timestamp():
-        # return timestamp in form:
-        # PIPELINE_TIME = '2024-03-29T19:36:52.433Z'
-        return f'{datetime.utcnow().isoformat()[:23]}Z'
-
-###########################################################
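One caveat for anyone porting this helper: datetime.utcnow() is deprecated as of Python 3.12. A timezone-aware equivalent that produces the same 'YYYY-MM-DDTHH:MM:SS.mmmZ' shape (sketch):

    # Aware-datetime replacement for the deleted helper; datetime.utcnow() is deprecated in 3.12+.
    from datetime import datetime, timezone

    def get_timestamp() -> str:
        now = datetime.now(timezone.utc)
        # isoformat() on an aware UTC datetime ends with '+00:00'; trim to milliseconds and append 'Z'.
        return now.isoformat(timespec="milliseconds").replace("+00:00", "Z")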
model/zarr/__init__.py
DELETED
File without changes

model/zarr/bar.py
DELETED
@@ -1,28 +0,0 @@
-import requests
-
-
-# class Bar(object):
-#
-#     def biz(self):
-#         pass
-
-
-# class Bar(object):
-#
-#     def sync(self, id, query_first):
-#         if query_first:
-#             requests.get('/remote/api/{id}'.format(id=id))
-#
-#         requests.put(
-#             '/remote/other/api/{id}'.format(id=id),
-#             data=123  # current_data()
-#         )
-
-# class Bar(object):
-#     def biz(self, url, method, data, headers):
-#         pass
-
-class Bar(object):
-
-    def biz(self):
-        return 1