water-column-sonar-processing 0.0.1__py3-none-any.whl → 26.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.

Files changed (60)
  1. water_column_sonar_processing/__init__.py +13 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -0
  3. water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
  4. water_column_sonar_processing/aws/s3_manager.py +418 -0
  5. water_column_sonar_processing/aws/s3fs_manager.py +64 -0
  6. {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
  7. {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +129 -0
  10. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  11. water_column_sonar_processing/cruise/resample_regrid.py +323 -0
  12. water_column_sonar_processing/geometry/__init__.py +13 -0
  13. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  14. water_column_sonar_processing/geometry/geometry_manager.py +241 -0
  15. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  16. water_column_sonar_processing/geometry/pmtile_generation.py +266 -0
  17. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  18. water_column_sonar_processing/index/__init__.py +3 -0
  19. water_column_sonar_processing/index/index_manager.py +381 -0
  20. water_column_sonar_processing/model/__init__.py +3 -0
  21. water_column_sonar_processing/model/zarr_manager.py +741 -0
  22. water_column_sonar_processing/processing/__init__.py +4 -0
  23. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  24. water_column_sonar_processing/processing/raw_to_zarr.py +331 -0
  25. water_column_sonar_processing/utility/__init__.py +13 -0
  26. {model → water_column_sonar_processing}/utility/cleaner.py +7 -7
  27. water_column_sonar_processing/utility/constants.py +118 -0
  28. {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
  29. water_column_sonar_processing/utility/timestamp.py +12 -0
  30. water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
  31. water_column_sonar_processing-26.1.14.dist-info/RECORD +34 -0
  32. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +1 -1
  33. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info/licenses}/LICENSE +1 -1
  34. water_column_sonar_processing-26.1.14.dist-info/top_level.txt +1 -0
  35. __init__.py +0 -0
  36. model/__init__.py +0 -0
  37. model/aws/__init__.py +0 -0
  38. model/aws/dynamodb_manager.py +0 -149
  39. model/aws/s3_manager.py +0 -356
  40. model/aws/s3fs_manager.py +0 -74
  41. model/cruise/__init__.py +0 -0
  42. model/cruise/create_empty_zarr_store.py +0 -166
  43. model/cruise/resample_regrid.py +0 -248
  44. model/geospatial/__init__.py +0 -0
  45. model/geospatial/geometry_manager.py +0 -194
  46. model/geospatial/geometry_simplification.py +0 -81
  47. model/geospatial/pmtile_generation.py +0 -74
  48. model/index/__init__.py +0 -0
  49. model/index/index.py +0 -228
  50. model/model.py +0 -138
  51. model/utility/__init__.py +0 -0
  52. model/utility/constants.py +0 -56
  53. model/utility/timestamp.py +0 -12
  54. model/zarr/__init__.py +0 -0
  55. model/zarr/bar.py +0 -28
  56. model/zarr/foo.py +0 -11
  57. model/zarr/zarr_manager.py +0 -298
  58. water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
  59. water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
  60. water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
model/geospatial/geometry_simplification.py DELETED
@@ -1,81 +0,0 @@
- # import json
-
-
- # lambda for timestamp in form "yyyy-MM-ddTHH:mm:ssZ"
- # dt = lambda: datetime.now().isoformat(timespec="seconds") + "Z"
-
- # https://shapely.readthedocs.io/en/stable/reference/shapely.MultiLineString.html#shapely.MultiLineString
- """
- // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
- // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
- // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
- // 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
- // 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
- // 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
- // 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
- // 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
- // 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
- """
-
- """
- private static final int SRID = 8307;
- private static final double simplificationTolerance = 0.0001;
- private static final long splitGeometryMs = 900000L;
- private static final int batchSize = 10000;
- private static final int geoJsonPrecision = 5;
- final int geoJsonPrecision = 5;
- final double simplificationTolerance = 0.0001;
- final int simplifierBatchSize = 3000;
- final long maxCount = 0;
- private static final double maxAllowedSpeedKnts = 60D;
-
-
- """
- class GeometrySimplification(object):
-     # TODO: in the future move to standalone library
-     #######################################################
-     def __init__(
-         self,
-     ):
-         pass
-
-     #######################################################
-     def speed_check(
-         self,
-         speed_knots=50,
-     ) -> None:
-         print(speed_knots)
-         pass
-
-     def remove_null_island_values(
-         self,
-         epsilon=1e-5,
-     ) -> None:
-         print(epsilon)
-         pass
-
-     def stream_geometry(
-         self,
-     ) -> None:
-         pass
-
-     def break_linestring_into_multi_linestring(
-         self,
-     ) -> None:
-         # For any line-strings across the antimeridian, break into multilinestring
-         pass
-
-     def simplify(
-         self,
-     ) -> None:
-         pass
-
-     def kalman_filter(
-         self
-     ):
-         # for cruises with bad signal, filter so that
-         pass
-
-     #######################################################
-
- ###########################################################
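Every method in the deleted GeometrySimplification class was a stub. For orientation only, a minimal sketch of what break_linestring_into_multi_linestring describes (splitting a track wherever consecutive fixes jump across the antimeridian) might look like this; the function name and the longitude-jump heuristic are illustrative assumptions, not the package's implementation:

from shapely.geometry import LineString, MultiLineString

def split_at_antimeridian(line: LineString) -> MultiLineString:
    # Start a new segment whenever longitude jumps by more than 180 degrees,
    # which signals a crossing of the +/-180 meridian rather than real motion.
    coords = list(line.coords)
    segments, current = [], [coords[0]]
    for prev, curr in zip(coords, coords[1:]):
        if abs(curr[0] - prev[0]) > 180.0:
            segments.append(current)
            current = [curr]
        else:
            current.append(curr)
    segments.append(current)
    # MultiLineString members need at least two points each.
    return MultiLineString([seg for seg in segments if len(seg) >= 2])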
model/geospatial/pmtile_generation.py DELETED
@@ -1,74 +0,0 @@
- # import json
- # This is a sample Python script.
- import pandas as pd
- # import numpy as np
- import os
- # from glob import glob
- from pathlib import Path
- import geopandas
- # import shapely
- from shapely.geometry import LineString
- # from shapely import wkt
- # import json
- # from shapely.geometry import shape, GeometryCollection
- import fiona
-
-
- class PMTileGeneration(object):
-     #######################################################
-     def __init__(
-         self,
-     ):
-         pass
-
-     #######################################################
-     def generate_geojson_feature_collection(self):
-         # This was used to read from noaa-wcsd-zarr-pds bucket geojson files and then to
-         # generate the geopandas dataframe which could be exported to another comprehensive
-         # geojson file. That
-         result = list(Path("/Users/r2d2/Documents/echofish/geojson").rglob("*.json"))
-         # result = result[:100]
-         iii = 0
-         pieces = []
-         for iii in range(len(result)):
-             file_name = os.path.normpath(result[iii]).split(os.sep)[-1]
-             file_stem = os.path.splitext(os.path.basename(file_name))[0]
-             geom = geopandas.read_file(result[iii]).iloc[0]['geometry']
-             # TDOO: Filter (0,0) coordinates
-             if len(geom.coords.xy[0]) < 2:
-                 continue
-             geom = LineString(list(zip(geom.coords.xy[1], geom.coords.xy[0])))
-             pieces.append({
-                 'ship_name': os.path.normpath(result[iii]).split(os.sep)[-4],
-                 'cruise_name': os.path.normpath(result[iii]).split(os.sep)[-3],
-                 'file_stem': file_stem,
-                 'file_path': result[iii],
-                 'geom': geom,
-             })
-         df = pd.DataFrame(pieces)
-         print(df)
-         gps_gdf = geopandas.GeoDataFrame(
-             data=df[['ship_name', 'cruise_name', 'file_stem']], # try again with file_stem
-             geometry=df['geom'],
-             crs='EPSG:4326'
-         )
-         print(fiona.supported_drivers)
-         # gps_gdf.to_file('dataframe.shp', crs='epsg:4326')
-         # Convert geojson feature collection to pmtiles
-         gps_gdf.to_file('dataframe.geojson', driver='GeoJSON', crs='epsg:4326')
-         print('done')
-         """
-         # need to eliminate visits to null island
-         tippecanoe --no-feature-limit -zg --projection=EPSG:4326 -o dataframe.pmtiles -l cruises dataframe.geojson
-
-         https://docs.protomaps.com/pmtiles/create
-         PMTiles
-         https://drive.google.com/file/d/17Bi-UIXB9IJkIz30BHpiKHXYpCOgRFge/view?usp=sharing
-
-         Viewer
-         https://protomaps.github.io/PMTiles/#map=8.91/56.0234/-166.6346
-         """
-
-     #######################################################
-
- ###########################################################
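The "TDOO: Filter (0,0) coordinates" note above was never implemented in this file. In the spirit of the remove_null_island_values stub from geometry_simplification.py, a hedged sketch of such a filter (the function name and epsilon threshold are assumptions):

from typing import Optional
from shapely.geometry import LineString

def drop_null_island(geom: LineString, epsilon: float = 1e-5) -> Optional[LineString]:
    # GPS dropouts are often logged as (0, 0), i.e. "null island", and should be
    # removed before the GeoJSON is handed to tippecanoe.
    kept = [(x, y) for x, y in geom.coords if abs(x) > epsilon or abs(y) > epsilon]
    return LineString(kept) if len(kept) >= 2 else None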
model/index/__init__.py DELETED
File without changes
model/index/index.py DELETED
@@ -1,228 +0,0 @@
- import os
- import re
- import pandas as pd
- from datetime import datetime
- from concurrent.futures import ThreadPoolExecutor
- from concurrent.futures import as_completed
- from ..aws.s3_manager import S3Manager
-
-
- class IndexManager:
-
-     def __init__(
-         self,
-         input_bucket_name,
-         calibration_bucket,
-         calibration_key
-     ):
-         self.input_bucket_name = input_bucket_name
-         self.calibration_bucket = calibration_bucket
-         self.calibration_key = calibration_key
-         self.s3_manager = S3Manager()
-
-     #################################################################
-     def list_ships(
-         self,
-         prefix='data/raw/',
-     ):
-         # s3_client = self.s3_manager.s3_client
-         page_iterator = self.s3_manager.paginator.paginate(Bucket=self.input_bucket_name, Prefix=prefix, Delimiter="/")
-         # common_prefixes = s3_client.list_objects(Bucket=self.input_bucket_name, Prefix=prefix, Delimiter='/')
-         # print(common_prefixes)
-         ships = []
-         for page in page_iterator:
-             if 'Contents' in page.keys():
-                 ships.extend([k['Prefix'] for k in page['CommonPrefixes']])
-         return ships # ~76 ships
-
-     #################################################################
-     def list_cruises(
-         self,
-         ship_prefixes, # e.g. 'data/raw/Alaska_Knight/'
-     ):
-         cruises = []
-         for ship_prefix in ship_prefixes:
-             page_iterator = self.s3_manager.paginator.paginate(Bucket=self.input_bucket_name, Prefix=ship_prefix, Delimiter="/")
-             for page in page_iterator:
-                 cruises.extend([k['Prefix'] for k in page['CommonPrefixes']])
-         return cruises # ~1204 cruises
-
-     #################################################################
-     def list_ek60_cruises(
-         self,
-         cruise_prefixes,
-     ):
-         cruise_sensors = [] # includes all sensor types
-         for cruise_prefix in cruise_prefixes:
-             page_iterator = self.s3_manager.paginator.paginate(Bucket=self.input_bucket_name, Prefix=cruise_prefix, Delimiter="/")
-             for page in page_iterator:
-                 cruise_sensors.extend([k['Prefix'] for k in page['CommonPrefixes']])
-         # Note: these are "EK60" by prefix. They still need to be verified by scanning the datagram.
-         return [i for i in cruise_sensors if '/EK60/' in i] # ~447 different cruises
-
-     #################################################################
-     def get_raw_files(
-         self,
-         ship_name,
-         cruise_name,
-         sensor_name,
-     ):
-         prefix = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/" # Note no forward slash at beginning
-         page_iterator = self.s3_manager.paginator.paginate(Bucket=self.input_bucket_name, Prefix=prefix, Delimiter="/")
-         all_files = []
-         for page in page_iterator:
-             if 'Contents' in page.keys():
-                 all_files.extend([i['Key'] for i in page['Contents']])
-         return [i for i in all_files if i.endswith('.raw')]
-
-     def get_raw_files_csv(
-         self,
-         ship_name,
-         cruise_name,
-         sensor_name,
-     ):
-         raw_files = self.get_raw_files(ship_name=ship_name, cruise_name=cruise_name, sensor_name=sensor_name)
-         files_list = [
-             {
-                 'ship_name': ship_name,
-                 'cruise_name': cruise_name,
-                 'sensor_name': sensor_name,
-                 'file_name': os.path.basename(raw_file)
-             } for raw_file in raw_files
-         ]
-         df = pd.DataFrame(files_list)
-         df.to_csv(f'{ship_name}_{cruise_name}.csv', index=False, header=False, sep=' ')
-         print('done')
-
-
-     #################################################################
-     def get_subset_ek60_prefix(
-         self,
-         df: pd.DataFrame
-     ) -> pd.DataFrame:
-         # Returns all objects with 'EK60' in prefix of file path
-         # Note that this can include 'EK80' data that are false-positives
-         # in dataframe with ['key', 'filename', 'ship', 'cruise', 'sensor', 'size', 'date', 'datagram']
-         print("getting subset of ek60 data by prefix")
-         objects = []
-         for row in df.itertuples():
-             row_split = row[1].split(os.sep)
-             if len(row_split) == 6:
-                 filename = os.path.basename(row[1]) # 'EX1608_EK60-D20161205-T040300.raw'
-                 if filename.endswith(".raw"):
-                     ship_name, cruise_name, sensor_name = row_split[2:5] # 'Okeanos_Explorer', 'EX1608', 'EK60'
-                     if re.search("[D](\d{8})", filename) is not None and re.search("[T](\d{6})", filename) is not None:
-                         # Parse date if possible e.g.: 'data/raw/Henry_B._Bigelow/HB1006/EK60/HBB-D20100723-T025105.raw'
-                         # and 'data/raw/Henry_B._Bigelow/HB1802/EK60/D20180513-T150250.raw'
-                         date_substring = re.search("[D](\d{8})", filename).group(1)
-                         time_substring = re.search("[T](\d{6})", filename).group(1)
-                         date_string = datetime.strptime(f'{date_substring}{time_substring}', '%Y%m%d%H%M%S')
-                     else: # otherwise use current date
-                         date_string = f"{datetime.utcnow().isoformat()[:19]}Z"
-                     objects.append(
-                         {
-                             'KEY': row[1],
-                             'FILENAME': filename,
-                             'SHIP': ship_name,
-                             'CRUISE': cruise_name,
-                             'SENSOR': sensor_name,
-                             'SIZE': row[2],
-                             'DATE': date_string,
-                             'DATAGRAM': None
-                         }
-                     )
-         return pd.DataFrame(objects)
-
-     #################################################################
-     def scan_datagram(
-         self,
-         select_key: str
-     ) -> list:
-         # Reads the first 8 bytes of S3 file. Used to determine if ek60 or ek80
-         # Note: uses boto3 session instead of boto3 client: https://github.com/boto/boto3/issues/801
-         # select_key = 'data/raw/Albatross_Iv/AL0403/EK60/L0005-D20040302-T200108-EK60.raw'
-         s3_resource = self.s3_manager.s3_resource
-         obj = s3_resource.Object(bucket_name=self.input_bucket_name, key=select_key) # XML0
-         first_datagram = obj.get(Range='bytes=3-7')['Body'].read().decode().strip('\x00')
-         # return [{'KEY': select_key, 'DATAGRAM': first_datagram}]
-         ### EK60 data are denoted by 'CON0' ###
-         return first_datagram
-
-     #################################################################
-     def get_subset_datagrams(
-         self,
-         df: pd.DataFrame
-     ) -> list:
-         print("getting subset of datagrams")
-         select_keys = list(df[['KEY', 'CRUISE']].drop_duplicates(subset='CRUISE')['KEY'].values)
-         all_datagrams = []
-         with ThreadPoolExecutor(max_workers=self.max_pool_connections) as executor:
-             futures = [executor.submit(self.scan_datagram, select_key) for select_key in select_keys]
-             for future in as_completed(futures):
-                 result = future.result()
-                 if result:
-                     all_datagrams.extend(result)
-         return all_datagrams
-
-     #################################################################
-     def get_ek60_objects(
-         self,
-         df: pd.DataFrame,
-         subset_datagrams: list
-     ) -> pd.DataFrame:
-         # for each key write datagram value to all other files in same cruise
-         for subset_datagram in subset_datagrams:
-             if subset_datagram['DATAGRAM'] == 'CON0':
-                 select_cruise = df.loc[df['KEY'] == subset_datagram['KEY']]['CRUISE'].iloc[0]
-                 df.loc[df['CRUISE'] == select_cruise, ['DATAGRAM']] = subset_datagram['DATAGRAM']
-         return df.loc[df['DATAGRAM'] == 'CON0']
-
-     #################################################################
-     def get_calibration_information( # tested
-         self,
-     ) -> pd.DataFrame:
-         # Calibration data generated by data manager currently located here:
-         # https://noaa-wcsd-pds-index.s3.amazonaws.com/calibrated_crusies.csv
-         # Note: Data are either:
-         # [1] Calibrated w/ calibration data
-         # [2] Calibrated w/o calibration data
-         # [3] uncalibrated
-         response = self.s3_manager.get_object(bucket_name=self.calibration_bucket, key_name=self.calibration_key)
-         calibration_statuses = pd.read_csv(response.get("Body"))
-         calibration_statuses['DATASET_NAME'] = calibration_statuses['DATASET_NAME'].apply(lambda x: x.split('_EK60')[0])
-         calibration_statuses['CAL_STATE'] = calibration_statuses['CAL_STATE'].apply(lambda x: x.find('Calibrated') >= 0)
-         return calibration_statuses
-
-     #################################################################
-     # def index( # TODO: get rid of this?
-     #     self
-     # ):
-     #     start_time = datetime.now() # used for benchmarking
-     #     # Get all object in public dataset bucket
-     #     all_objects = self.get_all_objects()
-     #     #
-     #     subset_ek60_by_prefix = self.get_subset_ek60_prefix(
-     #         df=all_objects[all_objects['Key'].str.contains('EK60')][['Key', 'Size']]
-     #     )
-     #     #
-     #     subset_datagrams = self.get_subset_datagrams(df=subset_ek60_by_prefix)
-     #     print("done getting subset of datagrams")
-     #     ek60_objects = self.get_ek60_objects(subset_ek60_by_prefix, subset_datagrams)
-     #     print("done getting ek60_objects")
-     #     print(start_time)
-     #     calibration_status = self.get_calibration_information(s3)
-     #     cruise_names = list(set(ek60_objects['CRUISE']))
-     #     cruise_names.sort()
-     #     for cruise_name in cruise_names: # ~322 cruises
-     #         cruise_data = ek60_objects.groupby('CRUISE').get_group(cruise_name)
-     #         ship = cruise_data['SHIP'].iloc[0]
-     #         sensor = cruise_data['SENSOR'].iloc[0]
-     #         datagram = cruise_data['DATAGRAM'].iloc[0]
-     #         file_count = cruise_data.shape[0]
-     #         total_size = np.sum(cruise_data['SIZE'])
-     #         calibrated = cruise_name in calibration_status['DATASET_NAME'].unique() # ~276 entries
-     #         start_date = np.min(cruise_data['DATE']).isoformat(timespec="seconds") + "Z"
-     #         end_date = np.max(cruise_data['DATE']).isoformat(timespec="seconds") + "Z"
-     #     end_time = datetime.now() # used for benchmarking
-     #     print(start_time)
-     #     print(end_time)
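Two details in the deleted IndexManager are worth flagging: list_ships guards on 'Contents' but then reads page['CommonPrefixes'] (with Delimiter='/' the folder-like entries arrive under CommonPrefixes, and pages lacking that key would raise KeyError), and get_subset_datagrams references self.max_pool_connections, which __init__ never sets. A self-contained sketch of the corrected listing pattern, using boto3 directly in place of the removed S3Manager (the bucket name in the usage comment is illustrative):

import boto3

def list_common_prefixes(bucket: str, prefix: str) -> list:
    # List "directories" one level below a prefix via the list_objects_v2 paginator.
    paginator = boto3.client("s3").get_paginator("list_objects_v2")
    prefixes = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="/"):
        # Folder-like entries come back under CommonPrefixes when a Delimiter is set,
        # and the key is absent on pages that contain only objects.
        prefixes.extend(p["Prefix"] for p in page.get("CommonPrefixes", []))
    return prefixes

# e.g. ships = list_common_prefixes("noaa-wcsd-pds", "data/raw/") # ~76 ships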
model/model.py DELETED
@@ -1,138 +0,0 @@
- import os
- import json
- import numpy as np
- from src.model.aws.s3_manager import S3Manager
- from src.model.aws.sns_manager import SNSManager
- from src.model.aws.dynamodb_manager import DynamoDBManager
-
-
- ###########################################################
- class Model:
-     #######################################################
-     def __init__(
-         self,
-     ):
-         self.input_bucket_name = os.environ['INPUT_BUCKET_NAME']
-         self.output_bucket_name = os.environ['OUTPUT_BUCKET_NAME']
-         self.table_name = os.environ['TABLE_NAME']
-         self.topic_arn = os.environ['TOPIC_ARN']
-         # self.output_bucket_access_key = ?
-         # self.output_bucket_secret_access_key = ?
-
-     def execute(self):
-         input_s3_manager = S3Manager() # TODO: Need to allow passing in of credentials when writing to protected bucket
-         output_s3_manager = S3Manager()
-         # TODO: s3fs?
-         sns_manager = SNSManager()
-         ddb_manager = DynamoDBManager()
-
-         # [1 of 5] Update Pipeline Status in DynamoDB
-         #self.dynamodb.update_ status ()
-
-         # [2 of 5] Download Object From Input Bucket
-         return_value = input_s3_manager.get(
-             bucket_name=self.input_bucket_name,
-             key='the_input_key'
-         )
-         print(return_value)
-
-         # [3 of 5] Update Entry in DynamoDB
-         ship_name = 'David_Starr_Jordan' # TODO: get this from input sns message
-         cruise_name = 'DS0604'
-         sensor_name = 'EK60'
-         file_name = "DSJ0604-D20060406-T113407.raw"
-
-         test_channels = [
-             "GPT 38 kHz 009072055a7f 2 ES38B",
-             "GPT 70 kHz 00907203400a 3 ES70-7C",
-             "GPT 120 kHz 009072034d52 1 ES120-7",
-             "GPT 200 kHz 0090720564e4 4 ES200-7C"
-         ]
-         test_frequencies = [38_000, 70_000, 120_000, 200_000]
-         ddb_manager.update_item(
-             table_name=self.table_name,
-             key={
-                 'FILE_NAME': {'S': file_name}, # Partition Key
-                 'CRUISE_NAME': {'S': cruise_name}, # Sort Key
-             },
-             expression_attribute_names={
-                 '#CH': 'CHANNELS',
-                 '#ET': 'END_TIME',
-                 '#ED': 'ERROR_DETAIL',
-                 '#FR': 'FREQUENCIES',
-                 '#MA': 'MAX_ECHO_RANGE',
-                 '#MI': 'MIN_ECHO_RANGE',
-                 '#ND': 'NUM_PING_TIME_DROPNA',
-                 '#PS': 'PIPELINE_STATUS', # testing this updated
-                 '#PT': 'PIPELINE_TIME', # testing this updated
-                 '#SE': 'SENSOR_NAME',
-                 '#SH': 'SHIP_NAME',
-                 '#ST': 'START_TIME',
-                 '#ZB': 'ZARR_BUCKET',
-                 '#ZP': 'ZARR_PATH',
-             },
-             expression_attribute_values={
-                 ':ch': {'L': [{'S': i} for i in test_channels]},
-                 ':et': {'S': '2006-04-06T13:35:28.688Z'},
-                 ':ed': {'S': ''},
-                 ':fr': {'L': [{'N': str(i)} for i in test_frequencies]},
-                 ':ma': {'N': str(np.round(499.7653, 4))},
-                 ':mi': {'N': str(np.round(0.25, 4))},
-                 ':nd': {'N': str(2458)},
-                 ':ps': {'S': 'SUCCESS_AGGREGATOR'},
-                 ':pt': {'S': '2023-10-02T08:54:43Z'},
-                 ':se': {'S': sensor_name},
-                 ':sh': {'S': ship_name},
-                 ':st': {'S': '2006-04-06T11:34:07.288Z'},
-                 ':zb': {'S': 'r2d2-dev-echofish2-118234403147-echofish-dev-output'},
-                 ':zp': {'S': 'level_1/David_Starr_Jordan/DS0604/EK60/DSJ0604-D20060406-T113407.zarr'},
-             },
-             update_expression=(
-                 'SET '
-                 '#CH = :ch, '
-                 '#ET = :et, '
-                 '#ED = :ed, '
-                 '#FR = :fr, '
-                 '#MA = :ma, '
-                 '#MI = :mi, '
-                 '#ND = :nd, '
-                 '#PS = :ps, '
-                 '#PT = :pt, '
-                 '#SE = :se, '
-                 '#SH = :sh, '
-                 '#ST = :st, '
-                 '#ZB = :zb, '
-                 '#ZP = :zp'
-             )
-         )
-
-         # [4 of 5] Write Object to Output Bucket
-         output_s3_manager.put(
-             bucket_name=self.output_bucket_name,
-             key='123',
-             body='456'
-         )
-
-         # [_ of _] Read file-level Zarr store from bucket, Create GeoJSON, Write to bucket
-         # [_ of _] Create empty cruise-level Zarr store
-         # [_ of _] Resample and write to cruise-level Zarr Store
-
-         # [5 of 5] Publish Done Message
-         success_message = {
-             "default": {
-                 "shipName": ship_name,
-                 "cruiseName": cruise_name,
-                 "sensorName": sensor_name,
-                 "fileName": file_name,
-             }
-         }
-         sns_manager.publish(
-             topic_arn=self.topic_arn,
-             message=json.dumps(success_message),
-         )
-         print("done...")
-
-     #######################################################
-
- ###########################################################
- ###########################################################
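A caveat on the [5 of 5] step above: the "default" key in success_message only has meaning if SNSManager.publish sets MessageStructure='json', and in that mode SNS requires the value of "default" to be a string, not a nested object. A sketch of a compliant publish using plain boto3 (publish_done_message is an illustrative name, not part of the package):

import json
import boto3

def publish_done_message(topic_arn: str, payload: dict) -> None:
    # With MessageStructure="json", SNS rejects a message whose "default" value
    # is not a string, so the payload is serialized twice.
    boto3.client("sns").publish(
        TopicArn=topic_arn,
        Message=json.dumps({"default": json.dumps(payload)}),
        MessageStructure="json",
    )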
model/utility/__init__.py DELETED
File without changes
model/utility/constants.py DELETED
@@ -1,56 +0,0 @@
- from enum import Enum, Flag, unique
-
-
- @unique
- class Constants(Flag):
-     TILE_SIZE = 512
-
-
- class Coordinates(Enum):
-     """
-     Should try to specify
-     dtype
-     units
-     long_name — most readable description of variable
-     standard_name — name in lowercase and snake_case
-     """
-     PROJECT_NAME = 'echofish'
-
-     DEPTH = 'depth'
-     DEPTH_DTYPE = 'float32'
-     DEPTH_UNITS = 'm' # TODO: Pint? <https://pint.readthedocs.io/en/stable/>
-     DEPTH_LONG_NAME = 'Depth below surface'
-     DEPTH_STANDARD_NAME = 'depth'
-
-     TIME = 'time'
-     TIME_DTYPE = 'float64'
-     # Note: units and calendar are used downstream by Xarray
-     TIME_UNITS = 'seconds since 1970-01-01 00:00:00'
-     TIME_LONG_NAME = 'Timestamp of each ping'
-     TIME_STANDARD_NAME = 'time'
-     TIME_CALENDAR = 'proleptic_gregorian'
-     # TODO: create test for reading out timestamps in Xarray
-
-     FREQUENCY = 'frequency'
-     FREQUENCY_DTYPE = 'int'
-     FREQUENCY_UNITS = 'Hz'
-     FREQUENCY_LONG_NAME = 'Transducer frequency'
-     FREQUENCY_STANDARD_NAME = 'sound_frequency'
-
-     LATITUDE = 'latitude'
-     LATITUDE_DTYPE = 'float32'
-     LATITUDE_UNITS = 'degrees_north'
-     LATITUDE_LONG_NAME = 'Latitude'
-     LATITUDE_STANDARD_NAME = 'latitude'
-
-     LONGITUDE = 'longitude'
-     LONGITUDE_DTYPE = 'float32'
-     LONGITUDE_UNITS = 'degrees_east'
-     LONGITUDE_LONG_NAME = 'Longitude'
-     LONGITUDE_STANDARD_NAME = 'longitude'
-
-     SV = 'Sv'
-     SV_DTYPE = 'float32' # TODO: experiment with dtype of int
-     SV_UNITS = 'dB'
-     SV_LONG_NAME = 'Volume backscattering strength (Sv re 1 m-1)'
-     SV_STANDARD_NAME = 'volume_backscattering_strength'
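The comment inside Coordinates notes that units and calendar are consumed downstream by Xarray. A hedged sketch of how these members might be applied when declaring a coordinate variable, assuming the Coordinates enum above is in scope and the rest of the dataset assembly happens elsewhere:

import numpy as np
import xarray as xr

# Ping times as epoch seconds; the attrs drive CF-style time decoding on read.
time_coordinate = xr.DataArray(
    data=np.array([0.0, 0.5, 1.0], dtype=Coordinates.TIME_DTYPE.value),
    dims=Coordinates.TIME.value,
    attrs={
        "long_name": Coordinates.TIME_LONG_NAME.value,
        "standard_name": Coordinates.TIME_STANDARD_NAME.value,
        "units": Coordinates.TIME_UNITS.value,
        "calendar": Coordinates.TIME_CALENDAR.value,
    },
)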
model/utility/timestamp.py DELETED
@@ -1,12 +0,0 @@
- from datetime import datetime
-
-
- ###########################################################
- class Timestamp:
-     @staticmethod
-     def get_timestamp():
-         # return timestamp in form:
-         # PIPELINE_TIME = '2024-03-29T19:36:52.433Z'
-         return f'{datetime.utcnow().isoformat()[:23]}Z'
-
- ###########################################################
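A maintenance note on this helper (and on the inline timestamps in index.py): datetime.utcnow() is deprecated as of Python 3.12 in favor of timezone-aware datetimes. An equivalent sketch with the same millisecond-precision output:

from datetime import datetime, timezone

def get_timestamp() -> str:
    # e.g. '2024-03-29T19:36:52.433Z'
    return datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z")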
model/zarr/__init__.py DELETED
File without changes
model/zarr/bar.py DELETED
@@ -1,28 +0,0 @@
- import requests
-
-
- # class Bar(object):
- #
- #     def biz(self):
- #         pass
-
-
- # class Bar(object):
- #
- #     def sync(self, id, query_first):
- #         if query_first:
- #             requests.get('/remote/api/{id}'.format(id=id))
- #
- #         requests.put(
- #             '/remote/other/api/{id}'.format(id=id),
- #             data=123 # current_data()
- #         )
-
- # class Bar(object):
- #     def biz(self, url, method, data, headers):
- #         pass
-
- class Bar(object):
-
-     def biz(self):
-         return 1
model/zarr/foo.py DELETED
@@ -1,11 +0,0 @@
- from src.model.zarr.bar import Bar
-
-
- # def foo():
- #     Bar().biz()
-
- # def foo(url, method='GET', data=None, headers=None):
- #     Bar().biz(url, method, data=data, headers=headers)
-
- def foo():
-     return Bar().biz()