water-column-sonar-annotation 26.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. tests/__init__.py +0 -0
  2. tests/astronomical/__init__.py +0 -0
  3. tests/astronomical/test_astronomical_manager.py +148 -0
  4. tests/conftest.py +60 -0
  5. tests/cruise/__init__.py +0 -0
  6. tests/cruise/test_cruise_manager.py +80 -0
  7. tests/geospatial/__init__.py +0 -0
  8. tests/geospatial/test_geospatial_manager.py +86 -0
  9. tests/record/test_echoview_record_manager.py +160 -0
  10. water_column_sonar_annotation/__init__.py +5 -0
  11. water_column_sonar_annotation/astronomical/__init__.py +5 -0
  12. water_column_sonar_annotation/astronomical/astronomical_manager.py +82 -0
  13. water_column_sonar_annotation/cruise/__init__.py +5 -0
  14. water_column_sonar_annotation/cruise/cruise_manager.py +104 -0
  15. water_column_sonar_annotation/geospatial/__init__.py +5 -0
  16. water_column_sonar_annotation/geospatial/geospatial_manager.py +143 -0
  17. water_column_sonar_annotation/record/__init__.py +9 -0
  18. water_column_sonar_annotation/record/echoview_record_manager.py +426 -0
  19. water_column_sonar_annotation/record/graph_record_manager.py +82 -0
  20. water_column_sonar_annotation/record/parquet_record_manager.py +83 -0
  21. water_column_sonar_annotation/shape/__init__.py +5 -0
  22. water_column_sonar_annotation/shape/shape_manager.py +29 -0
  23. water_column_sonar_annotation-26.1.8.dist-info/METADATA +109 -0
  24. water_column_sonar_annotation-26.1.8.dist-info/RECORD +27 -0
  25. water_column_sonar_annotation-26.1.8.dist-info/WHEEL +5 -0
  26. water_column_sonar_annotation-26.1.8.dist-info/licenses/LICENSE +21 -0
  27. water_column_sonar_annotation-26.1.8.dist-info/top_level.txt +2 -0
@@ -0,0 +1,426 @@
1
+ import hashlib
2
+ import itertools
3
+ from os import listdir
4
+ from os.path import isfile, join
5
+ from pathlib import Path
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+
10
+ from water_column_sonar_annotation.astronomical import AstronomicalManager
11
+ from water_column_sonar_annotation.cruise import CruiseManager
12
+ from water_column_sonar_annotation.geospatial import GeospatialManager
13
+ from water_column_sonar_annotation.record.graph_record_manager import (
14
+ GraphRecordManager,
15
+ )
16
+
17
+ # from water_column_sonar_annotation.record import EchofishRecordManager
18
+ # from water_column_sonar_annotation.record import GRecordManager
19
+
20
+ """
21
+ Documentation for echoview record files in EVR format:
22
+ https://support.echoview.com/WebHelp/Reference/File_Formats/Export_File_Formats/2D_Region_definition_file_format.htm
23
+ """
24
+
25
+
26
def chunks(lst, n):
    """Yield space-joined strings built from consecutive n-sized slices of lst.

    The final string is built from fewer than ``n`` items when ``len(lst)``
    is not a multiple of ``n``. Items of ``lst`` must be strings, because
    they are combined with ``str.join``.
    """
    yield from (
        " ".join(lst[start : start + n]) for start in range(0, len(lst), n)
    )
31
+
32
+
33
+ class EchoviewRecordManager:
34
+ def __init__(
35
+ self,
36
+ ):
37
+ print("__init__ called")
38
+ self.region_creation_type = { # Data formats — The region creation type is one of the following
39
+ "-1": "No type",
40
+ "0": "Created from a selection made using the horizontal band tool horizontal selection tool",
41
+ "1": "Created from a selection made using the parallelogram tool parallelogram tool",
42
+ "2": "Created from a selection made using the polygon tool polygon selection tool",
43
+ "3": "Created from a selection made using the rectangle tool rectangle tool",
44
+ "4": "Created from a selection made using the vertical band tool vertical selection tool",
45
+ "5": "Created as a bottom-relative region or line-relative region",
46
+ "6": "Created or assigned as Marker region.",
47
+ "7": "Created using the Detect Schools command",
48
+ "8": "Invalid or unknown region type",
49
+ "9": "Created as a fish track region",
50
+ }
51
+ self.region_type = {
52
+ "0": "bad (no data)",
53
+ "1": "analysis",
54
+ "2": "marker",
55
+ "3": "fishtracks",
56
+ "4": "bad (empty water)",
57
+ }
58
+ self.evr_region_classifications = [
59
+ "possible_herring",
60
+ "atlantic_herring",
61
+ "fish_school",
62
+ "Unclassified regions", # TODO: per CWB continue to include this
63
+ "krill_schools", # excluding this field because of unknowns
64
+ "AH_School",
65
+ ]
66
+ self.all_records_df = pd.DataFrame() # columns=["filename", "start_time"])
67
+ #
68
+ self.astronomical_manager = AstronomicalManager()
69
+ self.cruise_manager = CruiseManager()
70
+ self.geospatial_manager = GeospatialManager()
71
+
72
+ def __enter__(self):
73
+ print("__enter__ called")
74
+ return self
75
+
76
+ def __exit__(self, *a):
77
+ print("__exit__ called")
78
+
79
+ """
80
+ # evr_region_structure_version = bbox_split[0] # "13" (will be incremented if the region structure changes in future versions)
81
+ # evr_point_count = bbox_split[1] # Number of points in the region
82
+ # evr_region_id = # Unique number for each region. Specify sequential numbers starting at 1 if creating a new file
83
+ # evr_selected = # "0" (always)
84
+ # evr_region_creation_type = # See "Data formats" definition
85
+ # evr_dummy = # Should always be "-1"
86
+ # evr_bounding_rectangle_calculated = # "1" if the next four fields are valid; "0" otherwise
87
+ # evr_left_x_value_of_bounding_rectangle = # Date and time of left boundary of bounding rectangle – ignored when importing into Echoview. See "Point 1" in table below.
88
+ # evr_top_y_value_of_bounding_rectangle = # Upper depth coordinate of bounding rectangle – ignored when importing into Echoview. See "Point 1" in table below.
89
+ # evr_right_x_value_of_bounding_rectangle = # Date and time of right boundary of bounding rectangle – ignored when importing into Echoview. See "Point 1" in table below.
90
+ # evr_bottom_y_value_of_bounding_rectangle = # Lower depth coordinate of bounding rectangle – ignored when importing into Echoview. See "Point 1" in table below.
91
+ # evr_number_of_lines_of_notes = # The number of lines of region notes to follow.
92
+ # evr_region_notes = # Notes associated with the region. Maximum length is 2048 characters. Embedded CR characters are encoded as hexadecimal FF. Embedded LF characters are encoded as hexadecimal FE.
93
+ # evr_number_of_lines_of_detection_settings = # The number of lines of detection settings to follow.
94
+ # evr_region_detection_settings = # The detection settings as defined in the Fish Track Detection Properties dialog box or Detect Schools dialog box.
95
+ # evr_region_classification = # Region classification (string). Default value is "Unclassified regions"
96
+ # evr_points = # Data for first point – See Data formats below. These data are used to bound the region when importing into Echoview
97
+ # evr_region_type = # "0" = bad (no data); "1" = analysis; "2" = marker, "3" = fishtracks; "4" = bad (empty water);
98
+ # evr_region_name = # String
99
+ """
100
+
101
+ # TODO:
102
+ # [2] write df to parquet and tag as github resource
103
+
104
+ @staticmethod
105
+ def process_datetime_string(
106
+ date_string: str,
107
+ time_string: str,
108
+ ):
109
+ """Returns time in UTC from strings '20190925' and '2053458953'"""
110
+ # np.datetime64()
111
+ return pd.to_datetime(f"{date_string} {time_string}", format="%Y%m%d %H%M%S%f")
112
+
113
+ def process_vertice(
114
+ self,
115
+ date_string: str,
116
+ time_string: str,
117
+ depth: float,
118
+ ) -> tuple:
119
+ dt = self.process_datetime_string(date_string, time_string)
120
+ # print(dt.value) # is epoch time in nanoseconds
121
+ return dt, dt.value, np.round(depth, 2)
122
+
123
+ def process_evr_record(
124
+ self,
125
+ evr_record: str,
126
+ filename: str,
127
+ ):
128
+ try:
129
+ #########################################################
130
+ record_lines = [x for x in evr_record.split("\n") if x]
131
+ ############# get bbox #############
132
+ bbox_split = record_lines[0].split() # [x for x in record.split() if x]
133
+ #########################################################
134
+ # https://support.echoview.com/WebHelp/Reference/File_Formats/Export_File_Formats/2D_Region_definition_file_format.htm
135
+ #########################################################
136
+ evr_region_structure_version = bbox_split[0]
137
+ if evr_region_structure_version != "13":
138
+ raise Exception("EVR Region Structure Version must be 13")
139
+ #
140
+ evr_point_count = int(bbox_split[1])
141
+ print(f"EVR Point Count: {evr_point_count}")
142
+ #
143
+ evr_region_id = int(bbox_split[2])
144
+ print(f"EVR Region: {evr_region_id}")
145
+ #
146
+ evr_selected = bbox_split[3]
147
+ if evr_selected != "0":
148
+ raise Exception("EVR Selected must be 13")
149
+ #
150
+ evr_region_creation_type = bbox_split[4] # See "Data formats" definition
151
+ print(
152
+ f"EVR region creation type: {self.region_creation_type[evr_region_creation_type]}"
153
+ )
154
+ #
155
+ evr_dummy = bbox_split[5] # Should always be "-1"
156
+ if evr_dummy != "-1":
157
+ raise Exception("EVR Dummy Should always be -1")
158
+ #
159
+ ### "1" if the next four fields are valid; "0" otherwise ###
160
+ evr_bounding_rectangle_calculated = bbox_split[6]
161
+ evr_left_x_value_of_bounding_rectangle = None
162
+ evr_top_y_value_of_bounding_rectangle = None
163
+ evr_right_x_value_of_bounding_rectangle = None
164
+ evr_bottom_y_value_of_bounding_rectangle = None
165
+ if evr_bounding_rectangle_calculated == "1":
166
+ # Date and time of left boundary of bounding rectangle – ignored when importing into Echoview. See "Point 1" in table below.
167
+ # '20190925 2053458953' <-- TODO: format into datetime
168
+ evr_left_x_value_of_bounding_rectangle = self.process_datetime_string(
169
+ bbox_split[7], bbox_split[8]
170
+ )
171
+ # Upper depth coordinate of bounding rectangle – ignored when importing into Echoview. See "Point 1" in table below.
172
+ evr_top_y_value_of_bounding_rectangle = float(bbox_split[9])
173
+ # Date and time of right boundary of bounding rectangle – ignored when importing into Echoview. See "Point 1" in table below.
174
+ evr_right_x_value_of_bounding_rectangle = self.process_datetime_string(
175
+ bbox_split[10], bbox_split[11]
176
+ )
177
+ # Lower depth coordinate of bounding rectangle – ignored when importing into Echoview. See "Point 1" in table below.
178
+ evr_bottom_y_value_of_bounding_rectangle = float(bbox_split[12])
179
+ print(
180
+ f"{evr_left_x_value_of_bounding_rectangle.isoformat()}, {evr_top_y_value_of_bounding_rectangle}, {evr_right_x_value_of_bounding_rectangle.isoformat()}, {evr_bottom_y_value_of_bounding_rectangle}"
181
+ )
182
+ # making sure times are in-order
183
+ if (
184
+ evr_left_x_value_of_bounding_rectangle
185
+ > evr_right_x_value_of_bounding_rectangle
186
+ ):
187
+ raise Exception("Timestamps out of order!")
188
+ #
189
+ offset_index = 0
190
+ ### The number of lines of region notes to follow. ###
191
+ evr_number_of_lines_of_notes = int(record_lines[1])
192
+ print(f"Number of region notes: {evr_number_of_lines_of_notes}")
193
+ ### Notes associated with the region. Maximum length is 2048 characters. Embedded CR characters are encoded as hexadecimal FF. Embedded LF characters are encoded as hexadecimal FE. ###
194
+ if evr_number_of_lines_of_notes > 0:
195
+ offset_index = offset_index + evr_number_of_lines_of_notes + 1
196
+ evr_region_notes = record_lines[1:offset_index]
197
+ print(f"Region notes: {evr_region_notes}")
198
+ #
199
+ ### The number of lines of detection settings to follow. ###
200
+ evr_number_of_lines_of_detection_settings = int(
201
+ record_lines[2 + offset_index]
202
+ )
203
+ print(
204
+ f"Number of lines of detection settings: {evr_number_of_lines_of_detection_settings}"
205
+ )
206
+ ### The detection settings as defined in the Fish Track Detection Properties dialog box or Detect Schools dialog box. ###
207
+ if evr_number_of_lines_of_detection_settings > 0:
208
+ offset_index = (
209
+ evr_number_of_lines_of_notes
210
+ + evr_number_of_lines_of_detection_settings
211
+ + 3
212
+ )
213
+ evr_region_detection_settings = record_lines[3:offset_index]
214
+ print(f"Region detection settings: {evr_region_detection_settings}")
215
+ #
216
+ ### Region classification (string). Default value is "Unclassified regions" ###
217
+ evr_region_classification = record_lines[-3]
218
+ if evr_region_classification not in self.evr_region_classifications:
219
+ raise Exception(
220
+ f"Problem, unknown region classification: {evr_region_classification}"
221
+ )
222
+ print(f"Region classification: {evr_region_classification}")
223
+ #
224
+ # TODO: If the data has krill, skip creating a record of it
225
+ if evr_region_classification == "krill_schools":
226
+ print("Krill, skipping!!!")
227
+ return
228
+ #
229
+ # Data for first point – See Data formats below. These data are used to bound the region when importing into Echoview
230
+ evr_points = [x for x in record_lines[-2].split(" ") if x][:-1]
231
+ # print(f"EVR points: {evr_points}") # TODO: strip last entry
232
+ #
233
+ evr_point_chunks = list(itertools.batched(evr_points, 3))
234
+ for evr_point_chunk in evr_point_chunks:
235
+ processed_point = self.process_vertice(
236
+ date_string=evr_point_chunk[0],
237
+ time_string=evr_point_chunk[1],
238
+ depth=float(evr_point_chunk[2]),
239
+ )
240
+ print(processed_point)
241
+ #
242
+ if len(evr_points) != evr_point_count * 3:
243
+ raise Exception("EVR point count does not match expected.")
244
+ #
245
+ # "0" = bad (no data); "1" = analysis; "2" = marker, "3" = fishtracks; "4" = bad (empty water);
246
+ evr_region_type = [x for x in record_lines[-2].split(" ") if x][-1]
247
+ print(f"Region type: {self.region_type[evr_region_type]}")
248
+ # String
249
+ evr_region_name = record_lines[-1]
250
+ print(f"Region name: {evr_region_name}")
251
+ #
252
+ print("get lat lon")
253
+ (latitude, longitude) = self.cruise_manager.get_coordinates(
254
+ start_time=evr_left_x_value_of_bounding_rectangle.isoformat(),
255
+ end_time=evr_right_x_value_of_bounding_rectangle.isoformat(),
256
+ )
257
+ print("get local time")
258
+ local_time = self.geospatial_manager.get_local_time(
259
+ iso_time=evr_left_x_value_of_bounding_rectangle.isoformat(),
260
+ latitude=latitude,
261
+ longitude=longitude,
262
+ )
263
+ print("get solar")
264
+ solar_altitude = self.astronomical_manager.get_solar_azimuth(
265
+ iso_time=evr_left_x_value_of_bounding_rectangle.isoformat(),
266
+ latitude=latitude,
267
+ longitude=longitude,
268
+ )
269
+ print("phase_of_day")
270
+ phase_of_day = self.astronomical_manager.phase_of_day(
271
+ iso_time=evr_left_x_value_of_bounding_rectangle.isoformat(),
272
+ latitude=latitude,
273
+ longitude=longitude,
274
+ )
275
+ print("distance")
276
+ distance_from_coastline = ( # Note this takes about 14 seconds each, very slow
277
+ self.geospatial_manager.check_distance_from_coastline(
278
+ latitude=latitude,
279
+ longitude=longitude,
280
+ )
281
+ )
282
+ print("altitude")
283
+ evr_altitude = self.cruise_manager.get_altitude(
284
+ start_time=evr_left_x_value_of_bounding_rectangle.isoformat(),
285
+ end_time=evr_right_x_value_of_bounding_rectangle.isoformat(),
286
+ bbox_max=evr_bottom_y_value_of_bounding_rectangle,
287
+ )
288
+ # #
289
+ # # print("%5.2f, %5.2f, {2}, {3}, {4]" % (latitude, longitude, local_time, solar_altitude, is_daytime, distance_from_coastline, evr_altitude))
290
+ # print(
291
+ # f"{latitude}, {longitude}, {local_time}, {solar_altitude}, {is_daytime}, {distance_from_coastline}, {evr_altitude}"
292
+ # )
293
+ #
294
+ # TODO: need additional infor for provenance --> need to create a unique key for each
295
+ # want a hash of some sort
296
+ # add the region_id (which will recycle from file to file)
297
+ #
298
+ ### provenance ###
299
+ geometry_string = record_lines[-2] # inclusive of evr_region_type
300
+ # geometry_string = evr_record # TODO: should i hash the entire record or the geometry?
301
+ geometry_hash = (
302
+ f"{hashlib.sha256(geometry_string.encode('utf-8')).hexdigest()}"
303
+ # f"sha256:{hashlib.sha256(geometry_string.encode('utf-8')).hexdigest()}"
304
+ )
305
+ #
306
+ # parquet_record_manager = ParquetRecordManager()
307
+ # print(parquet_record_manager)
308
+ graph_record_manager = GraphRecordManager(
309
+ classification=evr_region_classification,
310
+ point_count=evr_point_count,
311
+ time_start=evr_left_x_value_of_bounding_rectangle.isoformat(),
312
+ time_end=evr_right_x_value_of_bounding_rectangle.isoformat(),
313
+ depth_min=np.round(evr_top_y_value_of_bounding_rectangle, 2),
314
+ depth_max=np.round(evr_bottom_y_value_of_bounding_rectangle, 2),
315
+ month=evr_left_x_value_of_bounding_rectangle.month, # TODO: UTC Month, maybe change to localtime
316
+ latitude=float(latitude), # TODO: too many digits
317
+ longitude=float(longitude),
318
+ local_time=local_time,
319
+ solar_altitude=solar_altitude,
320
+ phase_of_day=phase_of_day,
321
+ #
322
+ distance_from_coastline=distance_from_coastline,
323
+ altitude=evr_altitude,
324
+ # geometry="P(0, 1)", # TODO: https://hvplot.holoviz.org/en/docs/latest/ref/api/manual/hvplot.hvPlot.polygons.html
325
+ #
326
+ filename=filename, # how do i find in parquet
327
+ region_id=evr_region_id,
328
+ geometry_hash=geometry_hash,
329
+ )
330
+ # print(graph_record_manager.to_json())
331
+ #
332
+ update_df = pd.DataFrame([graph_record_manager.to_dict()])
333
+ self.all_records_df = pd.concat(
334
+ [self.all_records_df, update_df],
335
+ ignore_index=True,
336
+ )
337
+ except Exception as process_evr_record_exception:
338
+ print(f"Problem with process_evr_record: {process_evr_record_exception}")
339
+ finally:
340
+ print("______________________________________done reading_+_+_+_+_+_+_+_+")
341
+
342
+ def process_evr_file(
343
+ self,
344
+ evr_file_path: str = None,
345
+ evr_filename: str = None,
346
+ ):
347
+ try:
348
+ print(f"Filename: {evr_filename}")
349
+ with open(evr_file_path + evr_filename, "r") as file:
350
+ lines = file.read()
351
+
352
+ records = lines.split("\n\n")
353
+ records = [i for i in records if i.startswith("13 ")] # filter
354
+ for evr_record in records:
355
+ self.process_evr_record(evr_record=evr_record, filename=evr_filename)
356
+ except Exception as process_evr_file_exception:
357
+ print(
358
+ f"Problem processing file {evr_filename}: {process_evr_file_exception}"
359
+ )
360
+
361
+ def process_evr_directory(self, evr_directory_path="../../data/HB201906/"):
362
+ """Open evr directory and start to parse files"""
363
+ try:
364
+ all_evr_files = [
365
+ f
366
+ for f in listdir(evr_directory_path)
367
+ if isfile(join(evr_directory_path, f)) and Path(f).suffix == ".evr"
368
+ ]
369
+ all_evr_files.sort()
370
+ print(f"Found {len(all_evr_files)} EVR files.")
371
+ for evr_file in all_evr_files[:1]: # TODO: fix this
372
+ self.process_evr_file(
373
+ evr_file_path=evr_directory_path, evr_filename=evr_file
374
+ )
375
+ # I don't have the lat/lon information to draw here... need to query the zarr store...
376
+ print(self.all_records_df)
377
+ self.all_records_df.set_index(
378
+ keys="geometry_hash", drop=False, inplace=True
379
+ )
380
+ # sort by time
381
+ self.all_records_df.sort_values(
382
+ by="time_start",
383
+ axis=0,
384
+ ascending=True,
385
+ inplace=True,
386
+ ignore_index=False,
387
+ )
388
+ print("writing files")
389
+ self.all_records_df.to_parquet(
390
+ path="graph_record.parquet",
391
+ engine="pyarrow",
392
+ compression="snappy",
393
+ index=True,
394
+ partition_cols=None,
395
+ )
396
+ self.all_records_df.to_csv(
397
+ path_or_buf="graph_record.csv",
398
+ header=True,
399
+ index=True,
400
+ mode="w",
401
+ )
402
+ print("done writing files")
403
+ #
404
+ except Exception as process_evr_directory_exception:
405
+ print(
406
+ f"Problem processing evr directory: {process_evr_directory_exception}"
407
+ )
408
+
409
+
410
if __name__ == "__main__":
    # Script entry point: parse the default cruise directory and report any
    # failure on stdout instead of letting it propagate.
    try:
        echoview_record_manager = EchoviewRecordManager()
        default_directory = "../../data/HB201906/"
        echoview_record_manager.process_evr_directory(
            evr_directory_path=default_directory,
        )
        print("done processing everything")
    except Exception as main_exception:
        print(main_exception)
419
+
420
+
421
+ # Example of polygon
422
+ # 20191106 1314583780 25.4929369108 # top-left
423
+ # 20191106 1314583780 30.2941528987 # bottom-left
424
+ # 20191106 1314593790 30.2941528987 # bottom-right
425
+ # 20191106 1314593790 25.3008882713 # top-right
426
+ # 20191106 1314583780 25.3008882713 1 # top-left'ish, ends with '1' ...goes counter-clockwise
@@ -0,0 +1,82 @@
1
+ from json import dumps
2
+
3
+ """
4
+ Format for export and bulk ingest into neo4j
5
+ """
6
+
7
+
8
class GraphRecordManager:
    """Flat record for one annotated region, for export and bulk ingest into neo4j.

    Constructor arguments are stored unchanged as attributes of the same
    name; ``to_dict`` and ``to_json`` expose those attributes.
    """

    def __init__(
        self,
        classification,
        point_count,
        # geometry,
        time_start,
        time_end,
        depth_min,
        depth_max,
        month,
        altitude,
        latitude: float,
        longitude: float,
        local_time,
        distance_from_coastline,
        solar_altitude,
        phase_of_day,
        filename,
        region_id,
        geometry_hash,  # sha256 hash
        ship: str = "Henry_B._Bigelow",
        cruise: str = "HB1906",
        instrument: str = "EK60",
    ):
        print("__init__ called")
        self.classification: str = classification
        self.point_count: int = point_count
        # geometry is intentionally not stored: not wanted for neo4j
        ### geospatial ###
        self.time_start: str = time_start
        self.time_end: str = time_end
        self.depth_min: float = depth_min
        self.depth_max: float = depth_max
        self.month: int = month
        self.altitude: float = altitude
        self.latitude: float = latitude
        self.longitude: float = longitude
        self.local_time: str = local_time
        self.distance_from_coastline: float = distance_from_coastline
        ### astronomical ###
        self.solar_altitude: float = solar_altitude
        # NOTE(review): type depends on what the astronomical manager
        # returns -- confirm before annotating.
        self.phase_of_day = phase_of_day
        ### provenance ###
        self.filename: str = filename
        # The EVR parser passes the region id as an int.
        self.region_id: int = region_id
        self.geometry_hash: str = geometry_hash
        self.ship: str = ship
        self.cruise: str = cruise
        self.instrument: str = instrument

    def to_dict(
        self,
    ) -> dict:
        """Return the record's attributes as a new dict.

        A shallow copy is returned so that callers who modify the result do
        not silently alter this record.
        """
        return dict(self.__dict__)

    def to_json(
        self,
    ):
        """Return the record's attributes as a JSON string.

        Returns:
            The JSON text, or ``None`` (after printing the problem) when an
            attribute cannot be serialized.
        """
        try:
            return dumps(self.__dict__)
        except (TypeError, ValueError) as knowledge_graph_record_exception:
            print(
                f"Problem with knowledge graph record: {knowledge_graph_record_exception}"
            )
            return None
@@ -0,0 +1,83 @@
1
+ from json import dumps
2
+
3
+ """
4
+ Format for export to parquet and bulk ingest into neo4j:
5
+ """
6
+ # TODO:
7
+ # [1] write the records to a pandas dataframe
8
+ # [2] write df to parquet and tag as github resource
9
+
10
+
11
class ParquetRecordManager:
    """Flat record for one annotated region, for export to parquet.

    Constructor arguments other than ``geometry`` are stored unchanged as
    attributes of the same name; ``to_dict`` and ``to_json`` expose those
    attributes.

    NOTE(review): ``geometry`` is accepted but not stored, and the
    geospatial/astronomical enrichment fields (altitude, latitude,
    longitude, local_time, distance_from_coastline, solar_altitude,
    is_daytime) are not part of this record -- confirm whether that is
    intended.
    """

    def __init__(
        self,
        classification,
        point_count,
        geometry,
        time_start,
        time_end,
        depth_min,
        depth_max,
        month,
        filename,
        region_id,
        geometry_hash,  # sha256 hash
        ship: str = "Henry_B._Bigelow",
        cruise: str = "HB1906",
        instrument: str = "EK60",
    ):
        print("__init__ called")
        self.classification: str = classification
        self.point_count: int = point_count
        # geometry is currently not kept on the record
        ### geospatial ###
        self.time_start: str = time_start
        self.time_end: str = time_end
        self.depth_min: float = depth_min
        self.depth_max: float = depth_max
        self.month: int = month
        ### provenance ###
        self.filename: str = filename
        # The EVR parser produces region ids as ints.
        self.region_id: int = region_id
        self.geometry_hash: str = geometry_hash
        self.ship: str = ship
        self.cruise: str = cruise
        self.instrument: str = instrument

    def to_dict(
        self,
    ) -> dict:
        """Return the record's attributes as a new dict.

        A shallow copy is returned so that callers who modify the result do
        not silently alter this record.
        """
        return dict(self.__dict__)

    def to_json(
        self,
    ):
        """Return the record's attributes as a JSON string.

        Returns:
            The JSON text, or ``None`` (after printing the problem) when an
            attribute cannot be serialized.
        """
        try:
            return dumps(self.__dict__)
        except (TypeError, ValueError) as parquet_record_exception:
            print(f"Problem with parquet record: {parquet_record_exception}")
            return None
@@ -0,0 +1,5 @@
1
+ from .shape_manager import ShapeManager
2
+
3
+ __all__ = [
4
+ "ShapeManager",
5
+ ]
@@ -0,0 +1,29 @@
1
class ShapeManager:
    """Placeholder for building shapes from EVR date, time and depth strings.

    None of the shape builders are implemented yet; each one accepts its
    arguments and returns ``None``.
    """

    def __init__(self):
        # Number of decimal places; not yet used by any method of this class.
        self.DECIMAL_PRECISION = 4

    def point(self, date_string, time_string, depth_string):
        """Not implemented yet; returns ``None``."""
        # TODO: implement and declare the return type
        return None

    def polygon(self, date_string, time_string, depth_string):
        """Not implemented yet; returns ``None``."""
        # TODO: implement and declare the return type
        return None

    def bounding_box(self, date_string, time_string, depth_string):
        """Not implemented yet; returns ``None``."""
        # TODO: implement and declare the return type
        return None