windborne-1.0.5-py3-none-any.whl → windborne-1.0.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- windborne/__init__.py +11 -4
- windborne/cli.py +168 -75
- windborne/data_api.py +629 -119
- windborne/utils.py +40 -16
- {windborne-1.0.5.dist-info → windborne-1.0.7.dist-info}/METADATA +1 -1
- windborne-1.0.7.dist-info/RECORD +11 -0
- windborne-1.0.5.dist-info/RECORD +0 -11
- {windborne-1.0.5.dist-info → windborne-1.0.7.dist-info}/WHEEL +0 -0
- {windborne-1.0.5.dist-info → windborne-1.0.7.dist-info}/entry_points.txt +0 -0
- {windborne-1.0.5.dist-info → windborne-1.0.7.dist-info}/top_level.txt +0 -0
windborne/data_api.py
CHANGED
@@ -7,10 +7,19 @@ from math import floor
 from datetime import datetime, timezone, timedelta
 import csv
 import json
+import hashlib
 
-def get_observations(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=True, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, save_to_file=None):
+# UTC should be used across the lib
+
+# ------------
+# CORE RESOURCES
+# ------------
+
+# Observations
+# ------------
+def get_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=True, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, save_to_file=None):
     """
-    Retrieves observations based on specified filters including geographical bounds.
+    Retrieves observations page based on specified filters including geographical bounds.
 
     Args:
         since (str): Filter observations after this timestamp.
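The rename to get_observations_page makes the paging contract explicit: each call returns one page plus a next_since cursor and a has_next_page flag, both visible in the polling loops later in this diff. A minimal sketch of walking the pages by hand, assuming the function is exported from the package root (the __init__.py change in this diff suggests the public surface was updated) and that API credentials are configured as the library expects; the since value is illustrative:

    from windborne import get_observations_page

    since = 1730419200  # 2024-11-01 00:00:00 UTC, illustrative
    while True:
        page = get_observations_page(since=since, include_mission_name=True)
        if page is None:
            break  # the library's own loops treat None as a failed request
        for obs in page.get('observations', []):
            print(obs.get('mission_name'), obs.get('timestamp'))
        if not page.get('has_next_page'):
            break
        since = page.get('next_since')  # cursor for the next page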
@@ -70,52 +79,7 @@ def get_observations(since=None, min_time=None, max_time=None, include_ids=None,
 
     return response
 
-def get_super_observations(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=None, include_updated_at=None, mission_id=None, save_to_file=None):
-    """
-    Retrieves super observations based on specified filters.
-
-    Args:
-        since (str): Filter observations after this timestamp.
-        min_time (str): Minimum timestamp for observations.
-        max_time (str): Maximum timestamp for observations.
-        include_ids (bool): Include observation IDs in response.
-        include_mission_name (bool): Include mission names in response.
-        include_updated_at (bool): Include update timestamps in response.
-        mission_id (str): Filter observations by mission ID.
-        save_to_file (str): Optional path to save the response data.
-                            If provided, saves the data in CSV format.
-
-    Returns:
-        dict: The API response containing filtered super observations.
-    """
-
-    url = f"{DATA_API_BASE_URL}/super_observations.json"
-
-    params = {}
-    if since:
-        params["since"] = to_unix_timestamp(since)
-    if min_time:
-        params["min_time"] = to_unix_timestamp(min_time)
-    if max_time:
-        params["max_time"] = to_unix_timestamp(max_time)
-    if mission_id:
-        params["mission_id"] = mission_id
-    if include_ids:
-        params["include_ids"] = True
-    if include_mission_name:
-        params["include_mission_name"] = True
-    if include_updated_at:
-        params["include_updated_at"] = True
-
-    params = {k: v for k, v in params.items() if v is not None}
-
-    response = make_api_request(url, params=params)
-    if save_to_file:
-        save_csv_json(save_to_file, response, csv_data_key='observations')
-
-    return response
-
-def poll_observations(start_time, end_time=None, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, callback=None):
+def observations(start_time, end_time=None, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
     """
     Fetches observations between a start time and an optional end time and saves to files in specified format.
     Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
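The visible API change in this hunk is a rename: the bounded fetch that 1.0.5 called poll_observations is now simply observations, while the poll_observations name is reused further down for a new run-forever poller (the removed get_super_observations is likewise re-added later as get_super_observations_page). A before/after sketch with illustrative arguments, assuming package-root exports:

    # 1.0.5: bounded fetch
    # poll_observations("2024-01-01 00:00:00", end_time="2024-01-02 00:00:00",
    #                   save_to_file="obs.json")

    # 1.0.7: same bounded fetch, renamed, with a new output_dir parameter
    from windborne import observations

    observations("2024-01-01 00:00:00",
                 end_time="2024-01-02 00:00:00",
                 save_to_file="obs.json")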
@@ -140,6 +104,7 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
                             Supported formats are '.csv', '.json', '.little_r' and '.nc'
         bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
         output_format (str): Optional. Format to save data in separate files. Supported formats are 'json, 'csv', 'little_r' and 'netcdf'.
+        output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
         callback (callable): Optional callback function that receives (super observations, metadata) before saving.
                              This allows custom processing or saving in custom formats.
     """
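Note that the new output_dir only affects the multi-file branch: in the function body save_to_file takes precedence in the if/elif chain, and the single-file path creates its own directory separately. A sketch of the bucketed export, with illustrative values and a package-root import assumed:

    from windborne import observations

    # One file per (mission, 6-hour bucket) under ./exports, e.g.
    # WindBorne_<mission>_2024-01-01_09_6h.csv per the naming in this diff.
    observations("2024-01-01 00:00:00",
                 end_time="2024-01-02 00:00:00",
                 output_format="csv",
                 output_dir="exports",
                 bucket_hours=6.0)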
@@ -165,7 +130,7 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
         return
 
     # Supported formats for saving into a single file:
-    # NOTE: for
+    # NOTE: for observations we handle .csv saving within observations and not using save_csv_json
     # - .csv
     # - .json
     # - .little_r
@@ -201,12 +166,16 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
     # Initialize the polling loop
     current_timestamp = start_time
     has_next_page = True
+    fetced_so_far = 0
+
+    print(f"Starting polling observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+    print("-----------------------------------------------------")
 
 
     while has_next_page:
         try:
             # Fetch observations
-            observations_page = get_observations(
+            observations_page = get_observations_page(
                 since=current_timestamp,
                 min_latitude=min_latitude,
                 max_latitude=max_latitude,
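The new print_current_timestamp guard exists because the paging cursor can come back at nanosecond resolution: any value at or above 1e11 is far outside the range of plausible second-resolution Unix times (1e11 seconds lands in the year 5138), so it is divided by 1e9 before formatting. A standalone illustration of the same normalization (helper name is mine):

    from datetime import datetime, timezone

    def to_seconds(ts):
        # Values >= 1e11 cannot be second-resolution Unix times,
        # so treat them as nanoseconds.
        return ts if ts < 1e11 else ts / 1e9

    for ts in (1730419200, 1730419200_000_000_000):
        print(datetime.fromtimestamp(to_seconds(ts), tz=timezone.utc))
    # both iterations print 2024-11-01 00:00:00+00:00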
@@ -226,11 +195,15 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
                 continue
 
             observations = observations_page.get('observations', [])
-
+            fetced_so_far = fetced_so_far + len(observations)
+            print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+            print(f"Fetched {fetced_so_far} observations")
+            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+            print("-----------------------------------------------------")
 
             # Invoke the callback with fetched observations
             if callback:
-                print("
+                print("--------\nCallback\n--------")
                 callback(observations)
 
             for obs in observations:
@@ -281,27 +254,41 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
 
             if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
                 print("-----------------------------------------------------\n")
-                print("
+                print("Fetching complete.")
                 print("\n-----------------------------------------------------")
                 break
 
             current_timestamp = next_timestamp
 
+        except KeyboardInterrupt:
+            print("\n\n\U0001F6D1 Received interrupt, stopping...")
+            print("-----------------------------------------------------")
+            print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
+            print("-----------------------------------------------------")
+            exit(3)
         except Exception as e:
             print(f"Error occurred: {e}")
             exit(1001)
 
     # Save data to a single file
     if save_to_file:
+        # Create directory path if it doesn't exist
+        directory = os.path.dirname(save_to_file)
+        if directory and not os.path.isdir(directory):
+            os.makedirs(directory, exist_ok=True)
         filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
                                  if float(obs['timestamp']) >= start_time}
         # Sort by timestamp
         sorted_observations = dict(sorted(filtered_observations.items(),
                                           key=lambda x: float(x[1]['timestamp'])))
 
+        print(f"Saving {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
+        print("This may take a while...")
+        print("-----------------------------------------------------\n")
+
        if save_to_file.endswith('.nc'):
             first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
-            convert_to_netcdf(sorted_observations, first_obs_timestamp,
+            convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
         elif save_to_file.endswith('.json'):
             with open(save_to_file, 'w', encoding='utf-8') as f:
                 json.dump(sorted_observations, f, indent=4)
@@ -321,6 +308,15 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
 
     # Save data to multiple file
     elif output_format:
+        # Create output directory if specified
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+            print(f"Files will be saved to {output_dir}")
+        else:
+            print(f"Files will be saved to {os.getcwd()}")
+        print(f"Processing {fetced_so_far} {'observation' if fetced_so_far == 1 else 'observations'} and save them over multiple files.")
+        print("This may take a while...")
+        print("-----------------------------------------------------\n")
         # Track statistics per mission
         mission_stats = {}  # {mission_name: {'files': 0, 'observations': 0}}
         total_observations_written = 0
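Both save paths now funnel through one convention: create output_dir on demand, otherwise fall back to the current working directory, then join the generated file name onto it. A small sketch of that convention (the helper name is mine, the mission name hypothetical):

    import os

    def resolve_output_file(file_name, output_dir=None):
        # '.' mirrors the diff's os.path.join(output_dir or '.', file_name) fallback
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        return os.path.join(output_dir or '.', file_name)

    print(resolve_output_file("WindBorne_W-1234_2024-01-01_09_6h.csv", "exports"))
    # exports/WindBorne_W-1234_2024-01-01_09_6h.csv (POSIX path separator)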
@@ -329,48 +325,39 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
         for (bucket_center, mission_name), observations in buckets.items():
             if observations:
                 # Format hour to be the actual bucket center
-                bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+                bucket_hour = int((bucket_center.hour + bucket_hours / 2) % 24)
 
-
-
+                # Generate file name based on output format
+                file_name_format = {
+                    'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                    'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                    'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                    'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                }
+                file_name = file_name_format[output_format] % (
+                    bucket_center.year, bucket_center.month, bucket_center.day,
+                    bucket_hour, bucket_hours)
 
-
-                output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" %
-                               (bucket_center.year, bucket_center.month, bucket_center.day,
-                                bucket_hour, bucket_hours))
+                output_file = os.path.join(output_dir or '.', file_name)
 
-
+                # Sort observations by timestamp within each bucket
+                sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
 
-
-                sorted_obs
+                if output_format == 'netcdf':
+                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
 
+                elif output_format == 'csv':
                     with open(output_file, mode='w', newline='') as file:
                         writer = csv.DictWriter(file, fieldnames=headers)
                         writer.writeheader()
                         writer.writerows(sorted_obs)
 
                 elif output_format == 'json':
-
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
-
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
-
-                    # Sort observations by timestamp within each bucket
-                    sorted_obs = dict(sorted(observations.items(), key=lambda x: int(x[1]['timestamp'])))
-
+                    sorted_obs_dict = {k: v for k, v in sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))}
                     with open(output_file, 'w', encoding='utf-8') as file:
-                        json.dump(
+                        json.dump(sorted_obs_dict, file, indent=4)
 
                 elif output_format == 'little_r':
-                    output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r" %
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
-
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
-
-                    sorted_obs = sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))
-
                     little_r_records = format_little_r(sorted_obs)
                     with open(output_file, 'w') as file:
                         file.write('\n'.join(little_r_records))
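The file_name_format table consolidates what 1.0.5 rebuilt per branch, and old-style % formatting fills in the bucket date plus the mid-point hour. A worked example of the filename math with a hypothetical mission name; note that '%d' % 6.0 renders as 6, so the float bucket_hours still yields a clean 6h suffix:

    from datetime import datetime, timezone

    bucket_hours = 6.0
    mission_name = "W-1234"  # hypothetical
    bucket_center = datetime(2024, 1, 1, 6, tzinfo=timezone.utc)

    # mid-point hour label: (6 + 6/2) % 24 == 9
    bucket_hour = int((bucket_center.hour + bucket_hours / 2) % 24)

    file_name = f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" % (
        bucket_center.year, bucket_center.month, bucket_center.day,
        bucket_hour, bucket_hours)
    print(file_name)  # WindBorne_W-1234_2024-01-01_09_6h.csv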
@@ -382,7 +369,7 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
                 mission_stats[mission_name]['files'] += 1
                 mission_stats[mission_name]['observations'] += len(observations)
         # Print total observations written
-        print(f"
+        print(f"Saved {total_observations_written} {'observation.' if total_observations_written == 1 else 'observations.'}")
         print("-----------------------------------------------------")
 
         # Print summary for each mission
@@ -392,7 +379,286 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
     print("-----------------------------------------------------")
     print("All observations have been processed and saved.")
 
-def poll_super_observations(start_time, end_time=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, callback=None):
+def poll_observations(start_time, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
+    """
+    Continuously polls for observations and saves to files in specified format.
+    Will run indefinitely until interrupted.
+
+    Args:
+        start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
+        include_ids (bool): Include observation IDs in response.
+        include_updated_at (bool): Include update timestamps in response.
+        mission_id (str): Filter observations by mission ID.
+        min_latitude (float): Minimum latitude boundary.
+        max_latitude (float): Maximum latitude boundary.
+        min_longitude (float): Minimum longitude boundary.
+        max_longitude (float): Maximum longitude boundary.
+        interval (int): Polling interval in seconds when no data is received (default: 60)
+        bucket_hours (float): Size of time buckets in hours (default: 6.0)
+        output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
+        output_dir (str): Directory for bucket files (default: current directory)
+        callback (callable): Optional callback for data processing
+    """
+    # Print warning about infinite loop
+    print(" ___________________________________________________________________")
+    print("| WARNING \U000026A0\U0000FE0F |")
+    print("| You are entering an endless loop. |")
+    print("| |")
+    print("| Press Ctrl + C anytime to exit. |")
+    print("|___________________________________________________________________|\n\n")
+    time.sleep(4)
+
+    start_time = to_unix_timestamp(start_time)
+
+    if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+        print("Please use one of the following formats:")
+        print(" - json\n - csv\n - little_r\n - netcdf")
+        return
+
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+        print(f"\U0001F4C1 Files will be saved to {output_dir}")
+    else:
+        print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
+
+    # Convert start_time to datetime
+    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+    # Calculate first center time that's after start_time
+    hours_since_day_start = start_dt.hour + start_dt.minute / 60
+    bucket_number = hours_since_day_start // bucket_hours
+    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
+    headers = [
+        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+    ]
+
+    buckets = {}  # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
+    current_timestamp = start_time
+    fetched_so_far = 0
+    mission_stats = {}
+
+    print(f"Starting continuous polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Polling interval: {interval} seconds")
+    print("-----------------------------------------------------")
+
+    try:
+        while True:
+            observations_page = get_observations_page(
+                since=current_timestamp,
+                min_latitude=min_latitude,
+                max_latitude=max_latitude,
+                min_longitude=min_longitude,
+                max_longitude=max_longitude,
+                include_updated_at=include_updated_at,
+                mission_id=mission_id,
+                include_ids=include_ids,
+                include_mission_name=True
+            )
+
+            if observations_page is None:
+                print(f"\nNull response from API. Retrying in {interval} seconds ...")
+                time.sleep(interval)
+                continue
+
+            observations = observations_page.get('observations', [])
+
+            # Invoke the callback with fetched super observations
+            if callback:
+                print("--------\nCallback\n--------")
+                callback(observations)
+
+            if observations:
+                fetched_so_far += len(observations)
+                print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+                print(f"Fetched {fetched_so_far} observations")
+                print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+                print("-----------------------------------------------------")
+
+            for obs in observations:
+                if 'mission_name' not in obs:
+                    continue
+
+                timestamp = obs.get('timestamp')
+                if not timestamp:
+                    continue
+
+                try:
+                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                except (OSError, ValueError, TypeError, OverflowError):
+                    continue
+
+                mission_name = obs.get('mission_name', 'Unknown')
+                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
+
+                processed_obs = {
+                    header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
+                    for header in headers
+                }
+
+                obs_id = f"{timestamp}_{mission_name}"
+
+                if obs_time >= start_dt:
+                    hours_diff = (obs_time - first_center).total_seconds() / 3600
+                    bucket_index = floor(hours_diff / bucket_hours)
+                    bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                    bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                    if obs_time <= bucket_end:
+                        bucket_key = (bucket_center, mission_name)
+
+                        # Initialize bucket if needed
+                        if bucket_key not in buckets:
+                            buckets[bucket_key] = {
+                                'data': {},
+                                'last_write': 0,
+                                'data_hash': ''
+                            }
+
+                        # Update bucket data
+                        buckets[bucket_key]['data'][obs_id] = processed_obs
+
+                        # Track statistics
+                        if mission_name not in mission_stats:
+                            mission_stats[mission_name] = {'files': set(), 'observations': 0}
+                        mission_stats[mission_name]['observations'] += 1
+
+                        # Calculate new data hash
+                        sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
+                        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
+
+                        # Check if we should write the bucket
+                        current_time = datetime.now(timezone.utc)
+                        time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
+                        data_changed = data_hash != buckets[bucket_key]['data_hash']
+
+                        # Write if it's been more than interval seconds since last write OR if data has changed
+                        if (time_since_last_write >= interval or data_changed) and output_format:
+                            bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+
+                            file_name_format = {
+                                'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                                'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                                'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                                'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                            }
+
+                            file_name = file_name_format[output_format] % (
+                                bucket_center.year, bucket_center.month, bucket_center.day,
+                                bucket_hour, bucket_hours)
+
+                            output_file = os.path.join(output_dir or '.', file_name)
+                            sorted_obs = [obs for _, obs in sorted_data]
+
+                            # Write the file based on format
+                            try:
+                                if output_format == 'netcdf':
+                                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
+                                elif output_format == 'csv':
+                                    with open(output_file, mode='w', newline='') as file:
+                                        writer = csv.DictWriter(file, fieldnames=headers)
+                                        writer.writeheader()
+                                        writer.writerows(sorted_obs)
+                                elif output_format == 'json':
+                                    sorted_obs_dict = {k: v for k, v in sorted_data}
+                                    with open(output_file, 'w', encoding='utf-8') as file:
+                                        json.dump(sorted_obs_dict, file, indent=4)
+                                elif output_format == 'little_r':
+                                    little_r_records = format_little_r(sorted_obs)
+                                    with open(output_file, 'w') as file:
+                                        file.write('\n'.join(little_r_records))
+
+                                buckets[bucket_key]['last_write'] = current_time.timestamp()
+                                buckets[bucket_key]['data_hash'] = data_hash
+                                mission_stats[mission_name]['files'].add(output_file)
+                            except Exception as e:
+                                print(f"Error writing bucket file {file_name}: {str(e)}")
+
+            # Clean up old buckets
+            current_time = datetime.now(timezone.utc)
+            buckets = {
+                k: v for k, v in buckets.items()
+                if current_time - k[0] <= timedelta(hours=bucket_hours * 2)  # Keep slightly longer for potential updates
+            }
+
+            next_timestamp = observations_page.get('next_since')
+            has_next_page = observations_page.get('has_next_page', False)
+
+            if next_timestamp and next_timestamp > current_timestamp:
+                current_timestamp = next_timestamp
+            elif not has_next_page:
+                print("-----------------------------------------------------")
+                print(f"\U0001F503 Latest super observations data have been processed.\nRetrying getting new observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+                continue
+
+            if not observations:
+                print(f"\U0001F503 No new super observations data available.\n Retrying getting new observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+
+    except KeyboardInterrupt:
+        print("\n\n\U0001F6D1 Received interrupt, stopping...")
+        print("-----------------------------------------------------")
+        for mission_name, stats in mission_stats.items():
+            print(f"Mission {mission_name}: {stats['observations']} observations across {len(stats['files'])} files")
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        exit(1001)
+    finally:
+        print("-----------------------------------------------------")
+        print("Finished processing observations.")
+
+# Super Observations
+# ------------
+def get_super_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=None, include_updated_at=None, mission_id=None, save_to_file=None):
+    """
+    Retrieves super observations page based on specified filters.
+
+    Args:
+        since (str): Filter observations after this timestamp.
+        min_time (str): Minimum timestamp for observations.
+        max_time (str): Maximum timestamp for observations.
+        include_ids (bool): Include observation IDs in response.
+        include_mission_name (bool): Include mission names in response.
+        include_updated_at (bool): Include update timestamps in response.
+        mission_id (str): Filter observations by mission ID.
+        save_to_file (str): Optional path to save the response data.
+                            If provided, saves the data in CSV format.
+
+    Returns:
+        dict: The API response containing filtered super observations.
+    """
+
+    url = f"{DATA_API_BASE_URL}/super_observations.json"
+
+    params = {}
+    if since:
+        params["since"] = to_unix_timestamp(since)
+    if min_time:
+        params["min_time"] = to_unix_timestamp(min_time)
+    if max_time:
+        params["max_time"] = to_unix_timestamp(max_time)
+    if mission_id:
+        params["mission_id"] = mission_id
+    if include_ids:
+        params["include_ids"] = True
+    if include_mission_name:
+        params["include_mission_name"] = True
+    if include_updated_at:
+        params["include_updated_at"] = True
+
+    params = {k: v for k, v in params.items() if v is not None}
+
+    response = make_api_request(url, params=params)
+    if save_to_file:
+        save_csv_json(save_to_file, response, csv_data_key='observations')
+
+    return response
+
+def super_observations(start_time, end_time=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
     """
     Fetches super observations between a start time and an optional end time and saves to files in specified format.
     Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
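The hashlib import added at the top of this diff serves poll_observations' write decision: each bucket's sorted contents are hashed with MD5 (used as a cheap change detector, not for security), and the bucket file is rewritten when the hash differs from the last write or more than interval seconds have passed. A reduced sketch of that decision with illustrative data:

    import hashlib
    import time

    interval = 60  # seconds
    bucket = {'data': {}, 'last_write': 0, 'data_hash': ''}

    def should_write(bucket, new_obs):
        bucket['data'].update(new_obs)
        sorted_data = sorted(bucket['data'].items(),
                             key=lambda x: int(x[1]['timestamp']))
        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
        stale = time.time() - bucket['last_write'] >= interval
        changed = data_hash != bucket['data_hash']
        if stale or changed:
            bucket['last_write'] = time.time()
            bucket['data_hash'] = data_hash
            return True
        return False

    print(should_write(bucket, {'1730419200_W-1234': {'timestamp': 1730419200}}))  # True: new data
    print(should_write(bucket, {}))  # False: unchanged and written recently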
@@ -408,6 +674,7 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
                             Supported formats are '.csv', '.json', '.little_r' and '.nc'
         bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
         output_format (str): Optional. Format to save data in separate files. Supported formats are 'json, 'csv', 'little_r' and 'netcdf'.
+        output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
         callback (callable): Optional callback function that receives (super observations, metadata) before saving.
                              This allows custom processing or saving in custom formats.
     """
@@ -469,12 +736,15 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
     # Initialize the polling loop
     current_timestamp = start_time
     has_next_page = True
+    fetced_so_far = 0
 
+    print(f"Starting polling super observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+    print("-----------------------------------------------------")
 
     while has_next_page:
         try:
             # Fetch observations
-            observations_page = get_super_observations(
+            observations_page = get_super_observations_page(
                 since=current_timestamp,
                 min_time=start_time,
                 max_time=end_time,
@@ -490,13 +760,15 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
                 continue
 
             observations = observations_page.get('observations', [])
-
+            fetced_so_far = fetced_so_far + len(observations)
+            print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+            print(f"Fetched {fetced_so_far} super observations")
+            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+            print("-----------------------------------------------------")
 
-            # Invoke the callback with fetched observations
+            # Invoke the callback with fetched super observations
             if callback:
-                print("--------")
-                print("Callback")
-                print("--------")
+                print("--------\nCallback\n--------")
                 callback(observations)
 
             for obs in observations:
@@ -547,27 +819,41 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
 
             if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
                 print("-----------------------------------------------------\n")
-                print("
+                print("Fetching complete.")
                 print("\n-----------------------------------------------------")
                 break
 
             current_timestamp = next_timestamp
 
+        except KeyboardInterrupt:
+            print("\n\n\U0001F6D1 Received interrupt, stopping...")
+            print("-----------------------------------------------------")
+            print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
+            print("-----------------------------------------------------")
+            exit(3)
         except Exception as e:
             print(f"Error occurred: {e}")
             exit(1001)
 
     # Save data to a single file
     if save_to_file:
+        # Create directory path if it doesn't exist
+        directory = os.path.dirname(save_to_file)
+        if directory and not os.path.isdir(directory):
+            os.makedirs(directory, exist_ok=True)
         filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
                                  if float(obs['timestamp']) >= start_time}
         # Sort by timestamp
         sorted_observations = dict(sorted(filtered_observations.items(),
                                           key=lambda x: float(x[1]['timestamp'])))
 
+        print(f"Saving {len(sorted_observations)} super {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
+        print("This may take a while...")
+        print("-----------------------------------------------------\n")
+
         if save_to_file.endswith('.nc'):
             first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
-            convert_to_netcdf(sorted_observations, first_obs_timestamp,
+            convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
 
         elif save_to_file.endswith('.json'):
             with open(save_to_file, 'w', encoding='utf-8') as f:
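The `if directory` guard before the single-file save is load-bearing: os.path.dirname returns '' for a bare filename, and os.makedirs('') raises FileNotFoundError, so directory creation must be skipped in that case. Illustration:

    import os

    for path in ("obs.json", "exports/super/obs.json"):
        directory = os.path.dirname(path)  # '' for a bare filename
        print(repr(directory), "-> makedirs" if directory else "-> skip")
    # '' -> skip
    # 'exports/super' -> makedirs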
@@ -588,6 +874,16 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
 
     # Save data to multiple file
     elif output_format:
+        # Create output directory if specified
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+            print(f"Files will be saved to {output_dir}")
+        else:
+            print(f"Files will be saved to {os.getcwd()}")
+
+        print(f"Processing {fetced_so_far} super {'observation' if fetced_so_far == 1 else 'observations'} and save them over multiple files.")
+        print("This may take a while...")
+        print("-----------------------------------------------------\n")
         # Track statistics per mission
         mission_stats = {}  # {mission_name: {'files': 0, 'observations': 0}}
         total_observations_written = 0
@@ -598,46 +894,37 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
                 # Format hour to be the actual bucket center
                 bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
 
-
-
+                # Generate file name based on output format
+                file_name_format = {
+                    'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                    'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                    'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                    'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                }
+                file_name = file_name_format[output_format] % (
+                    bucket_center.year, bucket_center.month, bucket_center.day,
+                    bucket_hour, bucket_hours)
 
-
-                output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" %
-                               (bucket_center.year, bucket_center.month, bucket_center.day,
-                                bucket_hour, bucket_hours))
+                output_file = os.path.join(output_dir or '.', file_name)
 
-
+                # Sort observations by timestamp within each bucket
+                sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
 
-
-                sorted_obs
+                if output_format == 'netcdf':
+                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
 
+                elif output_format == 'csv':
                     with open(output_file, mode='w', newline='') as file:
                         writer = csv.DictWriter(file, fieldnames=headers)
                         writer.writeheader()
                         writer.writerows(sorted_obs)
 
                 elif output_format == 'json':
-
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
-
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
-
-                    # Sort observations by timestamp within each bucket
-                    sorted_obs = dict(sorted(observations.items(), key=lambda x: int(x[1]['timestamp'])))
-
+                    sorted_obs_dict = {k: v for k, v in sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))}
                    with open(output_file, 'w', encoding='utf-8') as file:
-                        json.dump(
+                        json.dump(sorted_obs_dict, file, indent=4)
 
                 elif output_format == 'little_r':
-                    output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r" %
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
-
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
-
-                    sorted_obs = sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))
-
                     little_r_records = format_little_r(sorted_obs)
                     with open(output_file, 'w') as file:
                         file.write('\n'.join(little_r_records))
@@ -659,6 +946,229 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
     print("-----------------------------------------------------")
     print("All super observations have been processed and saved.")
 
+def poll_super_observations(start_time, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
+    """
+    Continuously polls for super observations and saves to files in specified format.
+    Will run indefinitely until interrupted.
+
+    Args:
+        start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
+        interval (int): Polling interval in seconds when no data is received (default: 60)
+        bucket_hours (float): Size of time buckets in hours (default: 6.0)
+        output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
+        output_dir (str): Directory for bucket files (default: current directory)
+        callback (callable): Optional callback for data processing
+    """
+    # Print warning about infinite loop
+    print(" ___________________________________________________________________")
+    print("| WARNING \U000026A0\U0000FE0F |")
+    print("| You are entering an endless loop. |")
+    print("| |")
+    print("| Press Ctrl + C anytime to exit. |")
+    print("|___________________________________________________________________|\n\n")
+    time.sleep(4)
+
+    start_time = to_unix_timestamp(start_time)
+
+    if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+        print("Please use one of the following formats:")
+        print(" - json\n - csv\n - little_r\n - netcdf")
+        return
+
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+        print(f"\U0001F4C1 Files will be saved to {output_dir}")
+    else:
+        print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
+
+    # Convert start_time to datetime
+    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+    # Calculate first center time that's after start_time
+    hours_since_day_start = start_dt.hour + start_dt.minute / 60
+    bucket_number = hours_since_day_start // bucket_hours
+    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
+    headers = [
+        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+    ]
+
+    buckets = {}  # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
+    current_timestamp = start_time
+    fetched_so_far = 0
+    mission_stats = {}
+
+    print(f"Starting continuous polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+    print(f"Polling interval: {interval} seconds")
+    print("-----------------------------------------------------")
+
+    try:
+        while True:
+            observations_page = get_super_observations_page(
+                since=current_timestamp,
+                min_time=start_time,
+                include_ids=True,
+                include_mission_name=True
+            )
+
+            if observations_page is None:
+                print(f"\nNull response from API. Retrying in {interval} seconds ...")
+                time.sleep(interval)
+                continue
+
+            observations = observations_page.get('observations', [])
+
+            # Invoke the callback with fetched super observations
+            if callback:
+                print("--------\nCallback\n--------")
+                callback(observations)
+
+            if observations:
+                fetched_so_far += len(observations)
+                print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+                print(f"Fetched {fetched_so_far} super observations")
+                print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+                print("-----------------------------------------------------")
+
+            for obs in observations:
+                if 'mission_name' not in obs:
+                    continue
+
+                timestamp = obs.get('timestamp')
+                if not timestamp:
+                    continue
+
+                try:
+                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                except (OSError, ValueError, TypeError, OverflowError):
+                    continue
+
+                mission_name = obs.get('mission_name', 'Unknown')
+                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
+
+                processed_obs = {
+                    header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
+                    for header in headers
+                }
+
+                obs_id = f"{timestamp}_{mission_name}"
+
+                if obs_time >= start_dt:
+                    hours_diff = (obs_time - first_center).total_seconds() / 3600
+                    bucket_index = floor(hours_diff / bucket_hours)
+                    bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                    bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                    if obs_time <= bucket_end:
+                        bucket_key = (bucket_center, mission_name)
+
+                        # Initialize bucket if needed
+                        if bucket_key not in buckets:
+                            buckets[bucket_key] = {
+                                'data': {},
+                                'last_write': 0,
+                                'data_hash': ''
+                            }
+
+                        # Update bucket data
+                        buckets[bucket_key]['data'][obs_id] = processed_obs
+
+                        # Track statistics
+                        if mission_name not in mission_stats:
+                            mission_stats[mission_name] = {'files': set(), 'observations': 0}
+                        mission_stats[mission_name]['observations'] += 1
+
+                        # Calculate new data hash
+                        sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
+                        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
+
+                        # Check if we should write the bucket
+                        current_time = datetime.now(timezone.utc)
+                        time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
+                        data_changed = data_hash != buckets[bucket_key]['data_hash']
+
+                        # Write if it's been more than interval seconds since last write OR if data has changed
+                        if (time_since_last_write >= interval or data_changed) and output_format:
+                            bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+
+                            file_name_format = {
+                                'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                                'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                                'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                                'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                            }
+
+                            file_name = file_name_format[output_format] % (
+                                bucket_center.year, bucket_center.month, bucket_center.day,
+                                bucket_hour, bucket_hours)
+
+                            output_file = os.path.join(output_dir or '.', file_name)
+                            sorted_obs = [obs for _, obs in sorted_data]
+
+                            # Write the file based on format
+                            try:
+                                if output_format == 'netcdf':
+                                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
+                                elif output_format == 'csv':
+                                    with open(output_file, mode='w', newline='') as file:
+                                        writer = csv.DictWriter(file, fieldnames=headers)
+                                        writer.writeheader()
+                                        writer.writerows(sorted_obs)
+                                elif output_format == 'json':
+                                    sorted_obs_dict = {k: v for k, v in sorted_data}
+                                    with open(output_file, 'w', encoding='utf-8') as file:
+                                        json.dump(sorted_obs_dict, file, indent=4)
+                                elif output_format == 'little_r':
+                                    little_r_records = format_little_r(sorted_obs)
+                                    with open(output_file, 'w') as file:
+                                        file.write('\n'.join(little_r_records))
+
+                                buckets[bucket_key]['last_write'] = current_time.timestamp()
+                                buckets[bucket_key]['data_hash'] = data_hash
+                                mission_stats[mission_name]['files'].add(output_file)
+                            except Exception as e:
+                                print(f"Error writing bucket file {file_name}: {str(e)}")
+
+            # Clean up old buckets
+            current_time = datetime.now(timezone.utc)
+            buckets = {
+                k: v for k, v in buckets.items()
+                if current_time - k[0] <= timedelta(hours=bucket_hours * 2)  # Keep slightly longer for potential updates
+            }
+
+            next_timestamp = observations_page.get('next_since')
+            has_next_page = observations_page.get('has_next_page', False)
+
+            if next_timestamp and next_timestamp > current_timestamp:
+                current_timestamp = next_timestamp
+            elif not has_next_page:
+                print("-----------------------------------------------------")
+                print(f"\U0001F503 Latest super observations data have been processed.\nRetrying getting new super observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+                continue
+
+            if not observations:
+                print(f"\U0001F503 No new super observations data available.\n Retrying getting new super observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+
+    except KeyboardInterrupt:
+        print("\n\U0001F6D1 Received interrupt, stopping...")
+        print("-----------------------------------------------------")
+        for mission_name, stats in mission_stats.items():
+            print(f"Mission {mission_name}: {stats['observations']} super observations across {len(stats['files'])} files")
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        exit(1001)
+    finally:
+        print("-----------------------------------------------------")
+        print("Finished processing super observations.")
+
+# ------------
+# METADATA
+# ------------
 def get_flying_missions(cli=None, save_to_file=None):
     """
     Retrieves a list of currently flying missions.