sxs 2024.0.44__py3-none-any.whl → 2025.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,573 +1,2 @@
  """Container interface to the catalog of SXS simulations"""
-
- import collections
- import numpy as np
- import pandas as pd
-
- from ..utilities.string_converters import *
-
-
- class SimulationsDataFrame(pd.DataFrame):
-     @property
-     def BBH(self):
-         """Restrict dataframe to just binary black hole systems"""
-         return type(self)(self[self["object_types"] == "BHBH"])
-     BHBH = BBH
-
-     @property
-     def BHNS(self):
-         """Restrict dataframe to just black hole-neutron star systems"""
-         return type(self)(self[self["object_types"] == "BHNS"])
-     NSBH = BHNS
-
-     @property
-     def NSNS(self):
-         """Restrict dataframe to just binary neutron star systems"""
-         return type(self)(self[self["object_types"] == "NSNS"])
-     BNS = NSNS
-
-     @property
-     def noneccentric(self):
-         """Restrict dataframe to just non-eccentric systems (e<1e-3)"""
-         return type(self)(self[self["reference_eccentricity_bound"] < 1e-3])
-
-     @property
-     def eccentric(self):
-         """Restrict dataframe to just eccentric systems (e>=1e-3)"""
-         return type(self)(self[self["reference_eccentricity_bound"] >= 1e-3])
-
-     @property
-     def nonprecessing(self):
-         """Restrict dataframe to just nonprecessing systems
-
-         The criterion used here is that the sum of the x-y components
-         of the spins is less than 1e-3 at the reference time.
-         """
-         return type(self)(self[
-             (self["reference_chi1_perp"] + self["reference_chi2_perp"]) < 1e-3
-         ])
-
-     @property
-     def precessing(self):
-         """Restrict dataframe to just precessing systems
-
-         The criterion used here is that the sum of the x-y components
-         of the spins is at least 1e-3 at the reference time.
-         """
-         return type(self)(self[
-             (self["reference_chi1_perp"] + self["reference_chi2_perp"]) >= 1e-3
-         ])
-
-     @property
-     def IMR(self):
-         """Restrict dataframe to just BBH IMR systems
-
-         "IMR" stands for inspiral, merger, and ringdown. Systems that
-         will *not* be in this group include simulations that
-         correspond to physical IMR systems, but were not continued
-         through the merger.
-
-         The criteria used here are just that the reference
-         eccentricity and remnant mass are actual (finite) numbers.
-         Currently, at least, the existence of a measured eccentricity
-         means that the system is not hyperbolic or head-on.
-         """
-         df = self.BBH
-         return type(df)(df[
-             np.isfinite(df["reference_eccentricity"])
-             & np.isfinite(df["remnant_mass"])
-         ])
-
-     @property
-     def hyperbolic(self):
-         """Restrict dataframe to just hyperbolic systems
-
-         The criterion used here is that the (normalized) ADM mass is
-         greater than 1.
-         """
-         total_mass = self["initial_mass1"] + self["initial_mass2"]
-         normalized_ADM = self["initial_ADM_energy"] / total_mass
-         return type(self)(self[
-             np.isfinite(total_mass) & (total_mass > 0) & (normalized_ADM > 1)
-         ])
-
-     @property
-     def undeprecated(self):
-         """Restrict dataframe to just simulations that are not deprecated"""
-         return type(self)(self[~self["deprecated"]])
-
-
- class Simulations(collections.OrderedDict):
-     """Interface to the catalog of SXS simulations
-
-     Creation
-     --------
-     You probably don't need to create this object yourself. The
-     easiest way to create this object is just to use the `sxs.load`
-     function:
-
-     ```python
-     import sxs
-
-     simulations = sxs.load("simulations")
-     ```
-
-     Note that SXS members may also wish to read a local copy of the
-     simulation annex, which can be done with
-     ```python
-     simulations = sxs.load("simulations", annex_dir="/path/to/SimulationAnnex.git")
-     ```
-     which will re-read the annex (which may take about a minute), or
-     ```python
-     simulations = sxs.load("simulations", local=True)
-     ```
-     if the annex has not been updated since the last time you
-     used the `annex_dir` argument. Once you have done this,
-     calls to `sxs.load` will automatically use this local copy
-     of the simulations.
-     """
-     last_modified_url = "https://api.github.com/repos/sxs-collaboration/sxs/contents/simulations.json?ref=simulations"
-     url = "https://github.com/sxs-collaboration/sxs/raw/simulations/simulations.json"
-
-     def __init__(self, sims):
-         """Initialize the Simulations dictionary
-
-         Note that the constructor is not generally useful from outside
-         this class. See `Simulations.load` for a more useful
-         initialization function, or simply call
-         `sxs.load("simulations")`.
-
-         """
-         from .. import Metadata
-         super(Simulations, self).__init__(
-             (k, Metadata(sims[k])) for k in sorted(sims)
-         )
-
-     @classmethod
-     def remote_timestamp(cls, download):
-         import requests
-         from datetime import datetime, timezone
-         if not download:
-             return datetime.min.replace(tzinfo=timezone.utc)
-         failed = False
-         try:
-             response = requests.head(
-                 Simulations.last_modified_url,
-                 headers={"X-GitHub-Api-Version": "2022-11-28"},
-             )
-             if response.status_code != 200 or "Last-Modified" not in response.headers:
-                 failed = True
-             else:
-                 remote_timestamp = datetime.strptime(
-                     response.headers["Last-Modified"], "%a, %d %b %Y %H:%M:%S GMT"
-                 ).replace(tzinfo=timezone.utc)
-         except Exception as e:
-             print("Got exception while trying to get the remote timestamp:", e)
-             failed = True
-         if failed:
-             print(
-                 f"Failed to get the remote timestamp from <{Simulations.last_modified_url}>.\n"
-                 + "Assuming it is old."
-             )
-             return datetime.min.replace(tzinfo=timezone.utc)
-         return remote_timestamp
-
-     @classmethod
-     def local(cls, directory=None, *, download=None, output_file=None, compute_md5=False, show_progress=False):
-         """Load the local catalog of SXS simulations
-
-         This function loads the standard public catalog, but also
-         includes any local simulations found in the given directory.
-         If no directory is provided, it will look for the local
-         simulations file in the sxs cache directory.
-
-         Parameters
-         ----------
-         directory : {None, str, Path}, optional
-             A directory containing subdirectories of SXS simulations.
-             See `sxs.local_simulations` for details about what is
-             expected in this directory. If None (the default), it
-             will look for the local simulations file in the sxs cache
-             directory.
-         download : {None, bool}, optional
-             Passed to `Simulations.load` when loading the public set
-             of simulations.
-         output_file : {None, str, Path}, optional
-             If `directory` is not None, this will be passed to
-             `sxs.write_local_simulations`.
-         compute_md5 : bool, optional
-             If `directory` is not None, this will be passed to
-             `sxs.local_simulations`.
-         show_progress : bool, optional
-             If `directory` is not None, this will be passed to
-             `sxs.local_simulations`.
-
-         See Also
-         --------
-         sxs.local_simulations : Search for local simulations
-         sxs.write_local_simulations : Write local simulations to a
-             file
-
-         """
-         import json
-         from .local import write_local_simulations
-         from .. import sxs_directory
-
-         if directory is not None:
-             local_path = output_file
-             local_simulations = write_local_simulations(
-                 directory,
-                 output_file=output_file,
-                 compute_md5=compute_md5,
-                 show_progress=show_progress
-             )
-         else:
-             local_path = sxs_directory("cache") / "local_simulations.json"
-         if not local_path.exists():
-             if directory is not None:
-                 raise ValueError(f"Writing local simulations for {directory=} failed")
-             else:
-                 raise ValueError(
-                     f"Local simulations file not found, but no `directory` was provided.\n"
-                     + "If called from `sxs.load`, just pass the name of the directory."
-                 )
-         with local_path.open("r") as f:
-             local_simulations = json.load(f)
-         simulations = cls.load(download)
-         doi_versions = {
-             k: v["DOI_versions"]
-             for k,v in simulations.items()
-             if "DOI_versions" in v
-         }
-         simulations.update(local_simulations)
-         for k,v in doi_versions.items():
-             simulations[k]["DOI_versions"] = v
-         simulations.__file__ = str(local_path)
-         return simulations
-
-     @classmethod
-     def load(cls, download=None, *, local=False, annex_dir=None, output_file=None, compute_md5=False, show_progress=False):
-         """Load the catalog of SXS simulations
-
-         Note that — unlike most SXS data files — the simulations file
-         is updated frequently. As a result, this function — unlike
-         the loading functions for most SXS data files — will download
-         the simulations by default each time it is called. However,
-         also note that this function is itself cached, meaning that
-         the same dict will be returned on each call in a given python
-         session. If you want to avoid that behavior, use
-         `Simulations.reload`.
-
-         Parameters
-         ----------
-         download : {None, bool}, optional
-             If False, this function will look for the simulations in
-             the sxs cache and raise an error if it is not found. If
-             True, this function will download the simulations and
-             raise an error if the download fails. If None (the
-             default), it will try to download the file, warn but fall
-             back to the cache if that fails, and only raise an error
-             if the simulations file is not found in the cache. Note
-             that this ignores the sxs configuration file entirely.
-
-         Keyword-only Parameters
-         -----------------------
-         local : {None, bool}, optional
-             If True, this function will load local simulations from
-             the sxs cache. To prepare the cache, you may wish to call
-             `sxs.write_local_simulations`.
-         annex_dir : {None, str, Path}, optional
-             If provided and `local=True`, this function will load
-             local simulations from the given directory. This is
-             equivalent to calling `Simulations.local(directory)`.
-         output_file : {None, str, Path}, optional
-             If `annex_dir` is not None, this will be passed to
-             `sxs.write_local_simulations`.
-         compute_md5 : bool, optional
-             If `annex_dir` is not None, this will be passed to
-             `sxs.simulations.local_simulations`.
-         show_progress : bool, optional
-             If `annex_dir` is not None, this will be passed to
-             `sxs.simulations.local_simulations`.
-
-         See Also
-         --------
-         sxs.sxs_directory : Locate cache directory
-         Simulations.reload : Avoid caching the result of this function
-
-         """
-         from datetime import datetime, timezone
-         import json
-         import zipfile
-         from .. import sxs_directory, read_config
-         from ..utilities import download_file
-
-         if hasattr(cls, "_simulations"):
-             return cls._simulations
-
-         if local or annex_dir is not None:
-             cls._simulations = cls.local(
-                 annex_dir,
-                 download=download,
-                 output_file=output_file,
-                 compute_md5=compute_md5,
-                 show_progress=show_progress
-             )
-             return cls._simulations
-
-         progress = read_config("download_progress", True)
-
-         remote_timestamp = cls.remote_timestamp(download is not False)  # Test for literal `False`
-
-         cache_path = sxs_directory("cache") / "simulations.zip"
-
-         if cache_path.exists():
-             local_timestamp = datetime.fromtimestamp(cache_path.stat().st_mtime, timezone.utc)
-         elif download is False:
-             raise ValueError(f"Simulations not found in '{cache_path}' and downloading was turned off")
-         else:
-             local_timestamp = datetime.min.replace(tzinfo=timezone.utc)
-
-         download_failed = False
-         if (download or download is None) and remote_timestamp > local_timestamp:
-             # 1. Download the full json file (zipped in flight, but auto-decompressed on arrival)
-             # 2. Zip to a temporary file (using bzip2, which is better than the in-flight compression)
-             # 3. Replace the original simulations.zip with the temporary zip file
-             # 4. Remove the full json file
-             # 5. Make sure the temporary zip file is gone too
-             temp_json = cache_path.with_suffix(".temp.json")
-             temp_zip = cache_path.with_suffix(".temp.zip")
-             try:
-                 try:
-                     download_file(cls.url, temp_json, progress=progress, if_newer=False)
-                 except Exception as e:
-                     if download:
-                         raise RuntimeError(f"Failed to download '{cls.url}'; try setting `download=False`") from e
-                     download_failed = e  # We'll try the cache
-                 else:
-                     if temp_json.exists():
-                         with zipfile.ZipFile(temp_zip, "w", compression=zipfile.ZIP_BZIP2) as simulations_zip:
-                             simulations_zip.write(temp_json, arcname="simulations.json")
-                         temp_zip.replace(cache_path)
-             finally:
-                 temp_json.unlink(missing_ok=True)
-                 temp_zip.unlink(missing_ok=True)
-
-         if not cache_path.exists():
-             if download is False:  # Test if it literally *is* False, rather than just casts to False
-                 raise ValueError(f"The simulations file was not found in '{cache_path}', and downloading was turned off")
-             elif download_failed:
-                 raise ValueError(f"Simulations not found in '{cache_path}' and download failed") from download_failed
-             else:
-                 raise ValueError(f"Simulations not found in '{cache_path}' for unknown reasons")
-
-         try:
-             with zipfile.ZipFile(cache_path, "r") as simulations_zip:
-                 try:
-                     with simulations_zip.open("simulations.json") as simulations_json:
-                         try:
-                             simulations = json.load(simulations_json)
-                         except Exception as e:
-                             raise ValueError(f"Failed to parse 'simulations.json' in '{cache_path}'") from e
-                 except Exception as e:
-                     raise ValueError(f"Failed to open 'simulations.json' in '{cache_path}'") from e
-         except Exception as e:
-             raise ValueError(f"Failed to open '{cache_path}' as a ZIP file") from e
-
-         sims = cls(simulations)
-         sims.__file__ = str(cache_path)
-
-         cls._simulations = sims
-         return sims
-
-     @classmethod
-     def reload(cls, download=True):
-         """Reload the catalog of SXS simulations, without caching
-
-         Clears the cache of `Simulations.load` and returns the result of calling it again.
-         Note that in this function, the default value of `download` is `True`, rather
-         than `None` as in `Simulations.load` — though both behaviors are available.
-
-         Parameters
-         ----------
-         download : {None, bool}, optional
-             If False, this function will look for the simulations in the sxs cache and
-             raise an error if it is not found. If True (the default), this function
-             will download the simulations and raise an error if the download fails. If
-             None, it will try to download the file, warn but fall back to the cache if
-             that fails, and only raise an error if the simulations file is not found in
-             the cache. Note that this ignores the sxs configuration file entirely.
-
-         See Also
-         --------
-         sxs.sxs_directory : Locate cache directory
-         Simulations.load : Caching version of this function
-
-         """
-         cls.load.cache_clear()
-         return cls.load(download=download)
-
-     @property
-     def dataframe(self):
-         """Create pandas.DataFrame containing metadata for all
-         simulations
-
-         Note that `pandas` is the standard Python interface for
-         heterogeneous data tables, like the one we have here. This
-         interface allows for more convenient slicing and querying of
-         data than the list of `dict`s provided by the `Simulations`
-         object.
-
-         This can also be a more convenient way to access the metadata
-         because the raw metadata has missing keys and mixed formats.
-         If a key is missing from the metadata for a particular
-         simulation, the dataframe will just have a `NaN` in that
-         entry, rather than raising an exception. Other keys may have
-         unexpected entries — such as the `"reference_eccentricity"`
-         field, which is *usually* a float but may be a string like
-         "<0.0001" if the eccentricity is not known precisely, but is
-         only bounded. The dataframe introduces a new column called
-         `"reference_eccentricity_bound"` that is always a float giving
-         an upper bound on the eccentricity.
-
-         See the `pandas` documentation for more information on how to
-         use the resulting dataframe, or the `Simulations` tutorial for
-         examples.
-
-         """
-         import numpy as np
-         import pandas as pd
-
-         if hasattr(self, "_dataframe"):
-             return self._dataframe
-
-         simulations = pd.DataFrame.from_dict(self, orient="index")
-
-         # See also below for "number_of_orbits" field.
-         # See also `sxs.metadata.metadata._backwards_compatibility`;
-         # it's probably a good idea to duplicate whatever is included
-         # here in that function, just to make sure nothing slips
-         # through the cracks.
-         for col in [
-             "number_of_orbits", "number_of_orbits_from_start",
-             "number_of_orbits_from_reference_time"
-         ]:
-             if col not in simulations.columns:
-                 simulations[col] = np.nan
-
-         sims_df = SimulationsDataFrame(pd.concat((
-             simulations["reference_time"].map(floater),
-             simulations["reference_mass_ratio"].map(floater),
-             simulations["reference_dimensionless_spin1"].map(three_vec),
-             simulations["reference_dimensionless_spin1"].map(norm).rename("reference_dimensionless_spin1_mag"),
-             simulations["reference_dimensionless_spin2"].map(three_vec),
-             simulations["reference_dimensionless_spin2"].map(norm).rename("reference_dimensionless_spin2_mag"),
-             simulations["reference_chi_eff"].map(floater),
-             simulations["reference_chi1_perp"].map(floater),
-             simulations["reference_chi2_perp"].map(floater),
-             simulations["reference_eccentricity"].map(floater),
-             simulations["reference_eccentricity"].map(floaterbound).rename("reference_eccentricity_bound"),
-             simulations["reference_mean_anomaly"].map(floater),
-             simulations["reference_orbital_frequency"].map(three_vec),
-             simulations["reference_orbital_frequency"].map(norm).rename("reference_orbital_frequency_mag"),
-             (
-                 simulations["reference_position1"].map(three_vec)
-                 - simulations["reference_position2"].map(three_vec)
-             ).map(norm).rename("reference_separation"),
-             simulations["reference_position1"].map(three_vec),
-             simulations["reference_position2"].map(three_vec),
-             simulations["reference_mass1"].map(floater),
-             simulations["reference_mass2"].map(floater),
-             simulations["reference_dimensionless_spin1"].map(norm).rename("reference_chi1_mag"),
-             simulations["reference_dimensionless_spin2"].map(norm).rename("reference_chi2_mag"),
-             simulations["relaxation_time"].map(floater),
-             #simulations["merger_time"].map(floater),
-             simulations["common_horizon_time"].map(floater),
-             simulations["remnant_mass"].map(floater),
-             simulations["remnant_dimensionless_spin"].map(three_vec),
-             simulations["remnant_dimensionless_spin"].map(norm).rename("remnant_dimensionless_spin_mag"),
-             simulations["remnant_velocity"].map(three_vec),
-             simulations["remnant_velocity"].map(norm).rename("remnant_velocity_mag"),
-             #simulations["final_time"].map(floater),
-             simulations["EOS"].fillna(simulations["eos"]),
-             simulations["disk_mass"].map(floater),
-             simulations["ejecta_mass"].map(floater),
-             simulations["object_types"].astype("category"),
-             simulations["initial_data_type"].astype("category"),
-             simulations["initial_separation"].map(floater),
-             simulations["initial_orbital_frequency"].map(floater),
-             simulations["initial_adot"].map(floater),
-             simulations["initial_ADM_energy"].map(floater),
-             simulations["initial_ADM_linear_momentum"].map(three_vec),
-             simulations["initial_ADM_linear_momentum"].map(norm).rename("initial_ADM_linear_momentum_mag"),
-             simulations["initial_ADM_angular_momentum"].map(three_vec),
-             simulations["initial_ADM_angular_momentum"].map(norm).rename("initial_ADM_angular_momentum_mag"),
-             simulations["initial_mass1"].map(floater),
-             simulations["initial_mass2"].map(floater),
-             simulations["initial_mass_ratio"].map(floater),
-             simulations["initial_dimensionless_spin1"].map(three_vec),
-             simulations["initial_dimensionless_spin1"].map(norm).rename("initial_dimensionless_spin1_mag"),
-             simulations["initial_dimensionless_spin2"].map(three_vec),
-             simulations["initial_dimensionless_spin2"].map(norm).rename("initial_dimensionless_spin2_mag"),
-             simulations["initial_position1"].map(three_vec),
-             simulations["initial_position2"].map(three_vec),
-             #simulations["object1"].astype("category"),
-             #simulations["object2"].astype("category"),
-             # simulations["url"],
-             #simulations["simulation_name"],
-             #simulations["alternative_names"],
-             # simulations["metadata_path"],
-             # simulations["end_of_trajectory_time"].map(floater),
-             # simulations["merger_time"].map(floater),
-             simulations["number_of_orbits"].map(floater),
-             simulations["number_of_orbits_from_start"].map(floater),
-             simulations["number_of_orbits_from_reference_time"].map(floater),
-             simulations["DOI_versions"],
-             simulations["keywords"],
-             simulations["date_link_earliest"].map(datetime_from_string),
-             simulations["date_run_earliest"].map(datetime_from_string),
-             simulations["date_run_latest"].map(datetime_from_string),
-             simulations["date_postprocessing"].map(datetime_from_string),
-         ), axis=1))
-
-         sims_df.insert(0, "deprecated", (
-             sims_df["keywords"].map(lambda ks: "deprecated" in ks)
-         ))
-
-         # See also `sxs.metadata.metadata._backwards_compatibility`;
-         # it's probably a good idea to duplicate whatever is included
-         # here in that function, just to make sure nothing slips
-         # through the cracks.
-         sims_df["number_of_orbits"] = sims_df["number_of_orbits"].fillna(
-             sims_df["number_of_orbits_from_start"]
-         )
-
-         # We have ignored the following fields present in the
-         # simulations.json file (as of 2024-08-04), listed here with
-         # the number of non-null entries:
-         #
-         # alternative_names           2778
-         # point_of_contact_email      2778
-         # authors_emails              2776
-         # simulation_bibtex_keys      2778
-         # code_bibtex_keys            2778
-         # initial_data_bibtex_keys    2778
-         # quasicircular_bibtex_keys   2778
-         # metadata_version            2778
-         # spec_revisions              2778
-         # spells_revision             2778
-         # merger_time                    9
-         # final_time                    12
-         # reference_spin1                2
-         # reference_spin2                1
-         # initial_spin1                  2
-         # initial_spin2                  2
-         # remnant_spin                   2
-         # initial_mass_withspin2         2
-         # end_of_trajectory_time         3
-
-         self._dataframe = sims_df
-         return sims_df
-
-     table = dataframe
+ from sxscatalog.simulations.simulations import *
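
Functionally, this first change is a relocation rather than a removal: the module body now comes from the separate `sxscatalog` package via the wildcard import above. Assuming that re-export preserves the interface documented in the deleted docstrings, catalog usage looks the same in both versions. A minimal sketch based on those docstrings:

```python
import sxs

# Per `Simulations.load` above: downloads a fresh simulations file by
# default, and caches the resulting dict for the rest of the session.
simulations = sxs.load("simulations")

# `Simulations.dataframe` builds a pandas table with normalized columns,
# e.g. the always-float "reference_eccentricity_bound".
df = simulations.dataframe

# Each SimulationsDataFrame property returns another SimulationsDataFrame,
# so the filters defined in the deleted code chain naturally: non-deprecated,
# binary-black-hole, nonprecessing, non-eccentric systems.
candidates = df.undeprecated.BBH.nonprecessing.noneccentric
print(len(candidates))
```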
@@ -1,104 +1,2 @@
  """A core utility function for downloading efficiently and robustly"""
-
- def download_file(url, path, progress=False, if_newer=True):
-     """Download large file efficiently from url into path
-
-     Parameters
-     ----------
-     url : str
-         The URL to download from. Redirects are followed.
-     path : {str, pathlib.Path}
-         Path to the file in which the download will be stored. If this is an
-         existing directory or ends in a path separator, the "path" component of the
-         URL will be used as the file name, and the full directory path will be
-         created.
-     progress : bool, optional
-         If True, and a nonzero Content-Length header is returned, a progress bar
-         will be shown during the download.
-     if_newer : {bool, datetime, pathlib.Path}, optional
-         If True (the default), the file will only be downloaded if the version on
-         the server is newer than the "mtime" of the local version. If this flag is
-         False, or there is no local version, or the server does not reply with a
-         'Last-Modified' header, the file is downloaded as usual. If a datetime
-         object is passed, it is used instead of the local file's mtime. If a Path
-         object is passed, its mtime is used instead of the output path's, and this
-         path is returned if it is newer than the server's file.
-
-     Returns
-     -------
-     local_filename : pathlib.Path
-
-     """
-     import functools
-     import pathlib
-     import os
-     import shutil
-     import urllib.parse
-     import requests
-     from tqdm.auto import tqdm
-     from datetime import datetime, timezone
-
-     url_path = urllib.parse.urlparse(url).path
-     path = pathlib.Path(path).expanduser().resolve()
-     if path.is_dir():
-         path = path / url_path[1:]  # May have some new directories
-     directory = path.parent
-     filename = path.name
-     directory.mkdir(parents=True, exist_ok=True)
-     if not os.access(str(directory), os.W_OK) or not directory.is_dir():
-         raise ValueError(f"Path parent '{directory}' is not writable or is not a directory")
-     local_filename = directory / filename
-
-     r = requests.get(url, stream=True, allow_redirects=True)
-     if r.status_code != 200:
-         print(f"An error occurred when trying to access <{url}>.")
-         try:
-             print(r.json())
-         except Exception:
-             pass
-         r.raise_for_status()
-         raise RuntimeError()  # Will only happen if the response was not strictly an error
-
-     if if_newer and "Last-Modified" in r.headers:
-         remote_timestamp = datetime.strptime(
-             r.headers["Last-Modified"], "%a, %d %b %Y %H:%M:%S GMT"
-         ).replace(tzinfo=timezone.utc)
-         if isinstance(if_newer, datetime):
-             local_timestamp = if_newer
-         elif isinstance(if_newer, pathlib.Path) and if_newer.exists():
-             local_timestamp = datetime.fromtimestamp(if_newer.stat().st_mtime, timezone.utc)
-         elif local_filename.exists():
-             local_timestamp = datetime.fromtimestamp(local_filename.stat().st_mtime, timezone.utc)
-         else:
-             local_timestamp = remote_timestamp  # Just to make the next condition evaluate to False
-         if local_timestamp > remote_timestamp:
-             if progress:
-                 print(f"Skipping download from '{url}' because local file is newer")
-             if isinstance(if_newer, pathlib.Path) and if_newer.exists():
-                 return if_newer
-             return local_filename
-
-     file_size = int(r.headers.get('Content-Length', 0))
-     r.raw.read = functools.partial(r.raw.read, decode_content=True)
-
-     output_path = local_filename.parent / (local_filename.name + '.part')
-     try:
-         with output_path.open("wb") as f:
-             if progress and file_size:
-                 desc = "(Unknown total file size)" if file_size == 0 else ""
-                 print(f"Downloading to {path}:", flush=True)
-                 with tqdm.wrapattr(r.raw, "read", total=file_size, desc=desc, dynamic_ncols=True) as r_raw:
-                     shutil.copyfileobj(r_raw, f)
-             else:
-                 shutil.copyfileobj(r.raw, f)
-     except Exception as e:
-         raise RuntimeError(f"Failed to download {url} to {local_filename}; original file remains") from e
-     else:
-         output_path.replace(local_filename)
-     finally:
-         try:
-             output_path.unlink()  # missing_ok is only available in python 3.8
-         except FileNotFoundError:
-             pass
-
-     return local_filename
+ from sxscatalog.utilities.downloads import *
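
Likewise, `download_file` is now re-exported from `sxscatalog.utilities.downloads`. A usage sketch based on the docstring above; the URL and destination paths here are placeholders for illustration, not endpoints the package itself uses:

```python
from pathlib import Path
from sxs.utilities import download_file  # re-exported from sxscatalog in 2025.0.2

url = "https://example.com/data/simulations.json"  # placeholder URL

# Explicit destination file: with if_newer=True (the default), the download
# is skipped whenever the local file's mtime is newer than the server's
# Last-Modified header, and the local path is returned unchanged.
local = download_file(url, Path("downloads/simulations.json"), progress=True)

# Destination that is an existing directory: the URL's path component
# supplies the file name, so this writes to downloads/data/simulations.json.
local = download_file(url, Path("downloads"))
print(local)  # pathlib.Path of the downloaded file
```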