sxs 2024.0.44__py3-none-any.whl → 2025.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,342 +0,0 @@
1
- import time
2
- import warnings
3
- import re
4
- import datetime
5
- import json
6
- import pathlib
7
-
8
- from .login import Login
9
- from .. import sxs_id, Metadata
10
- from ..utilities import sxs_identifier_regex, SimpleVersion
11
-
12
-
13
def mtime(f):
    """Look for git or filesystem modification time.

    We first ask git for the author date of the last commit touching `f`; if
    that fails for any reason (git absent, not a repository, file untracked so
    git prints nothing), we fall back to the filesystem's `mtime`.

    Parameters
    ----------
    f : pathlib.Path
        Path to the file whose modification time is wanted.

    Returns
    -------
    datetime.datetime
        Timezone-aware (UTC) modification time.

    """
    from subprocess import check_output
    try:
        # Pass arguments as a list (no shell) so paths containing spaces or
        # shell metacharacters cannot break — or inject into — the command.
        output = check_output(
            ["git", "log", "-1", "--format=%ad", "--date=unix", "--", str(f)]
        ).decode().strip()
        # git prints a unix timestamp as text; convert it to a number.  For an
        # untracked file the output is empty, and float("") raises, which
        # correctly routes us to the filesystem fallback below.
        timestamp = float(output)
    except Exception:
        # Fall back to the filesystem's modification time (already a float).
        timestamp = f.stat().st_mtime
    return datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)
29
-
30
-
31
def filter_files(files, utime):
    """Return the subset of `files` modified (per `mtime`) strictly after `utime`."""
    return list(filter(lambda path: mtime(path) > utime, files))
33
-
34
-
35
def upload_simannex_dir(login, directory, date=None):
    """Update/upload data from the SimAnnex to CaltechDATA

    Parameters
    ----------
    login : caltechdata.Login
        This is used to communicate with CaltechDATA.
    directory : str or pathlib.Path
        This must contain either "Public" or "Private", followed by the specific
        directory in SimAnnex to upload. It can also contain higher path elements
        — for example, you can give the absolute path.
    date : str, optional
        If given, this is used as the `publicationDate` field. It must be
        formatted as "%Y-%m-%d". The default is just the current UTC time.

    Raises
    ------
    Exception
        Always — this interface targets the retired TIND API; the body below
        the raise is retained only for reference.

    """
    raise Exception("This interface is for the old TIND API; use zenodo for the new API")
    # NOTE(review): everything from here down is unreachable because of the
    # unconditional raise above; it is kept as a reference implementation for
    # the replacement zenodo-based interface.
    from caltechdata_api import customize_schema, decustomize_schema

    # Only these file names (found under Lev*/) are eligible for upload; the
    # list order also defines the upload/sort order used below.
    allowed_files = [
        "metadata.json",
        "Strain_Outer.h5",
        "Strain_N2.h5",
        "Strain_N3.h5",
        "Strain_N4.h5",
        "Horizons.h5",
        "Matter.h5",
    ]

    # Check that `directory` is a valid SXS dataset
    # - Name must contain either "Public" or "Private" followed by at least one subdirectory
    # - Must contain common-metadata.txt (though this will not be uploaded)
    # - Must contain at least one Lev* subdirectory
    # - Each Lev* subdirectory must contain these files (which will be uploaded)
    #   - metadata.json
    #   - Strain_Outer.h5
    #   - Strain_N2.h5
    #   - Strain_N3.h5
    #   - Strain_N4.h5
    #   - Horizons.h5 and/or Matter.h5
    directory = pathlib.Path(directory).expanduser().resolve()
    if "/Public/" not in str(directory) and "/Private/" not in str(directory):
        raise ValueError(f"""Either "Public" or "Private" must be in the input directory "{directory}".""")
    if not (directory / "common-metadata.txt").exists():
        raise ValueError(f"Missing common-metadata.txt in {directory}")
    # NOTE(review): `Path.glob` returns a generator, which is always truthy, so
    # this check can never raise; `any(directory.glob("Lev*"))` would actually
    # test for the presence of Lev* subdirectories — confirm intent.
    if not directory.glob("Lev*"):
        raise ValueError(f"Missing Lev* in {directory}")
    for lev in directory.glob("Lev*"):
        warnings.warn("Temporarily skipping file checks")
        # for f in ["Strain_Outer.h5", "Strain_N2.h5", "Strain_N3.h5", "Strain_N4.h5"]:
        #     if not (lev / f).exists():
        #         raise ValueError(f"""Missing file "{f}" in "{lev}".""")
        # if not (lev / "metadata.json").exists():
        #     mtxt = lev / "metadata.txt"
        #     if not mtxt.exists():
        #         raise ValueError(f"""Missing both "metadata.txt" and "metadata.json" in "{lev}".""")
        #     else:
        #         m = Metadata.from_txt_file(str(mtxt), cache_json=False).add_extras().reorder_keys()
        #         m.to_json_file(str(mtxt.with_suffix(".json")))
        # if not (lev / "Horizons.h5").exists() and not (lev / "Matter.h5").exists():
        #     raise ValueError(f"""Missing both "Horizons.h5" and "Matter.h5" in "{lev}".""")

    # Figure out the "original name" of this simulation — something like "q4_7d/022" or "HighSpinMethods/BBH_SKS_d15.4_q1_..."
    root_dir = [par for par in directory.parents if par.match("Public") or par.match("Private")][0]
    original_name = str(directory.relative_to(root_dir))

    # Get the SXS ID from common-metadata.txt
    sxs_system_re = re.compile(sxs_identifier_regex)
    sxs_system_type = None
    sxs_system_number = None
    with (directory / "common-metadata.txt").open("r") as f:
        for line in f.readlines():
            line = line.strip()
            if "alternative-names" in line:
                m = sxs_system_re.search(line)
                if m:
                    sxs_system_type = m["simulation_type"]
                    sxs_system_number = m["sxs_number"]
                    break
    if not sxs_system_type or not sxs_system_number:
        raise ValueError(f"No SXS identifier found in {directory / 'common-metadata.txt'}")
    sxs_system = f"SXS:{sxs_system_type}:{sxs_system_number}"
    spec_url = "https://www.black-holes.org/code/SpEC.html"
    # Build a human-readable title/description from the system type.
    if sxs_system_type == "BBH":
        title = f"Binary black-hole simulation {sxs_system}"
        description = f"""Simulation of a black-hole binary system evolved by the <a href="{spec_url}">SpEC code</a>."""
    elif sxs_system_type == "BHNS":
        title = f"Black-hole neutron-star binary simulation {sxs_system}"
        description = f"""Simulation of a black-hole neutron-star binary system evolved by the <a href="{spec_url}">SpEC code</a>."""
    elif sxs_system_type == "NSNS":
        title = f"Binary neutron-star simulation {sxs_system}"
        description = f"""Simulation of a neutron-star binary system evolved by the <a href="{spec_url}">SpEC code</a>."""
    else:
        raise ValueError(
            f"""Did not recognize SXS system type "{sxs_system_type}" in directory "{directory}"; should be BBH, BHNS, or NSNS."""
        )
    print(f"Beginning work on {sxs_system}")

    # Find our list of files
    # Sorted by Lev directory first, then by position in `allowed_files`.
    files = sorted(
        [f for f in directory.glob("Lev*/*") if f.name in allowed_files],
        key=lambda f: f"{f.parent}/{allowed_files.index(f.name)}"
    )

    # Search for an existing record with this name
    existing = login.search(f'"{sxs_system}"')
    exists = existing.get("hits", {}).get("total", 0) > 0

    if exists:
        # Find the latest version
        latest = max(
            existing["hits"]["hits"],
            key=lambda h: SimpleVersion(h["metadata"]["version"])
        )
        latest_link = latest["links"]["self"].replace("/api/", "/")
        latest_doi = latest["metadata"]["doi"]

        # Check to see if any files have changed since the latest version
        utime = datetime.datetime.fromisoformat(latest["updated"])
        files_to_upload = filter_files(files, utime)
        # NOTE(review): this probably should test `files_to_upload` (computed
        # just above) rather than `files`; as written, a nonempty `files` list
        # with no changed files would not take the early return — confirm.
        if not files:
            print(f"No changes to {latest_link}.")
            return latest_link

        # Update version, relatedIdentifiers, and updated date
        metadata = latest["metadata"]
        old_version = SimpleVersion(metadata["version"])
        metadata["version"] = str(old_version.increment())
        metadata["relatedIdentifiers"] =[
            {
                "relatedIdentifierRelation": "IsNewVersionOf",
                "relatedIdentifier": latest_doi,
                "relatedIdentifierScheme": "DOI"
            }
        ]
        metadata["relevantDates"] = [
            {
                "relevantDateType": "Updated",
                "relevantDateValue": datetime.datetime.utcnow().strftime("%Y-%m-%d"),
            }
        ]

        # Upload files
        raise NotImplementedError(
            "Adding new files to an existing record is not yet supported. We will need some logic like this:\n" +
            "    https://github.com/caltechlibrary/caltechdata_api/blob/840a359a017d1257ac344b6af80038f591fd8a97/caltechdata_api/caltechdata_edit.py#L189-L233\n" +
            "More specifically, if we want to create a new version, we will probably need to essentially copy\n" +
            "the existing record, publish it, and then follow steps like the above to upload new files and\n" +
            "delete the old ones. It is not clear to me if the deletion would destroy files being used by the\n" +
            "older version, or just delete that entry in the new record."
        )
        existing_names = {
            f["electronic_name"][0]: f["uniform_resource_identifier"].split("/")[-2]
            for f in metadata["electronic_location_and_access"]
        }
        for f in files_to_upload:
            fileinfo = login.send_s3(f, str(f.relative_to(directory)), verbose=True)
            name = fileinfo["filename"]
            if name in existing_names:
                # NOTE(review): `existing_names` is a dict, which has no
                # `.index` method — this line would raise AttributeError if it
                # were ever reached (it sits behind the NotImplementedError
                # above); likely intended `existing_names[name]` or an index
                # into `metadata["files"]`.
                metadata["files"][existing_names.index(name)] = fileinfo
            else:
                metadata["files"].append(fileinfo)

    else:
        # Set up the metadata. Note that CaltechDATA internally uses an undocumented
        # customized schema similar to — but different from — the standard Invenio
        # schema. Fortunately, `caltechdata_api` includes a function to convert from
        # DataCite 4.0 or 4.3 to their schema. So, we set ours up as DataCite 4.0 and
        # convert it: <https://schema.datacite.org/meta/kernel-4.0/>

        # Generate a little table of info to append to the description
        highest_lev = sorted(set(f.relative_to(directory).parent for f in files))[-1]
        sim_metadata = Metadata.from_file(directory / highest_lev / "metadata")
        object_types = sim_metadata.get("object_types", "")
        mass_ratio = sim_metadata.get("reference_mass_ratio", "Unknown")
        if "reference_dimensionless_spin1" in sim_metadata:
            chi1 = sim_metadata.reference_dimensionless_spin1
            chi1 = f"[{chi1[0]:0.4f}, {chi1[1]:0.4f}, {chi1[2]:0.4f}]"
        else:
            chi1 = "N/A"
        if "reference_dimensionless_spin2" in sim_metadata:
            chi2 = sim_metadata.reference_dimensionless_spin2
            chi2 = f"[{chi2[0]:0.4f}, {chi2[1]:0.4f}, {chi2[2]:0.4f}]"
        else:
            chi2 = "N/A"
        n_orbits = sim_metadata.get("number_of_orbits", "Unknown")
        reference_eccentricity = sim_metadata.get("reference_eccentricity", "Unknown")
        # NOTE(review): if `mass_ratio` or `n_orbits` fell back to the string
        # "Unknown" above, the `:.4f` format specs below would raise
        # ValueError — confirm those keys are always present when this runs.
        description = f"""{description}
<br/>
<table>
<tbody>
<tr><td style="padding:0 15px;">Mass ratio</td><td>{mass_ratio:.4f}</td></tr>
<tr><td style="padding:0 15px;">Spin 1</td><td>{chi1}</td></tr>
<tr><td style="padding:0 15px;">Spin 2</td><td>{chi2}</td></tr>
<tr><td style="padding:0 15px;">Number of orbits</td><td>{n_orbits:.4f}</td></tr>
<tr><td style="padding:0 15px;">Eccentricity</td><td>{reference_eccentricity}</td></tr>
</tbody>
</table>
<br/>
Originally named "{original_name}"
"""
        files_to_upload = files

        # DataCite-4.0-style metadata for a brand-new record.
        metadata = {
            "resourceType": {
                "resourceTypeGeneral": "Dataset"
            },
            "titles": [
                {"title": title},
            ],
            "version": "2.0",
            "creators": [{"creatorName": "SXS Collaboration"}],
            "descriptions": [
                {
                    "descriptionType": "Abstract",
                    "description": description,
                }
            ],
            "subjects": [
                {"subject": "Astronomy"},
                {"subject": "Astrophysics"},
                {"subject": "Gravitational Waves"},
                {"subject": "Numerical Relativity"},
            ],
            "dates": [
                {
                    "date": datetime.datetime.utcnow().strftime("%Y-%m-%d"),
                    "dateType": "Updated"
                },
            ],
            "rightsList": [
                {
                    "rights": "Creative Commons Attribution 4.0 International License",
                    "rightsURI": "https://creativecommons.org/licenses/by/4.0/",
                }
            ],
        }
        # Add object-type-specific subjects ("BHBH"/"BHNS"/"NSNS" matching).
        if "bh" in object_types.lower():
            metadata["subjects"].append({"subject": "Black Holes"})
        if "ns" in object_types.lower():
            metadata["subjects"].append({"subject": "Neutron Stars"})
        metadata = customize_schema(metadata, schema="40")
        metadata["publicationDate"] = date or datetime.datetime.utcnow().strftime("%Y-%m-%d")
        metadata["files"] = [login.send_s3(f, str(f.relative_to(directory)), verbose=True) for f in files]

    metadata["titles"] = [
        {"title": title},
        {"title": f"{original_name}", "titleType": "AlternativeTitle"},
    ]

    # if doi is None:
    #     # We want tind to generate the identifier
    #     metadata["final_actions"] = [
    #         {
    #             "type": "create_doi",
    #             "parameters": {"type": "records", "field": "doi"},
    #         }
    #     ]
    # else:
    metadata["doi"] = f"""{login.doi_prefix}/{sxs_system}v{metadata["version"]}"""

    # Now tell CaltechDATA about it
    recordurl = f"{login.base_url}submit/api/create/"
    response = login.session.post(recordurl, data=json.dumps({"record": metadata}))
    if response.status_code != 200:
        print(f"An error occurred when trying to create a new record for '{directory}'.")
        try:
            print(response.text)
        except:
            pass
        try:
            print(response.json())
        except:
            pass
        response.raise_for_status()
        raise RuntimeError()  # Will only happen if the response was not strictly an error
    else:
        print(f""" Publishing as "{title}".""")
        print(f""" {response.text}""")

    # The success message embeds the record URL; extract it and the record id.
    url = response.text[response.text.find(login.base_url):].rstrip().rstrip(".")
    pid = url.rsplit("/", 1)[1]

    # Create the DOI
    print(" Creating/updating DOIs")
    api_url = f"{login.base_url}api/record/{pid}"
    # Poll up to 20 times while the new record becomes readable.
    for retry in range(20):
        r = login.session.get(api_url)
        if r.status_code == 200:
            break
        time.sleep(1.1)  # Let things settle down at CaltechDATA, so we can get the metadata
    if r.status_code != 200:
        print(f"""An error occurred when trying to access "{api_url}".""")
        try:
            print(r.json())
        except:
            pass
        r.raise_for_status()
        raise RuntimeError()  # Will only happen if the response was not strictly an error
    output_metadata = r.json()
    doi_metadata = decustomize_schema(output_metadata, schema="43")
    # Register both the concept DOI (new records only) and the versioned DOI.
    if not exists:
        doi = f"""{login.doi_prefix}/{sxs_system}"""
        login.datacite.public_doi(doi_metadata, url, doi)
    doi = f"""{login.doi_prefix}/{sxs_system}v{metadata["version"]}"""
    login.datacite.public_doi(doi_metadata, url, doi)

    return url
@@ -1,85 +0,0 @@
1
# Human-readable specification of the "catalog_v2" JSON layout. This is data,
# not code: the "#" lines below are part of the string itself and are shown to
# users verbatim, so they must not be edited as if they were Python comments.
format_description = """New catalog format

{
    "sxs_format": "catalog_v2"
    "modified": "<YYYY-MM-DDThh:mm:ss.ssssss>",  # UTC time of last-modified record in this file

    "records": {  # Includes *all* versions
        "<sxs_id_versioned>": {  # SXS:(BBH|BHNS|NSNS):[0-9]{4,}v[0-9]{2,}
            "title": "<title>"  # Same as ["metadata"]["title"],
            "id": <id>,  # ~7-digit integer uniquely identifying this record
            "conceptrecid": "<conceptrecid>",  # ~7-digit integer (as string) collectively identifying all versions of this record
            "created": "<YYYY-MM-DDThh:mm:ss.ssssss>",  # UTC time of creation of this record on Zenodo
            "modified": "<YYYY-MM-DDThh:mm:ss.ssssss>",  # (UTC) Last modification of this record (possibly just Zenodo metadata modified)
            "bucket": "https://zenodo.org/api/files/<uuid>",  # Base URL for file uploads and downloads,
            "files": [
                {
                    "checksum": "<checksum>",  # MD5 checksum of file on Zenodo
                    "filename": "<filename>",  # Name of file; may contain slashes denoting directories
                    "filesize": <filesize>,  # Number of bytes in the file
                    "id": "<fileid>",  # A standard UUID (hexadecimal with characters in the pattern 8-4-4-4-12)
                },
                ...  # Other file descriptions in the order in which they were uploaded (not necessarily a meaningful order)
            ]
        },
        ...
    },

    "simulations": {  # Physical data (masses, spins, etc.) for all available SXS simulations, in the most recent version
        "<sxs_id>": {  # The SXS ID is a string like SXS:BHNS:0001 or SXS:BBH:1234
            "latest_version": "<sxs_id_versioned>",  # Entry in "records" containing the most recent version of this simulation's data
            "url": "<URL>",  # The URL of the Zenodo "conceptdoi" link, which *resolves to* the most-recent version
            #
            # NOTE: All of the following may be absent if this simulation is closed-access, or simply does not have metadata.
            #
            # Variable content describing (mostly) physical parameters of the system.  It's basically a
            # python-compatible version of the information contained in "metadata.txt" from the
            # highest-resolution run in the most-recent version of this simulation.  That file is meant to
            # be more-or-less as suggested in <https://arxiv.org/abs/0709.0093>.  The conversion to a
            # python-compatible format means that keys like "simulation-name" have had hyphens replaced by
            # underscores so that they can be used as variable names in python and any other sane language
            # (with apologies to Lisp).  As far as possible, values that are just strings in that file
            # have been converted into the relevant types -- like numbers, integers, and arrays.  Note
            # that some keys like eccentricity are sometimes numbers and sometimes the string "<number"
            # (meaning that the eccentricity is less than the number), which is necessarily a string.
            #
            # Below are just the first few keys that *may* be present.  Note that closed-access
            # simulations will have empty dictionaries here.
            #
            "simulation_name": "<directory_name>",  # This may be distinctly uninformative
            "alternative_names": "<some ugly thing>, ..., <sxs_id>",  # This may be a list of strings
            "initial_data_type": "<type>",  # Something like "BBH_CFMS"
            "object_types": "<type>",  # Currently "BHBH", "BHNS", or "NSNS"
            "number_of_orbits": <number>,  # This is a float, rather than an integer
            "reference_mass_ratio": <q>,  # Usually greater than 1 (exceptions are due to junk radiation)
            "reference_chi_eff": <chi_eff>,  # Dimensionless effective spin quantity
            "reference_chi1_perp": <chi1_perp>,  # Magnitude of component of chi1 orthogonal to "reference_orbital_frequency"
            "reference_chi2_perp": <chi2_perp>,  # Magnitude of component of chi2 orthogonal to "reference_orbital_frequency"
            "reference_mass1": <m2>,
            "reference_mass2": <m1>,
            "reference_dimensionless_spin1": [
                <chi1_x>,
                <chi1_y>,
                <chi1_z>
            ],
            "reference_dimensionless_spin2": [
                <chi2_x>,
                <chi2_y>,
                <chi2_z>
            ],
            "reference_eccentricity": <eccentricity>,  # A float or possibly a string containing "<" and a float
            "reference_orbital_frequency": [
                <omega_x>,
                <omega_y>,
                <omega_z>
            ],
            "reference_time": <time>,
            ...
        },
        ...
    }

}


"""