xcpcio 0.63.3__py3-none-any.whl → 0.63.5__py3-none-any.whl

This diff shows the contents of publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.

Potentially problematic release: this version of xcpcio might be problematic.

xcpcio/__init__.py CHANGED
@@ -1,5 +1,4 @@
- from . import constants, types
+ from . import ccs, constants, types
+ from .__version__ import __version__

- __version__ = "0.63.3"
-
- __all__ = [constants, types]
+ __all__ = [constants, types, ccs, __version__]
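
With this change, the package version is re-exported at the top level and the ccs subpackage is imported eagerly. A minimal sketch of the new import surface (the printed value assumes this 0.63.5 release):

    import xcpcio
    from xcpcio import ccs  # new subpackage exposing contest_archiver and model

    print(xcpcio.__version__)  # "0.63.5"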
xcpcio/__version__.py ADDED
@@ -0,0 +1 @@
+ __version__ = "0.63.5"
xcpcio/ccs/__init__.py CHANGED
@@ -1,3 +1,3 @@
- from . import ccs, model
+ from . import contest_archiver, model

- __all__ = [model, ccs]
+ __all__ = [model, contest_archiver]
xcpcio/ccs/contest_archiver.py ADDED
@@ -0,0 +1,439 @@
+ """
+ CCS Contest API Archiver
+ A tool to archive contest data from a CCS API and organize it into a contest package.
+
+ Based on the CCS Contest API specification:
+ https://ccs-specs.icpc.io/2023-06/contest_api
+
+ And the Contest Package specification:
+ https://ccs-specs.icpc.io/2023-06/contest_package
+ """
+
+ import asyncio
+ import json
+ import logging
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional
+ from urllib.parse import urljoin
+
+ import aiofiles
+ import aiohttp
+ import semver
+ from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_exponential
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class APICredentials:
+     """API authentication credentials"""
+
+     username: Optional[str] = None
+     password: Optional[str] = None
+     token: Optional[str] = None
+
+
+ @dataclass
+ class ArchiveConfig:
+     """Configuration for the contest dump operation"""
+
+     base_url: str
+     contest_id: str
+     credentials: APICredentials
+     output_dir: Path
+     include_files: bool = True
+     endpoints: Optional[List[str]] = None
+     timeout: int = 30
+     max_concurrent: int = 10
+     include_event_feed: bool = False
+
+
+ class ContestArchiver:
+     """
+     Main class for archiving contest data from CCS API to contest package format.
+
+     This tool fetches data from all relevant API endpoints and organizes them
+     according to the Contest Package specification.
+     """
+
+     # Known endpoints that can be fetched
+     KNOWN_ENDPOINTS = [
+         "access",
+         "contests",
+         "judgement-types",
+         "languages",
+         "problems",
+         "groups",
+         "organizations",
+         "teams",
+         "persons",
+         "accounts",
+         "state",
+         "submissions",
+         "judgements",
+         "runs",
+         "clarifications",
+         "awards",
+         "commentary",
+         "scoreboard",
+     ]
+
+     DOMJUDGE_KNOWN_ENDPOINTS = [
+         "access",
+         "contests",
+         "judgement-types",
+         "languages",
+         "problems",
+         "groups",
+         "organizations",
+         "teams",
+         "accounts",
+         "state",
+         "submissions",
+         "judgements",
+         "runs",
+         "clarifications",
+         "awards",
+         "scoreboard",
+     ]
+
+     def __init__(self, config: ArchiveConfig):
+         self._config = config
+         self._session: Optional[aiohttp.ClientSession] = None
+         self._semaphore = asyncio.Semaphore(config.max_concurrent)
+         self._api_info: Optional[Dict[str, Any]] = None
+         self._provider_name: Optional[str] = None
+         self._provider_version: Optional[semver.VersionInfo] = None
+
+         # Create output directory
+         self._config.output_dir.mkdir(parents=True, exist_ok=True)
+
+     def _build_url(self, endpoint: str) -> str:
+         """Build API URL ensuring proper path joining"""
+         # Ensure base_url ends with / and endpoint doesn't start with /
+         base = self._config.base_url.rstrip("/") + "/"
+         endpoint = endpoint.lstrip("/")
+         return urljoin(base, endpoint)
+
+     async def __aenter__(self):
+         """Async context manager entry"""
+         await self.start_session()
+         return self
+
+     async def __aexit__(self, exc_type, exc_val, exc_tb):
+         """Async context manager exit"""
+         await self.close_session()
+
+     async def start_session(self):
+         """Initialize the HTTP session with authentication"""
+         # Setup authentication
+         auth = None
+         headers = {}
+
+         if self._config.credentials.username and self._config.credentials.password:
+             auth = aiohttp.BasicAuth(self._config.credentials.username, self._config.credentials.password)
+         elif self._config.credentials.token:
+             headers["Authorization"] = f"Bearer {self._config.credentials.token}"
+
+         self._session = aiohttp.ClientSession(auth=auth, headers=headers)
+
+     async def close_session(self):
+         """Close the HTTP session"""
+         if self._session:
+             await self._session.close()
+
+     @retry(
+         stop=stop_after_attempt(3),
+         wait=wait_exponential(multiplier=1, min=1, max=10),
+         retry=retry_if_exception_type((asyncio.TimeoutError, aiohttp.ClientError)),
+         before_sleep=before_sleep_log(logger, logging.WARNING),
+         reraise=True,
+     )
+     async def _fetch_json_internal(self, url: str, override_timeout: Optional[int] = None) -> Optional[Dict[str, Any]]:
+         """Internal fetch method with retry logic"""
+         logger.info(f"Fetching {url}")
+         timeout = aiohttp.ClientTimeout(total=override_timeout or self._config.timeout)
+         async with self._session.get(url, timeout=timeout) as response:
+             if response.status == 404:
+                 logger.warning(f"Endpoint not found: {url}")
+                 return None
+             elif response.status != 200:
+                 raise aiohttp.ClientResponseError(
+                     request_info=response.request_info, history=response.history, status=response.status
+                 )
+
+             data = await response.json()
+             logger.debug(f"Fetched {len(str(data))} bytes from {url}")
+             return data
+
+     async def fetch_json(self, endpoint: str, override_timeout: Optional[int] = None) -> Optional[Dict[str, Any]]:
+         """Fetch JSON data from an API endpoint"""
+         url = self._build_url(endpoint)
+
+         async with self._semaphore:
+             try:
+                 return await self._fetch_json_internal(url, override_timeout)
+             except Exception as e:
+                 logger.error(f"Failed to fetch. [url={url}] [err={e}]")
+                 return None
+
+     @retry(
+         stop=stop_after_attempt(3),
+         wait=wait_exponential(multiplier=1, min=1, max=10),
+         retry=retry_if_exception_type((asyncio.TimeoutError, aiohttp.ClientError)),
+         before_sleep=before_sleep_log(logger, logging.WARNING),
+         reraise=True,
+     )
+     async def _fetch_file_internal(
+         self, file_url: str, output_path: Path, override_timeout: Optional[int] = None
+     ) -> bool:
+         """Internal file download method with retry logic"""
+         logger.info(f"Downloading {file_url} -> {output_path}")
+         output_path.parent.mkdir(parents=True, exist_ok=True)
+
+         timeout = aiohttp.ClientTimeout(total=override_timeout or self._config.timeout)
+         async with self._session.get(file_url, timeout=timeout) as response:
+             if response.status != 200:
+                 raise aiohttp.ClientResponseError(
+                     request_info=response.request_info, history=response.history, status=response.status
+                 )
+
+             async with aiofiles.open(output_path, "wb") as f:
+                 async for chunk in response.content.iter_chunked(8192):
+                     await f.write(chunk)
+
+         logger.debug(f"Downloaded {output_path}")
+         return True
+
+     async def fetch_file(
+         self, file_url: Optional[str], output_path: Path, override_timeout: Optional[int] = None
+     ) -> bool:
+         """Download a file from URL to local path"""
+         if not file_url:
+             return False
+
+         # Handle relative URLs
+         if not file_url.startswith(("http://", "https://")):
+             file_url = self._build_url(file_url)
+
+         async with self._semaphore:
+             try:
+                 return await self._fetch_file_internal(file_url, output_path, override_timeout)
+             except Exception as e:
+                 logger.error(f"Failed to download {file_url} after retries: {e}")
+                 return False
+
+     def _get_file_output_path(
+         self, filename: str, base_path: Optional[str] = None, object_id: Optional[str] = None
+     ) -> Path:
+         """Get the output path for a file reference"""
+         res_dir = self._config.output_dir
+         if base_path:
+             res_dir /= base_path
+         if object_id:
+             res_dir /= object_id
+         return res_dir / filename
+
+     def _collect_file_references(self, data: Any, base_path: str, object_id: Optional[str] = None) -> List[tuple]:
+         """Collect all file references found in data"""
+         file_refs = []
+
+         if isinstance(data, dict):
+             # Handle file reference objects
+             if "href" in data and "filename" in data:
+                 output_path = self._get_file_output_path(data["filename"], base_path, object_id)
+                 file_refs.append((data["href"], output_path))
+
+             # Recursively check other dict values
+             for value in data.values():
+                 file_refs.extend(self._collect_file_references(value, base_path, object_id))
+
+         elif isinstance(data, list):
+             # Handle arrays of file references or other objects
+             for item in data:
+                 file_refs.extend(self._collect_file_references(item, base_path, object_id))
+
+         return file_refs
+
+     async def _download_file_references(self, data: Any, base_path: str, object_id: Optional[str] = None):
+         """Download all file references found in data in parallel"""
+         if not self._config.include_files:
+             return
+
+         # Collect all file references first
+         file_refs = self._collect_file_references(data, base_path, object_id)
+
+         if not file_refs:
+             return
+
+         # Download all files in parallel (controlled by self.semaphore)
+         download_tasks = [self.fetch_file(href, output_path) for href, output_path in file_refs]
+
+         if download_tasks:
+             await asyncio.gather(*download_tasks, return_exceptions=True)
+
+     async def save_data(self, filename: str, data: Any):
+         """Save data to file in JSON format"""
+         file_path = self._config.output_dir / filename
+         content = json.dumps(data, indent=2, ensure_ascii=False)
+
+         async with aiofiles.open(file_path, "w", encoding="utf-8") as f:
+             await f.write(content)
+
+         logger.info(f"Saved {file_path}")
+
+     async def dump_api_info(self):
+         """Dump API root endpoint information"""
+         logger.info("Dumping API information...")
+
+         data = await self.fetch_json("/")
+         if not data:
+             raise RuntimeError("Failed to fetch API information from root endpoint")
+
+         self._api_info = data  # Store API info for later use
+
+         # Parse provider information
+         if "provider" in data:
+             provider: Dict = data.get("provider", {})
+             self._provider_name = provider.get("name", "")
+
+             # Parse version string to semver.VersionInfo
+             version_str: str = provider.get("version", "")
+             if version_str:
+                 try:
+                     # Clean version string: "8.3.1/3324986cd" -> "8.3.1", "9.0.0DEV/26e89f701" -> "9.0.0-dev"
+                     version_clean = version_str.split("/")[0]
+                     # Convert DEV suffix to semver prerelease format
+                     if version_clean.endswith("DEV"):
+                         version_clean = version_clean[:-3] + "-dev"
+
+                     self._provider_version = semver.VersionInfo.parse(version_clean)
+                     logger.info(
+                         f"Detected API provider: {self._provider_name} version {version_str} (parsed: {self._provider_version})"
+                     )
+                 except (ValueError, TypeError) as e:
+                     logger.warning(f"Could not parse version string: {version_str}, error: {e}")
+                     self._provider_version = None
+             else:
+                 logger.info(f"Detected API provider: {self._provider_name} (no version)")
+
+         await self.save_data("api.json", data)
+         await self._download_file_references(data, "api")
+
+     async def dump_contest_info(self):
+         """Dump contest information"""
+         logger.info("Dumping contest information...")
+
+         endpoint = f"contests/{self._config.contest_id}"
+         data = await self.fetch_json(endpoint)
+         if data:
+             await self.save_data("contest.json", data)
+             await self._download_file_references(data, "contest")
+
+     async def dump_endpoint_collection(self, endpoint: str):
+         """Dump a collection endpoint (returns array of objects)"""
+         logger.info(f"Dumping {endpoint}...")
+
+         api_endpoint = f"contests/{self._config.contest_id}/{endpoint}"
+         data = await self.fetch_json(api_endpoint)
+
+         if data is None:
+             return
+
+         await self.save_data(f"{endpoint}.json", data)
+
+         # Download files for each object in the collection
+         if isinstance(data, list):
+             for item in data:
+                 if isinstance(item, dict) and "id" in item:
+                     await self._download_file_references(item, endpoint, item["id"])
+
+     async def dump_endpoint_single(self, endpoint: str):
+         """Dump a single object endpoint"""
+         logger.info(f"Dumping {endpoint}...")
+
+         api_endpoint = f"contests/{self._config.contest_id}/{endpoint}"
+         data = await self.fetch_json(api_endpoint)
+
+         if data is None:
+             return
+
+         await self.save_data(f"{endpoint}.json", data)
+         await self._download_file_references(data, endpoint)
+
+     async def dump_event_feed(self):
+         """Dump event-feed endpoint with stream=false parameter"""
+         logger.info("Dumping event-feed...")
+
+         api_endpoint = f"contests/{self._config.contest_id}/event-feed?stream=false"
+         # Use extended timeout for event-feed as it may contain large amounts of data
+         await self.fetch_file(
+             api_endpoint,
+             output_path=self._get_file_output_path("event-feed.ndjson"),
+             override_timeout=self._config.timeout * 10,
+         )
+
+     async def get_available_endpoints(self) -> List[str]:
+         """Get list of available endpoints based on API provider and version"""
+         # Check if it's DOMjudge with version < 9.0.0
+         if self._provider_name == "DOMjudge" and self._provider_version and self._provider_version.major < 9:
+             logger.info(f"Using DOMjudge known endpoints for version < 9.0.0 (detected: {self._provider_version})")
+             return self.DOMJUDGE_KNOWN_ENDPOINTS
+
+         # For other providers or DOMjudge >= 9.0.0, try to get from access endpoint
+         access_data = await self.fetch_json(f"contests/{self._config.contest_id}/access")
+
+         if not access_data or "endpoints" not in access_data:
+             logger.warning("Could not fetch access info, using default endpoints")
+             return self.KNOWN_ENDPOINTS
+
+         available = [ep["type"] for ep in access_data["endpoints"]]
+         logger.info(f"Available endpoints: {available}")
+         return available
+
+     async def dump_all(self):
+         """Dump all contest data"""
+         logger.info(
+             f"Starting contest archive. [base_url={self._config.base_url}] [contest_id={self._config.contest_id}]"
+         )
+
+         # Always dump API and contest info
+         await self.dump_api_info()
+         await self.dump_contest_info()
+
+         # Get list of endpoints to dump
+         if self._config.endpoints:
+             endpoints = self._config.endpoints
+         else:
+             endpoints = await self.get_available_endpoints()
+
+         # Remove 'contest' endpoint as it's already handled by dump_contest_info
+         if "contest" in endpoints:
+             endpoints = [ep for ep in endpoints if ep != "contest"]
+
+         # Single object endpoints
+         single_endpoints = ["state", "scoreboard"]
+
+         # Collection endpoints
+         collection_endpoints = [ep for ep in endpoints if ep not in single_endpoints]
+
+         # Dump all endpoints concurrently
+         tasks = []
+
+         for endpoint in single_endpoints:
+             if endpoint in endpoints:
+                 tasks.append(self.dump_endpoint_single(endpoint))
+
+         for endpoint in collection_endpoints:
+             if endpoint in endpoints:
+                 tasks.append(self.dump_endpoint_collection(endpoint))
+
+         if tasks:
+             await asyncio.gather(*tasks, return_exceptions=True)
+
+         # Dump event-feed if configured
+         if self._config.include_event_feed:
+             await self.dump_event_feed()
+
+         logger.info(f"Contest archive completed: {self._config.output_dir}")
{xcpcio-0.63.3 → xcpcio-0.63.5}.dist-info/METADATA RENAMED
@@ -1,21 +1,27 @@
  Metadata-Version: 2.4
  Name: xcpcio
- Version: 0.63.3
+ Version: 0.63.5
  Summary: xcpcio python lib
- Author-email: Dup4 <hi@dup4.com>
- License-Expression: MIT
  Project-URL: homepage, https://github.com/xcpcio/xcpcio
  Project-URL: documentation, https://github.com/xcpcio/xcpcio
  Project-URL: repository, https://github.com/xcpcio/xcpcio
+ Author-email: Dup4 <hi@dup4.com>
+ License-Expression: MIT
  Keywords: xcpcio
- Classifier: Topic :: Software Development :: Build Tools
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
+ Classifier: Topic :: Software Development :: Build Tools
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Requires-Python: >=3.11
- Description-Content-Type: text/markdown
+ Requires-Dist: aiofiles>=23.0.0
+ Requires-Dist: aiohttp>=3.8.0
+ Requires-Dist: click>=8.0.0
  Requires-Dist: pydantic>=2.11.7
+ Requires-Dist: pyyaml>=6.0.0
+ Requires-Dist: semver>=3.0.0
+ Requires-Dist: tenacity>=8.0.0
+ Description-Content-Type: text/markdown

  # xcpcio-python
xcpcio-0.63.5.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+ xcpcio/__init__.py,sha256=kjd6itqBRSQ-OT83qUJXHt81KQQDRUtaIuykzfaWXLM,121
+ xcpcio/__version__.py,sha256=0Inh2IrO1TDWHZQNNWZG7RQoT8GKZUEeRoZnIhcVdWI,23
+ xcpcio/constants.py,sha256=MjpAgNXiBlUsx1S09m7JNT-nekNDR-aE6ggvGL3fg0I,2297
+ xcpcio/types.py,sha256=AkYby2haJgxwtozlgaPMG2ryAZdvsSc3sH-p6qXcM4g,6575
+ xcpcio/ccs/__init__.py,sha256=qjkSo9lS8dJyNqiGljCnbS1cr7J8jj72pqcCzf_v0Ig,75
+ xcpcio/ccs/contest_archiver.py,sha256=FKpUn1IGfa-UNf63OJ5eff7rxOEqXCvFYRLsvkMbUJc,16203
+ xcpcio/ccs/model/__init__.py,sha256=cZE1q5JY-iHDEKZpsx0UZaMhH-23H4oAHaYOkW7dZ5s,43
+ xcpcio/ccs/model/model_2023_06/__init__.py,sha256=OmDQZqmigBpL64LXk5lIOGoQ3Uqis8-2z6qQpOO5aJc,167
+ xcpcio/ccs/model/model_2023_06/model.py,sha256=bVMDWpJTwPSpz1fHPxWrWerxCBIboH3LKVZpIZGQ2pY,15287
+ xcpcio-0.63.5.dist-info/METADATA,sha256=KK0d6SVxSWCkNyD-6fYSO5GZnV2L9dT8FA_hE7o2hp0,950
+ xcpcio-0.63.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ xcpcio-0.63.5.dist-info/entry_points.txt,sha256=qvzh8oDJxIHqTN-rg2lRN6xR99AqxbWnlAQI7uzDibI,59
+ xcpcio-0.63.5.dist-info/RECORD,,
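
RECORD lines follow the wheel format path,sha256=<digest>,size, where the digest is sha256 encoded as urlsafe base64 without padding. A hedged verification sketch against an installed copy (the file path is a placeholder):

    import base64
    import hashlib
    from pathlib import Path

    def record_hash(path: Path) -> str:
        # sha256 digest, urlsafe base64 with '=' padding stripped,
        # as used in wheel RECORD files
        digest = hashlib.sha256(path.read_bytes()).digest()
        return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

    print(record_hash(Path("xcpcio/__version__.py")))  # should match the RECORD entry above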
{xcpcio-0.63.3 → xcpcio-0.63.5}.dist-info/WHEEL RENAMED
@@ -1,5 +1,4 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.9.0)
+ Generator: hatchling 1.27.0
  Root-Is-Purelib: true
  Tag: py3-none-any
-
xcpcio-0.63.5.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ ccs-archiver = cli.ccs_archiver_cli:main
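
Installing the wheel therefore adds a ccs-archiver command, mapped to cli.ccs_archiver_cli:main. The CLI's flags are not part of this diff, so the only invocation safe to assume (given the click dependency declared above) is the standard help flag:

    ccs-archiver --help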
xcpcio-0.63.3.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
- xcpcio/__init__.py,sha256=OxVA1dNXmgYMDFhZ2iZFG5LyC3_3V6bnh2Hq8PVywZ4,85
- xcpcio/constants.py,sha256=MjpAgNXiBlUsx1S09m7JNT-nekNDR-aE6ggvGL3fg0I,2297
- xcpcio/types.py,sha256=AkYby2haJgxwtozlgaPMG2ryAZdvsSc3sH-p6qXcM4g,6575
- xcpcio/ccs/__init__.py,sha256=QUkMIexjQPFLsns0hQLkrItsIdCTcjnqjK1Ne4kdrKQ,49
- xcpcio/ccs/model/__init__.py,sha256=cZE1q5JY-iHDEKZpsx0UZaMhH-23H4oAHaYOkW7dZ5s,43
- xcpcio/ccs/model/model_2023_06/__init__.py,sha256=OmDQZqmigBpL64LXk5lIOGoQ3Uqis8-2z6qQpOO5aJc,167
- xcpcio/ccs/model/model_2023_06/model.py,sha256=bVMDWpJTwPSpz1fHPxWrWerxCBIboH3LKVZpIZGQ2pY,15287
- xcpcio-0.63.3.dist-info/METADATA,sha256=fRfPebx3QHVrLE-evbrHMM9RkNkAuRa_DYjg1BdAjBI,771
- xcpcio-0.63.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- xcpcio-0.63.3.dist-info/top_level.txt,sha256=c0sKVaiJdzJma17sS0dqHsYvDtGav5iIbd3wyLWuDEA,7
- xcpcio-0.63.3.dist-info/RECORD,,
xcpcio-0.63.3.dist-info/top_level.txt DELETED
@@ -1 +0,0 @@
- xcpcio