soracom-lib 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- soracom_lib-0.1.0/LICENSE +15 -0
- soracom_lib-0.1.0/PKG-INFO +321 -0
- soracom_lib-0.1.0/README.md +306 -0
- soracom_lib-0.1.0/pyproject.toml +28 -0
- soracom_lib-0.1.0/setup.cfg +4 -0
- soracom_lib-0.1.0/soracom_lib.egg-info/PKG-INFO +321 -0
- soracom_lib-0.1.0/soracom_lib.egg-info/SOURCES.txt +11 -0
- soracom_lib-0.1.0/soracom_lib.egg-info/dependency_links.txt +1 -0
- soracom_lib-0.1.0/soracom_lib.egg-info/requires.txt +1 -0
- soracom_lib-0.1.0/soracom_lib.egg-info/top_level.txt +1 -0
- soracom_lib-0.1.0/soracomlib/__init__.py +25 -0
- soracom_lib-0.1.0/soracomlib/soracom_harvest_files.py +439 -0
- soracom_lib-0.1.0/soracomlib/soracom_lib.py +70 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Apache License 2.0
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025-2026 tkxu
|
|
4
|
+
|
|
5
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
you may not use this file except in compliance with the License.
|
|
7
|
+
You may obtain a copy of the License at
|
|
8
|
+
|
|
9
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
|
|
11
|
+
Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
See the License for the specific language governing permissions and
|
|
15
|
+
limitations under the License.
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: soracom-lib
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: SORACOM Harvest Files utility library
|
|
5
|
+
Author: tkxu
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.8
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: requests>=2.28
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
|
|
16
|
+
# soracom_lib
|
|
17
|
+
|
|
18
|
+
A lightweight Python client library for [SORACOM Harvest Files](https://developers.soracom.io/en/docs/harvest/).
|
|
19
|
+
|
|
20
|
+
Provides authentication, iterative file listing, download, upload, and related helpers.
|
|
21
|
+
All functionality lives in `soracom_harvest_files.py`.
|
|
22
|
+
|
|
23
|
+
No dependencies beyond the standard library and `requests`.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Features
|
|
28
|
+
|
|
29
|
+
- Credential loading from `_netrc` / `.netrc` (no credentials in source code)
|
|
30
|
+
- Authentication via SORACOM SAM user — wraps API key + token into a reusable dict
|
|
31
|
+
- Iterative directory traversal with optional time-range filter and pagination
|
|
32
|
+
- Per-file download with skip-existing and overwrite options
|
|
33
|
+
- File upload with automatic `Content-Type` detection
|
|
34
|
+
- Recency helper for filtering by last-modified time
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Requirements
|
|
39
|
+
|
|
40
|
+
Python 3.8 or later and `requests` 2.28 or later.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
No package installation is needed. Copy the file into your project:
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
your_project/
|
|
50
|
+
└── soracom_harvest_files.py
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Install the only external dependency:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install requests
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## Credentials setup
|
|
62
|
+
|
|
63
|
+
`soracom_harvest_files` never reads credentials from source code or environment variables.
|
|
64
|
+
It uses the standard `netrc` mechanism so secrets stay out of your repository.
|
|
65
|
+
|
|
66
|
+
### 1. Create a SORACOM SAM user
|
|
67
|
+
|
|
68
|
+
Generate an **Auth Key** in the [SORACOM User Console](https://console.soracom.io/) under **Security → SAM Users**.
|
|
69
|
+
|
|
70
|
+
### 2. Write the netrc file
|
|
71
|
+
|
|
72
|
+
**Windows** — create `%USERPROFILE%\_netrc`:
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
machine api.soracom.io
|
|
76
|
+
login keyId-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
77
|
+
password secret-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**Linux / macOS** — create `~/.netrc` and restrict permissions:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
cat >> ~/.netrc << 'EOF'
|
|
84
|
+
machine api.soracom.io
|
|
85
|
+
login keyId-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
86
|
+
password secret-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
87
|
+
EOF
|
|
88
|
+
chmod 600 ~/.netrc
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### 3. Add netrc to .gitignore
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
_netrc
|
|
95
|
+
.netrc
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Quick start
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
import soracom_harvest_files as sf
|
|
104
|
+
|
|
105
|
+
# Load credentials (returns AuthInfo; raises RuntimeError on failure)
|
|
106
|
+
auth_info = sf.read_auth_keys()
|
|
107
|
+
|
|
108
|
+
# Build token dict from credentials
|
|
109
|
+
token = sf.authenticate(auth_info)
|
|
110
|
+
|
|
111
|
+
# List all files under a path
|
|
112
|
+
files = sf.list_files_iterative("logs/XXXXXXXXXXXXXXXXX/", token)
|
|
113
|
+
print(files)
|
|
114
|
+
|
|
115
|
+
# Download each file individually
|
|
116
|
+
import os
|
|
117
|
+
for remote_path in files:
|
|
118
|
+
local_path = os.path.join("./download", os.path.basename(remote_path))
|
|
119
|
+
sf.download_and_save(remote_path, local_path, token)
|
|
120
|
+
|
|
121
|
+
# Upload a file
|
|
122
|
+
sf.upload_file_to_soracom(
|
|
123
|
+
file_path="./results/output.log",
|
|
124
|
+
upload_path="logs/XXXXXXXXXXXXXXXX/output.log",
|
|
125
|
+
token=token,
|
|
126
|
+
)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## API reference
|
|
132
|
+
|
|
133
|
+
### `read_auth_keys(base_dir=None) -> AuthInfo`
|
|
134
|
+
|
|
135
|
+
Load credentials from `_netrc` or `.netrc` and return an `AuthInfo` object.
|
|
136
|
+
|
|
137
|
+
| Parameter | Type | Description |
|
|
138
|
+
|-----------|------|-------------|
|
|
139
|
+
| `base_dir` | `str \| None` | When provided, also searches for `.netrc` in this directory (takes priority over the home directory). |
|
|
140
|
+
|
|
141
|
+
Raises `RuntimeError` if no valid credentials are found.
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
### `AuthInfo`
|
|
146
|
+
|
|
147
|
+
Frozen dataclass holding the loaded credentials.
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
@dataclass(frozen=True)
|
|
151
|
+
class AuthInfo:
|
|
152
|
+
api_key: str # authKeyId from netrc login field
|
|
153
|
+
api_token: str # authKey from netrc password field
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
### `authenticate(auth: AuthInfo) -> dict`
|
|
159
|
+
|
|
160
|
+
Wrap `AuthInfo` into a token dict that is passed to every subsequent API call.
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
token = sf.authenticate(auth_info)
|
|
164
|
+
# → {"apiKey": "...", "token": "..."}
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
No network call is made.
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
### `list_files_iterative(base_path, token, *, limit=None, page_size=100, start_time=None, end_time=None) -> list[str]`
|
|
172
|
+
|
|
173
|
+
Iteratively traverse directories and list all files under `base_path` in Harvest Files.
|
|
174
|
+
|
|
175
|
+
| Parameter | Type | Description |
|
|
176
|
+
|-----------|------|-------------|
|
|
177
|
+
| `base_path` | `str` | Root path in Harvest Files, e.g. `"logs/XXXXX.../"` |
|
|
178
|
+
| `token` | `dict` | Token dict from `authenticate()` |
|
|
179
|
+
| `limit` | `int \| None` | Cap on total results; `None` = no limit |
|
|
180
|
+
| `page_size` | `int` | Number of entries per API request (default `100`, max `100`) |
|
|
181
|
+
| `start_time` | `datetime \| None` | Exclude files modified before this time. Naive datetimes are treated as UTC. |
|
|
182
|
+
| `end_time` | `datetime \| None` | Exclude files modified after this time. Naive datetimes are treated as UTC. |
|
|
183
|
+
|
|
184
|
+
Returns a list of full file path strings.
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
### `download_and_save(remote_path, local_path, token, *, overwrite=False, chunk_size=1048576) -> bool`
|
|
189
|
+
|
|
190
|
+
Download a single file from Harvest Files and save it to the local filesystem.
|
|
191
|
+
|
|
192
|
+
| Parameter | Type | Description |
|
|
193
|
+
|-----------|------|-------------|
|
|
194
|
+
| `remote_path` | `str` | File path in Harvest Files (e.g. `"logs/XXXXX.../file.log"`) |
|
|
195
|
+
| `local_path` | `str` | Destination path on local filesystem |
|
|
196
|
+
| `token` | `dict` | Token dict from `authenticate()` |
|
|
197
|
+
| `overwrite` | `bool` | If `False` (default) and the local file already exists, the download is skipped and `True` is returned |
|
|
198
|
+
| `chunk_size` | `int` | Streaming chunk size in bytes (default 1 MiB) |
|
|
199
|
+
|
|
200
|
+
Returns `True` on success (including skip), `False` on failure.
|
|
201
|
+
Parent directories are created automatically.
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
### `upload_file_to_soracom(file_path, upload_path, token) -> bool`
|
|
206
|
+
|
|
207
|
+
Upload a local file to Harvest Files via HTTP PUT.
|
|
208
|
+
`Content-Type` is detected automatically from the file extension; falls back to `application/octet-stream`.
|
|
209
|
+
|
|
210
|
+
Returns `True` on success (HTTP 200 or 204), `False` on failure.
|
|
211
|
+
|
|
212
|
+
---
|
|
213
|
+
|
|
214
|
+
### `get_with_auth(url, token) -> requests.Response | None`
|
|
215
|
+
|
|
216
|
+
Authenticated GET. Returns the response object on success (status < 400), or `None` on error.
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
### `delete_with_auth(url, token) -> requests.Response | None`
|
|
221
|
+
|
|
222
|
+
Authenticated DELETE. Returns the response object on success (status < 400), or `None` on error.
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
### `is_recent(last_modified_ms, within_seconds, *, now=None) -> bool`
|
|
227
|
+
|
|
228
|
+
Return `True` if `last_modified_ms` (milliseconds since epoch, as returned by the Harvest Files API) falls within the past `within_seconds` seconds.
|
|
229
|
+
|
|
230
|
+
| Parameter | Type | Description |
|
|
231
|
+
|-----------|------|-------------|
|
|
232
|
+
| `last_modified_ms` | `int` | Milliseconds since epoch |
|
|
233
|
+
| `within_seconds` | `int` | Threshold in seconds |
|
|
234
|
+
| `now` | `datetime \| None` | Reference time (UTC). Defaults to `datetime.now(timezone.utc)` |
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
if sf.is_recent(entry["lastModifiedTime"], within_seconds=7 * 86_400):
|
|
238
|
+
print("Modified within the last week")
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
### `LEVEL_INFO` / `LEVEL_WARN` / `LEVEL_ERROR`
|
|
244
|
+
|
|
245
|
+
Log level constants used internally by `log_status()`. You can also pass these directly when calling `log_status()`.
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
LEVEL_INFO = "INFO"
|
|
249
|
+
LEVEL_WARN = "WARN"
|
|
250
|
+
LEVEL_ERROR = "ERROR"
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
---
|
|
254
|
+
|
|
255
|
+
## Bulk download pattern
|
|
256
|
+
|
|
257
|
+
`download_and_save()` operates on one file at a time, giving you full control over size limits, delete-after-download, and error handling per file.
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
import os
|
|
261
|
+
import soracom_harvest_files as sf
|
|
262
|
+
|
|
263
|
+
auth_info = sf.read_auth_keys()
|
|
264
|
+
token = sf.authenticate(auth_info)
|
|
265
|
+
|
|
266
|
+
files = sf.list_files_iterative("logs/XXXXXXXXXXXXXXXX/", token)
|
|
267
|
+
save_dir = "./download/XXXXXXXXXXXXXXXX"
|
|
268
|
+
os.makedirs(save_dir, exist_ok=True)
|
|
269
|
+
|
|
270
|
+
LIMIT_BYTES = 200 * 1024 * 1024 # 200 MiB
|
|
271
|
+
downloaded_bytes = 0
|
|
272
|
+
|
|
273
|
+
for remote_path in sorted(files):
|
|
274
|
+
if downloaded_bytes >= LIMIT_BYTES:
|
|
275
|
+
break
|
|
276
|
+
|
|
277
|
+
local_path = os.path.join(save_dir, os.path.basename(remote_path))
|
|
278
|
+
ok = sf.download_and_save(remote_path, local_path, token)
|
|
279
|
+
|
|
280
|
+
if ok:
|
|
281
|
+
downloaded_bytes += os.path.getsize(local_path)
|
|
282
|
+
# Optionally delete from Harvest Files after download:
|
|
283
|
+
# sf.delete_with_auth(f"{sf.API_BASE}/files/private/{remote_path}", token)
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
## Logging
|
|
289
|
+
|
|
290
|
+
`soracom_harvest_files` uses Python's standard `logging` module under the logger name `soracom_harvest_files`.
|
|
291
|
+
By default a `NullHandler` is attached, so no output appears unless your application configures logging.
|
|
292
|
+
To integrate with your own logging config:
|
|
293
|
+
|
|
294
|
+
```python
|
|
295
|
+
import logging
|
|
296
|
+
|
|
297
|
+
# Show INFO and above from this library on stderr
|
|
298
|
+
logging.basicConfig(level=logging.INFO)
|
|
299
|
+
logging.getLogger("soracom_harvest_files").propagate = True
|
|
300
|
+
|
|
301
|
+
# Or suppress all output from this library
|
|
302
|
+
logging.getLogger("soracom_harvest_files").setLevel(logging.CRITICAL)
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
---
|
|
306
|
+
|
|
307
|
+
## License
|
|
308
|
+
|
|
309
|
+
Copyright (c) 2025-2026 tkxu
|
|
310
|
+
|
|
311
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
312
|
+
you may not use this file except in compliance with the License.
|
|
313
|
+
You may obtain a copy of the License at
|
|
314
|
+
|
|
315
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
316
|
+
|
|
317
|
+
Unless required by applicable law or agreed to in writing, software
|
|
318
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
319
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
320
|
+
See the License for the specific language governing permissions and
|
|
321
|
+
limitations under the License.
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
# soracom_lib
|
|
2
|
+
|
|
3
|
+
A lightweight Python client library for [SORACOM Harvest Files](https://developers.soracom.io/en/docs/harvest/).
|
|
4
|
+
|
|
5
|
+
Provides authentication, iterative file listing, download, upload, and related helpers.
|
|
6
|
+
All functionality lives in `soracom_harvest_files.py`.
|
|
7
|
+
|
|
8
|
+
No dependencies beyond the standard library and `requests`.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Features
|
|
13
|
+
|
|
14
|
+
- Credential loading from `_netrc` / `.netrc` (no credentials in source code)
|
|
15
|
+
- Authentication via SORACOM SAM user — wraps API key + token into a reusable dict
|
|
16
|
+
- Iterative directory traversal with optional time-range filter and pagination
|
|
17
|
+
- Per-file download with skip-existing and overwrite options
|
|
18
|
+
- File upload with automatic `Content-Type` detection
|
|
19
|
+
- Recency helper for filtering by last-modified time
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Requirements
|
|
24
|
+
|
|
25
|
+
Python 3.8 or later and `requests` 2.28 or later.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
No package installation is needed. Copy the file into your project:
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
your_project/
|
|
35
|
+
└── soracom_harvest_files.py
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Install the only external dependency:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install requests
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Credentials setup
|
|
47
|
+
|
|
48
|
+
`soracom_harvest_files` never reads credentials from source code or environment variables.
|
|
49
|
+
It uses the standard `netrc` mechanism so secrets stay out of your repository.
|
|
50
|
+
|
|
51
|
+
### 1. Create a SORACOM SAM user
|
|
52
|
+
|
|
53
|
+
Generate an **Auth Key** in the [SORACOM User Console](https://console.soracom.io/) under **Security → SAM Users**.
|
|
54
|
+
|
|
55
|
+
### 2. Write the netrc file
|
|
56
|
+
|
|
57
|
+
**Windows** — create `%USERPROFILE%\_netrc`:
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
machine api.soracom.io
|
|
61
|
+
login keyId-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
62
|
+
password secret-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
**Linux / macOS** — create `~/.netrc` and restrict permissions:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
cat >> ~/.netrc << 'EOF'
|
|
69
|
+
machine api.soracom.io
|
|
70
|
+
login keyId-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
71
|
+
password secret-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
72
|
+
EOF
|
|
73
|
+
chmod 600 ~/.netrc
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 3. Add netrc to .gitignore
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
_netrc
|
|
80
|
+
.netrc
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## Quick start
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
import soracom_harvest_files as sf
|
|
89
|
+
|
|
90
|
+
# Load credentials (returns AuthInfo; raises RuntimeError on failure)
|
|
91
|
+
auth_info = sf.read_auth_keys()
|
|
92
|
+
|
|
93
|
+
# Build token dict from credentials
|
|
94
|
+
token = sf.authenticate(auth_info)
|
|
95
|
+
|
|
96
|
+
# List all files under a path
|
|
97
|
+
files = sf.list_files_iterative("logs/XXXXXXXXXXXXXXXXX/", token)
|
|
98
|
+
print(files)
|
|
99
|
+
|
|
100
|
+
# Download each file individually
|
|
101
|
+
import os
|
|
102
|
+
for remote_path in files:
|
|
103
|
+
local_path = os.path.join("./download", os.path.basename(remote_path))
|
|
104
|
+
sf.download_and_save(remote_path, local_path, token)
|
|
105
|
+
|
|
106
|
+
# Upload a file
|
|
107
|
+
sf.upload_file_to_soracom(
|
|
108
|
+
file_path="./results/output.log",
|
|
109
|
+
upload_path="logs/XXXXXXXXXXXXXXXX/output.log",
|
|
110
|
+
token=token,
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## API reference
|
|
117
|
+
|
|
118
|
+
### `read_auth_keys(base_dir=None) -> AuthInfo`
|
|
119
|
+
|
|
120
|
+
Load credentials from `_netrc` or `.netrc` and return an `AuthInfo` object.
|
|
121
|
+
|
|
122
|
+
| Parameter | Type | Description |
|
|
123
|
+
|-----------|------|-------------|
|
|
124
|
+
| `base_dir` | `str \| None` | When provided, also searches for `.netrc` in this directory (takes priority over the home directory). |
|
|
125
|
+
|
|
126
|
+
Raises `RuntimeError` if no valid credentials are found.
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
### `AuthInfo`
|
|
131
|
+
|
|
132
|
+
Frozen dataclass holding the loaded credentials.
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
@dataclass(frozen=True)
|
|
136
|
+
class AuthInfo:
|
|
137
|
+
api_key: str # authKeyId from netrc login field
|
|
138
|
+
api_token: str # authKey from netrc password field
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
### `authenticate(auth: AuthInfo) -> dict`
|
|
144
|
+
|
|
145
|
+
Wrap `AuthInfo` into a token dict that is passed to every subsequent API call.
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
token = sf.authenticate(auth_info)
|
|
149
|
+
# → {"apiKey": "...", "token": "..."}
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
No network call is made.
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
### `list_files_iterative(base_path, token, *, limit=None, page_size=100, start_time=None, end_time=None) -> list[str]`
|
|
157
|
+
|
|
158
|
+
Iteratively traverse directories and list all files under `base_path` in Harvest Files.
|
|
159
|
+
|
|
160
|
+
| Parameter | Type | Description |
|
|
161
|
+
|-----------|------|-------------|
|
|
162
|
+
| `base_path` | `str` | Root path in Harvest Files, e.g. `"logs/XXXXX.../"` |
|
|
163
|
+
| `token` | `dict` | Token dict from `authenticate()` |
|
|
164
|
+
| `limit` | `int \| None` | Cap on total results; `None` = no limit |
|
|
165
|
+
| `page_size` | `int` | Number of entries per API request (default `100`, max `100`) |
|
|
166
|
+
| `start_time` | `datetime \| None` | Exclude files modified before this time. Naive datetimes are treated as UTC. |
|
|
167
|
+
| `end_time` | `datetime \| None` | Exclude files modified after this time. Naive datetimes are treated as UTC. |
|
|
168
|
+
|
|
169
|
+
Returns a list of full file path strings.
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
### `download_and_save(remote_path, local_path, token, *, overwrite=False, chunk_size=1048576) -> bool`
|
|
174
|
+
|
|
175
|
+
Download a single file from Harvest Files and save it to the local filesystem.
|
|
176
|
+
|
|
177
|
+
| Parameter | Type | Description |
|
|
178
|
+
|-----------|------|-------------|
|
|
179
|
+
| `remote_path` | `str` | File path in Harvest Files (e.g. `"logs/XXXXX.../file.log"`) |
|
|
180
|
+
| `local_path` | `str` | Destination path on local filesystem |
|
|
181
|
+
| `token` | `dict` | Token dict from `authenticate()` |
|
|
182
|
+
| `overwrite` | `bool` | If `False` (default) and the local file already exists, the download is skipped and `True` is returned |
|
|
183
|
+
| `chunk_size` | `int` | Streaming chunk size in bytes (default 1 MiB) |
|
|
184
|
+
|
|
185
|
+
Returns `True` on success (including skip), `False` on failure.
|
|
186
|
+
Parent directories are created automatically.
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
### `upload_file_to_soracom(file_path, upload_path, token) -> bool`
|
|
191
|
+
|
|
192
|
+
Upload a local file to Harvest Files via HTTP PUT.
|
|
193
|
+
`Content-Type` is detected automatically from the file extension; falls back to `application/octet-stream`.
|
|
194
|
+
|
|
195
|
+
Returns `True` on success (HTTP 200 or 204), `False` on failure.
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
### `get_with_auth(url, token) -> requests.Response | None`
|
|
200
|
+
|
|
201
|
+
Authenticated GET. Returns the response object on success (status < 400), or `None` on error.
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
### `delete_with_auth(url, token) -> requests.Response | None`
|
|
206
|
+
|
|
207
|
+
Authenticated DELETE. Returns the response object on success (status < 400), or `None` on error.
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
### `is_recent(last_modified_ms, within_seconds, *, now=None) -> bool`
|
|
212
|
+
|
|
213
|
+
Return `True` if `last_modified_ms` (milliseconds since epoch, as returned by the Harvest Files API) falls within the past `within_seconds` seconds.
|
|
214
|
+
|
|
215
|
+
| Parameter | Type | Description |
|
|
216
|
+
|-----------|------|-------------|
|
|
217
|
+
| `last_modified_ms` | `int` | Milliseconds since epoch |
|
|
218
|
+
| `within_seconds` | `int` | Threshold in seconds |
|
|
219
|
+
| `now` | `datetime \| None` | Reference time (UTC). Defaults to `datetime.now(timezone.utc)` |
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
if sf.is_recent(entry["lastModifiedTime"], within_seconds=7 * 86_400):
|
|
223
|
+
print("Modified within the last week")
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
### `LEVEL_INFO` / `LEVEL_WARN` / `LEVEL_ERROR`
|
|
229
|
+
|
|
230
|
+
Log level constants used internally by `log_status()`. You can also pass these directly when calling `log_status()`.
|
|
231
|
+
|
|
232
|
+
```python
|
|
233
|
+
LEVEL_INFO = "INFO"
|
|
234
|
+
LEVEL_WARN = "WARN"
|
|
235
|
+
LEVEL_ERROR = "ERROR"
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
## Bulk download pattern
|
|
241
|
+
|
|
242
|
+
`download_and_save()` operates on one file at a time, giving you full control over size limits, delete-after-download, and error handling per file.
|
|
243
|
+
|
|
244
|
+
```python
|
|
245
|
+
import os
|
|
246
|
+
import soracom_harvest_files as sf
|
|
247
|
+
|
|
248
|
+
auth_info = sf.read_auth_keys()
|
|
249
|
+
token = sf.authenticate(auth_info)
|
|
250
|
+
|
|
251
|
+
files = sf.list_files_iterative("logs/XXXXXXXXXXXXXXXX/", token)
|
|
252
|
+
save_dir = "./download/XXXXXXXXXXXXXXXX"
|
|
253
|
+
os.makedirs(save_dir, exist_ok=True)
|
|
254
|
+
|
|
255
|
+
LIMIT_BYTES = 200 * 1024 * 1024 # 200 MiB
|
|
256
|
+
downloaded_bytes = 0
|
|
257
|
+
|
|
258
|
+
for remote_path in sorted(files):
|
|
259
|
+
if downloaded_bytes >= LIMIT_BYTES:
|
|
260
|
+
break
|
|
261
|
+
|
|
262
|
+
local_path = os.path.join(save_dir, os.path.basename(remote_path))
|
|
263
|
+
ok = sf.download_and_save(remote_path, local_path, token)
|
|
264
|
+
|
|
265
|
+
if ok:
|
|
266
|
+
downloaded_bytes += os.path.getsize(local_path)
|
|
267
|
+
# Optionally delete from Harvest Files after download:
|
|
268
|
+
# sf.delete_with_auth(f"{sf.API_BASE}/files/private/{remote_path}", token)
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## Logging
|
|
274
|
+
|
|
275
|
+
`soracom_harvest_files` uses Python's standard `logging` module under the logger name `soracom_harvest_files`.
|
|
276
|
+
By default a `NullHandler` is attached, so no output appears unless your application configures logging.
|
|
277
|
+
To integrate with your own logging config:
|
|
278
|
+
|
|
279
|
+
```python
|
|
280
|
+
import logging
|
|
281
|
+
|
|
282
|
+
# Show INFO and above from this library on stderr
|
|
283
|
+
logging.basicConfig(level=logging.INFO)
|
|
284
|
+
logging.getLogger("soracom_harvest_files").propagate = True
|
|
285
|
+
|
|
286
|
+
# Or suppress all output from this library
|
|
287
|
+
logging.getLogger("soracom_harvest_files").setLevel(logging.CRITICAL)
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
---
|
|
291
|
+
|
|
292
|
+
## License
|
|
293
|
+
|
|
294
|
+
Copyright (c) 2025-2026 tkxu
|
|
295
|
+
|
|
296
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
297
|
+
you may not use this file except in compliance with the License.
|
|
298
|
+
You may obtain a copy of the License at
|
|
299
|
+
|
|
300
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
301
|
+
|
|
302
|
+
Unless required by applicable law or agreed to in writing, software
|
|
303
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
304
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
305
|
+
See the License for the specific language governing permissions and
|
|
306
|
+
limitations under the License.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "soracom-lib"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "SORACOM Harvest Files utility library"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = {text = "Apache-2.0"}
|
|
12
|
+
|
|
13
|
+
authors = [
|
|
14
|
+
{name = "tkxu"}
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
dependencies = [
|
|
18
|
+
"requests>=2.28"
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
classifiers = [
|
|
22
|
+
"Programming Language :: Python :: 3",
|
|
23
|
+
"License :: OSI Approved :: Apache Software License",
|
|
24
|
+
"Operating System :: OS Independent"
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[tool.setuptools]
|
|
28
|
+
packages = ["soracomlib"]
|