tostorchconnector 1.0.8__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tostorchconnector might be problematic. Click here for more details.
- {tostorchconnector-1.0.8/tostorchconnector.egg-info → tostorchconnector-1.1.0}/PKG-INFO +2 -2
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/pyproject.toml +2 -2
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector/tos_client.py +3 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector/tos_common.py +14 -11
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector/tos_iterable_dataset.py +3 -2
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector/tos_map_dataset.py +3 -2
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0/tostorchconnector.egg-info}/PKG-INFO +2 -2
- tostorchconnector-1.1.0/tostorchconnector.egg-info/requires.txt +3 -0
- tostorchconnector-1.0.8/tostorchconnector.egg-info/requires.txt +0 -3
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/LICENSE +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/README.md +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/setup.cfg +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tests/test_tos_dataset.py +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tests/test_tosclient.py +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tests/test_tosrawclient.py +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector/__init__.py +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector/tos_checkpoint.py +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector/tos_object_meta.py +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector/tos_object_reader.py +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector/tos_object_writer.py +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector.egg-info/SOURCES.txt +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector.egg-info/dependency_links.txt +0 -0
- {tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tostorchconnector
|
|
3
|
-
Version: 1.0.8
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: TOS connector integration for PyTorch
|
|
5
5
|
Author-email: xiangshijian <xiangshijian@bytedance.com>
|
|
6
6
|
Classifier: Development Status :: 4 - Beta
|
|
@@ -19,7 +19,7 @@ Description-Content-Type: text/markdown
|
|
|
19
19
|
License-File: LICENSE
|
|
20
20
|
Requires-Dist: torch>=2.0
|
|
21
21
|
Requires-Dist: tos>=2.8.0
|
|
22
|
-
Requires-Dist: tosnativeclient>=1.0.
|
|
22
|
+
Requires-Dist: tosnativeclient>=1.0.7
|
|
23
23
|
Dynamic: license-file
|
|
24
24
|
|
|
25
25
|
# TOS Connector for pytorch
|
|
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
|
|
5
5
|
|
|
6
6
|
[project]
|
|
7
7
|
name = "tostorchconnector"
|
|
8
|
-
version = "1.0.8"
|
|
8
|
+
version = "1.1.0"
|
|
9
9
|
description = "TOS connector integration for PyTorch"
|
|
10
10
|
authors = [{ name = "xiangshijian", email = "xiangshijian@bytedance.com" }]
|
|
11
11
|
requires-python = ">=3.8,<3.14"
|
|
@@ -26,7 +26,7 @@ classifiers = [
|
|
|
26
26
|
dependencies = [
|
|
27
27
|
"torch >= 2.0",
|
|
28
28
|
"tos>=2.8.0",
|
|
29
|
-
"tosnativeclient >= 1.0.
|
|
29
|
+
"tosnativeclient >= 1.0.7"
|
|
30
30
|
]
|
|
31
31
|
|
|
32
32
|
[tool.setuptools.packages]
|
|
@@ -151,6 +151,9 @@ class TosClient(object):
|
|
|
151
151
|
directives = 'info'
|
|
152
152
|
directory = self._log_conf.log_dir
|
|
153
153
|
file_name_prefix = self._log_conf.log_file_name
|
|
154
|
+
# reset log_conf to avoid panic
|
|
155
|
+
self._log_conf = TosLogConfig()
|
|
156
|
+
|
|
154
157
|
self._inner_client = tosnativeclient.TosClient(self._region, self._endpoint, self._cred.ak,
|
|
155
158
|
self._cred.sk,
|
|
156
159
|
self._client_conf.part_size,
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Union, Iterator, Tuple, Optional
|
|
3
3
|
|
|
4
|
+
from tosnativeclient import TosObject
|
|
4
5
|
from . import TosObjectReader
|
|
5
6
|
from .tos_client import TosClient
|
|
6
7
|
from .tos_object_meta import TosObjectMeta
|
|
@@ -14,7 +15,7 @@ class TosObjectIterable(object):
|
|
|
14
15
|
self._prefix = prefix
|
|
15
16
|
self._client = client
|
|
16
17
|
|
|
17
|
-
def __iter__(self) -> Iterator[TosObjectMeta]:
|
|
18
|
+
def __iter__(self) -> Iterator[TosObjectMeta | TosObject]:
|
|
18
19
|
return iter(TosObjectIterator(self._bucket, self._prefix, self._client))
|
|
19
20
|
|
|
20
21
|
|
|
@@ -35,10 +36,10 @@ class TosObjectIterator(object):
|
|
|
35
36
|
if self._list_stream is not None:
|
|
36
37
|
self._list_stream.close()
|
|
37
38
|
|
|
38
|
-
def __iter__(self) -> Iterator[TosObjectMeta]:
|
|
39
|
+
def __iter__(self) -> Iterator[TosObjectMeta | TosObject]:
|
|
39
40
|
return self
|
|
40
41
|
|
|
41
|
-
def __next__(self) -> TosObjectMeta:
|
|
42
|
+
def __next__(self) -> TosObjectMeta | TosObject:
|
|
42
43
|
if self._client.use_native_client:
|
|
43
44
|
if self._list_stream is None:
|
|
44
45
|
self._list_stream = self._client.gen_list_stream(self._bucket, self._prefix, max_keys=1000,
|
|
@@ -46,15 +47,17 @@ class TosObjectIterator(object):
|
|
|
46
47
|
continuation_token=self._continuation_token)
|
|
47
48
|
|
|
48
49
|
if self._object_metas is None or self._index >= len(self._object_metas):
|
|
49
|
-
self._object_metas = []
|
|
50
|
+
self._object_metas = None
|
|
50
51
|
self._index = 0
|
|
51
52
|
while 1:
|
|
52
|
-
|
|
53
|
+
try:
|
|
54
|
+
objects = next(self._list_stream)
|
|
55
|
+
except:
|
|
56
|
+
self.close()
|
|
57
|
+
raise
|
|
53
58
|
self._continuation_token = self._list_stream.current_continuation_token()
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
TosObjectMeta(content.bucket, content.key, content.size, content.etag))
|
|
57
|
-
if len(self._object_metas) > 0:
|
|
59
|
+
self._object_metas = objects.contents
|
|
60
|
+
if self._object_metas is not None and len(self._object_metas) > 0:
|
|
58
61
|
break
|
|
59
62
|
|
|
60
63
|
object_meta = self._object_metas[self._index]
|
|
@@ -104,12 +107,12 @@ def default_trans(obj: TosObjectReader) -> TosObjectReader:
|
|
|
104
107
|
return obj
|
|
105
108
|
|
|
106
109
|
|
|
107
|
-
def gen_dataset_from_urls(urls: Union[str, Iterator[str]], _: TosClient) -> Iterator[TosObjectMeta]:
|
|
110
|
+
def gen_dataset_from_urls(urls: Union[str, Iterator[str]], _: TosClient) -> Iterator[TosObjectMeta | TosObject]:
|
|
108
111
|
if isinstance(urls, str):
|
|
109
112
|
urls = [urls]
|
|
110
113
|
return (TosObjectMeta(bucket, key) for bucket, key in [parse_tos_url(url) for url in urls])
|
|
111
114
|
|
|
112
115
|
|
|
113
|
-
def gen_dataset_from_prefix(prefix: str, client: TosClient) -> Iterator[TosObjectMeta]:
|
|
116
|
+
def gen_dataset_from_prefix(prefix: str, client: TosClient) -> Iterator[TosObjectMeta | TosObject]:
|
|
114
117
|
bucket, prefix = parse_tos_url(prefix)
|
|
115
118
|
return iter(TosObjectIterable(bucket, prefix, client))
|
{tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector/tos_iterable_dataset.py
RENAMED
|
@@ -4,6 +4,7 @@ from typing import Iterator, Any, Optional, Callable, Union
|
|
|
4
4
|
|
|
5
5
|
import torch
|
|
6
6
|
|
|
7
|
+
from tosnativeclient import TosObject
|
|
7
8
|
from . import TosObjectReader
|
|
8
9
|
from .tos_client import CredentialProvider, TosClientConfig, TosClient, TosLogConfig, ReaderType
|
|
9
10
|
from .tos_common import default_trans, gen_dataset_from_urls, gen_dataset_from_prefix
|
|
@@ -14,7 +15,7 @@ log = logging.getLogger(__name__)
|
|
|
14
15
|
|
|
15
16
|
class TosIterableDataset(torch.utils.data.IterableDataset):
|
|
16
17
|
def __init__(self, region: str,
|
|
17
|
-
gen_dataset: Callable[[TosClient], Iterator[TosObjectMeta]],
|
|
18
|
+
gen_dataset: Callable[[TosClient], Iterator[TosObjectMeta | TosObject]],
|
|
18
19
|
endpoint: Optional[str] = None,
|
|
19
20
|
transform: Callable[[TosObjectReader], Any] = default_trans,
|
|
20
21
|
cred: Optional[CredentialProvider] = None,
|
|
@@ -100,7 +101,7 @@ class TosIterableDataset(torch.utils.data.IterableDataset):
|
|
|
100
101
|
)
|
|
101
102
|
return map(self._trans_tos_object, part_dataset)
|
|
102
103
|
|
|
103
|
-
def _trans_tos_object(self, object_meta: TosObjectMeta) -> Any:
|
|
104
|
+
def _trans_tos_object(self, object_meta: TosObjectMeta | TosObject) -> Any:
|
|
104
105
|
obj = self._client.get_object(object_meta.bucket, object_meta.key, object_meta.etag, object_meta.size,
|
|
105
106
|
reader_type=self._reader_type, buffer_size=self._buffer_size)
|
|
106
107
|
return self._trans(obj)
|
|
@@ -4,6 +4,7 @@ from typing import Any, Callable, Iterator, Optional, List, Union
|
|
|
4
4
|
|
|
5
5
|
import torch
|
|
6
6
|
|
|
7
|
+
from tosnativeclient import TosObject
|
|
7
8
|
from . import TosObjectReader
|
|
8
9
|
from .tos_client import CredentialProvider, TosClientConfig, TosClient, TosLogConfig, ReaderType
|
|
9
10
|
from .tos_common import default_trans, gen_dataset_from_prefix, \
|
|
@@ -15,7 +16,7 @@ log = logging.getLogger(__name__)
|
|
|
15
16
|
|
|
16
17
|
class TosMapDataset(torch.utils.data.Dataset):
|
|
17
18
|
def __init__(self, region: str,
|
|
18
|
-
gen_dataset: Callable[[TosClient], Iterator[TosObjectMeta]],
|
|
19
|
+
gen_dataset: Callable[[TosClient], Iterator[TosObjectMeta | TosObject]],
|
|
19
20
|
endpoint: Optional[str] = None,
|
|
20
21
|
transform: Callable[[TosObjectReader], Any] = default_trans,
|
|
21
22
|
cred: Optional[CredentialProvider] = None,
|
|
@@ -71,7 +72,7 @@ class TosMapDataset(torch.utils.data.Dataset):
|
|
|
71
72
|
return len(self._data_set)
|
|
72
73
|
|
|
73
74
|
@property
|
|
74
|
-
def _data_set(self) -> List[TosObjectMeta]:
|
|
75
|
+
def _data_set(self) -> List[TosObjectMeta | TosObject]:
|
|
75
76
|
if self._dataset is None:
|
|
76
77
|
self._dataset = list(self._gen_dataset(self._client))
|
|
77
78
|
assert self._dataset is not None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tostorchconnector
|
|
3
|
-
Version: 1.0.8
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: TOS connector integration for PyTorch
|
|
5
5
|
Author-email: xiangshijian <xiangshijian@bytedance.com>
|
|
6
6
|
Classifier: Development Status :: 4 - Beta
|
|
@@ -19,7 +19,7 @@ Description-Content-Type: text/markdown
|
|
|
19
19
|
License-File: LICENSE
|
|
20
20
|
Requires-Dist: torch>=2.0
|
|
21
21
|
Requires-Dist: tos>=2.8.0
|
|
22
|
-
Requires-Dist: tosnativeclient>=1.0.
|
|
22
|
+
Requires-Dist: tosnativeclient>=1.0.7
|
|
23
23
|
Dynamic: license-file
|
|
24
24
|
|
|
25
25
|
# TOS Connector for pytorch
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{tostorchconnector-1.0.8 → tostorchconnector-1.1.0}/tostorchconnector.egg-info/top_level.txt
RENAMED
|
File without changes
|