xoscar 0.9.0__cp312-cp312-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xoscar/__init__.py +61 -0
- xoscar/_utils.cpython-312-darwin.so +0 -0
- xoscar/_utils.pxd +36 -0
- xoscar/_utils.pyx +246 -0
- xoscar/_version.py +693 -0
- xoscar/aio/__init__.py +16 -0
- xoscar/aio/base.py +86 -0
- xoscar/aio/file.py +59 -0
- xoscar/aio/lru.py +228 -0
- xoscar/aio/parallelism.py +39 -0
- xoscar/api.py +527 -0
- xoscar/backend.py +67 -0
- xoscar/backends/__init__.py +14 -0
- xoscar/backends/allocate_strategy.py +160 -0
- xoscar/backends/communication/__init__.py +30 -0
- xoscar/backends/communication/base.py +315 -0
- xoscar/backends/communication/core.py +69 -0
- xoscar/backends/communication/dummy.py +253 -0
- xoscar/backends/communication/errors.py +20 -0
- xoscar/backends/communication/socket.py +444 -0
- xoscar/backends/communication/ucx.py +538 -0
- xoscar/backends/communication/utils.py +97 -0
- xoscar/backends/config.py +157 -0
- xoscar/backends/context.py +437 -0
- xoscar/backends/core.py +352 -0
- xoscar/backends/indigen/__init__.py +16 -0
- xoscar/backends/indigen/__main__.py +19 -0
- xoscar/backends/indigen/backend.py +51 -0
- xoscar/backends/indigen/driver.py +26 -0
- xoscar/backends/indigen/fate_sharing.py +221 -0
- xoscar/backends/indigen/pool.py +515 -0
- xoscar/backends/indigen/shared_memory.py +548 -0
- xoscar/backends/message.cpython-312-darwin.so +0 -0
- xoscar/backends/message.pyi +255 -0
- xoscar/backends/message.pyx +646 -0
- xoscar/backends/pool.py +1630 -0
- xoscar/backends/router.py +285 -0
- xoscar/backends/test/__init__.py +16 -0
- xoscar/backends/test/backend.py +38 -0
- xoscar/backends/test/pool.py +233 -0
- xoscar/batch.py +256 -0
- xoscar/collective/__init__.py +27 -0
- xoscar/collective/backend/__init__.py +13 -0
- xoscar/collective/backend/nccl_backend.py +160 -0
- xoscar/collective/common.py +102 -0
- xoscar/collective/core.py +737 -0
- xoscar/collective/process_group.py +687 -0
- xoscar/collective/utils.py +41 -0
- xoscar/collective/xoscar_pygloo.cpython-312-darwin.so +0 -0
- xoscar/collective/xoscar_pygloo.pyi +239 -0
- xoscar/constants.py +23 -0
- xoscar/context.cpython-312-darwin.so +0 -0
- xoscar/context.pxd +21 -0
- xoscar/context.pyx +368 -0
- xoscar/core.cpython-312-darwin.so +0 -0
- xoscar/core.pxd +51 -0
- xoscar/core.pyx +664 -0
- xoscar/debug.py +188 -0
- xoscar/driver.py +42 -0
- xoscar/errors.py +63 -0
- xoscar/libcpp.pxd +31 -0
- xoscar/metrics/__init__.py +21 -0
- xoscar/metrics/api.py +288 -0
- xoscar/metrics/backends/__init__.py +13 -0
- xoscar/metrics/backends/console/__init__.py +13 -0
- xoscar/metrics/backends/console/console_metric.py +82 -0
- xoscar/metrics/backends/metric.py +149 -0
- xoscar/metrics/backends/prometheus/__init__.py +13 -0
- xoscar/metrics/backends/prometheus/prometheus_metric.py +70 -0
- xoscar/nvutils.py +717 -0
- xoscar/profiling.py +260 -0
- xoscar/serialization/__init__.py +20 -0
- xoscar/serialization/aio.py +141 -0
- xoscar/serialization/core.cpython-312-darwin.so +0 -0
- xoscar/serialization/core.pxd +28 -0
- xoscar/serialization/core.pyi +57 -0
- xoscar/serialization/core.pyx +944 -0
- xoscar/serialization/cuda.py +111 -0
- xoscar/serialization/exception.py +48 -0
- xoscar/serialization/mlx.py +67 -0
- xoscar/serialization/numpy.py +82 -0
- xoscar/serialization/pyfury.py +37 -0
- xoscar/serialization/scipy.py +72 -0
- xoscar/serialization/torch.py +180 -0
- xoscar/utils.py +522 -0
- xoscar/virtualenv/__init__.py +34 -0
- xoscar/virtualenv/core.py +268 -0
- xoscar/virtualenv/platform.py +56 -0
- xoscar/virtualenv/utils.py +100 -0
- xoscar/virtualenv/uv.py +321 -0
- xoscar-0.9.0.dist-info/METADATA +230 -0
- xoscar-0.9.0.dist-info/RECORD +94 -0
- xoscar-0.9.0.dist-info/WHEEL +6 -0
- xoscar-0.9.0.dist-info/top_level.txt +2 -0
xoscar/debug.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import asyncio.tasks
|
|
17
|
+
import contextvars
|
|
18
|
+
import json
|
|
19
|
+
import logging
|
|
20
|
+
import os
|
|
21
|
+
import time
|
|
22
|
+
from contextlib import contextmanager
|
|
23
|
+
from dataclasses import dataclass
|
|
24
|
+
from typing import List, Optional, Type # noqa: F401
|
|
25
|
+
|
|
26
|
+
from .utils import dataslots
|
|
27
|
+
|
|
28
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataslots
@dataclass
class MessageTraceItem:
    """One hop of a message call trace: a method called on a given actor."""

    # uid of the target actor (filled from ``message.actor_ref.uid``)
    uid: str
    # address of the target actor (filled from ``message.actor_ref.address``)
    address: str
    # first element of ``message.content``; shown as the called method name
    # in the call-cycle warning
    method: str
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataslots
@dataclass
class DebugOptions:
    """Debug settings, built from the ``DEBUG_OSCAR`` environment variable."""

    # ``debug_async_timeout`` looks a timeout up by attribute name and uses it
    # as the base interval (seconds) between "timeout" warnings; values <= 0
    # disable the warning. Presumably these three are the names callers pass
    # as ``option_name`` — TODO confirm against the call sites.
    actor_call_timeout: int = 10
    process_message_timeout: int = 30
    actor_lock_timeout: int = 30
    # not read in this module — presumably consumed by the native core
    # after ``set_debug_options``; TODO confirm
    log_unhandled_errors: bool = True
    # enables message-trace recording and call-cycle warnings in this module
    log_cycle_send: bool = True
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Currently active debug options; ``None`` means debugging is switched off.
_debug_opts: Optional[DebugOptions] = None


def get_debug_options() -> Optional[DebugOptions]:
    """Return the active debug options, or ``None`` when debugging is off."""
    return _debug_opts
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def set_debug_options(options: Optional[DebugOptions]):
    """
    Install ``options`` as the active debug options.

    The module-level ``_debug_opts`` is updated first, then the same object is
    forwarded to ``set_debug_options`` of the ``.core`` extension module.

    Parameters
    ----------
    options : DebugOptions or None
        New options; ``None`` disables debugging.
    """
    global _debug_opts
    _debug_opts = options

    # deliver debug config to native codes for optimization
    # NOTE(review): imported at call time rather than at module level,
    # presumably to avoid a circular import with ``.core`` — confirm.
    from .core import set_debug_options as core_set_debug_options

    core_set_debug_options(options)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _reload_debug_opts_from_env(cls: Type):
    """
    Rebuild the active debug options from the ``DEBUG_OSCAR`` variable.

    * unset or ``"0"`` -> debugging is disabled (options set to ``None``)
    * ``"1"``          -> ``cls()`` with all defaults
    * anything else    -> parsed as JSON and expanded into ``cls(**parsed)``

    Parameters
    ----------
    cls : type
        Options class to instantiate.

    Raises
    ------
    json.JSONDecodeError
        If the value is neither ``"0"``/``"1"`` nor valid JSON.
    """
    raw = os.environ.get("DEBUG_OSCAR", "0")
    if raw == "0":
        options = None
    elif raw == "1":
        options = cls()
    else:
        options = cls(**json.loads(raw))
    set_debug_options(options)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def reload_debug_opts_from_env():
    """Re-read ``DEBUG_OSCAR`` and install the resulting ``DebugOptions``."""
    _reload_debug_opts_from_env(DebugOptions)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
async def _log_timeout(timeout, msg, *args, **kwargs):
    """
    Log a warning repeatedly until the coroutine is cancelled.

    The k-th warning (k = 1, 2, ...) is emitted after sleeping a further
    ``timeout * k`` seconds, so the warnings become progressively rarer.

    Parameters
    ----------
    timeout
        Base interval in seconds.
    msg
        %-style log message; the elapsed seconds are appended as a final
        ``%.4f`` argument.
    *args, **kwargs
        Forwarded to ``logger.warning``.
    """
    began_at = time.time()
    attempt = 1
    while True:
        await asyncio.sleep(timeout * attempt)
        attempt += 1
        logger.warning(
            msg + " (timeout for %.4f seconds).",
            *args,
            time.time() - began_at,
            **kwargs,
        )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@contextmanager
def debug_async_timeout(option_name: str, msg, *args, **kwargs):
    """
    Warn periodically while the with-body is taking too long.

    Does nothing when debugging is disabled. Otherwise the timeout is read
    from the active options by attribute name; if it is positive, a background
    task running ``_log_timeout`` is started and cancelled again on exit.
    Starting the task uses ``asyncio.create_task``, which needs a running
    event loop.

    Parameters
    ----------
    option_name : str
        Attribute of the active debug options holding the timeout; a missing
        attribute is treated as ``-1`` (no warning).
    msg, *args, **kwargs
        Forwarded to ``_log_timeout``.
    """
    if _debug_opts is None:
        yield
        return

    interval = getattr(_debug_opts, option_name, -1)
    watchdog = (
        asyncio.create_task(_log_timeout(interval, msg, *args, **kwargs))
        if interval and interval > 0
        else None
    )

    try:
        yield
    finally:
        if watchdog is not None:
            watchdog.cancel()
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# Context-local call trace (a list of MessageTraceItem, or None).
# The variable has no default, so readers use ``.get(None)``.
_message_trace_var: contextvars.ContextVar = contextvars.ContextVar(
    "_message_trace_var"
)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@contextmanager
def record_message_trace(message):
    """
    Publish the call trace of ``message`` for the duration of the with-body.

    When cycle logging is enabled, the trace carried by ``message`` is copied,
    extended with a ``MessageTraceItem`` describing ``message`` itself and
    stored in the context variable. On exit the context variable is set back
    to ``None``. When cycle logging is disabled this is a no-op.

    Parameters
    ----------
    message
        Object exposing ``message_trace``, ``actor_ref`` (with ``uid`` and
        ``address``) and ``content`` (its first element is the method name).
    """
    if not (_debug_opts is not None and _debug_opts.log_cycle_send):
        yield
        return

    extended_trace = [
        *(message.message_trace or []),
        MessageTraceItem(
            uid=message.actor_ref.uid,
            address=message.actor_ref.address,
            method=message.content[0],
        ),
    ]
    _message_trace_var.set(extended_trace)
    try:
        yield
    finally:
        _message_trace_var.set(None)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def detect_cycle_send(message, wait_response: bool = True):
    """
    Attach the current call trace to ``message`` and warn about call cycles.

    Nothing happens when cycle logging is disabled or when the sender does
    not wait for a response. Otherwise ``message.message_trace`` is set to
    the trace held in the context variable (or an empty list), and a warning
    listing the whole call chain is logged if the target actor of ``message``
    already appears in that trace.

    Parameters
    ----------
    message
        Outgoing message exposing ``actor_ref`` (``uid``, ``address``),
        ``content`` and a writable ``message_trace``.
    wait_response : bool
        Whether the sender waits for the reply.
    """
    cycle_logging_on = _debug_opts is not None and _debug_opts.log_cycle_send
    if not (cycle_logging_on and wait_response):
        return

    cur_trace = _message_trace_var.get(None) or []  # type: List[MessageTraceItem]
    message.message_trace = cur_trace

    target_key = (message.actor_ref.uid, message.actor_ref.address)
    visited_keys = {(hop.uid, hop.address) for hop in cur_trace}
    if target_key not in visited_keys:
        return

    closing_hop = MessageTraceItem(
        uid=message.actor_ref.uid,
        address=message.actor_ref.address,
        method=message.content[0],
    )
    trace_lines = [
        f"Calling {hop.method!r} in actor {hop.uid} at {hop.address}"
        for hop in cur_trace + [closing_hop]
    ]
    logger.warning(
        "Call cycle detected when sending to actor %s at %s, the trace is\n"
        " %s",
        message.actor_ref.uid,
        message.actor_ref.address,
        "\n ".join(trace_lines),
    )
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@contextmanager
def no_message_trace():
    """
    Temporarily clear the current message trace for the with-body.

    When cycle logging is enabled, the trace stored in the context variable
    is removed on entry and put back on exit. When cycle logging is disabled
    this is a no-op.
    """
    if _debug_opts is None or not _debug_opts.log_cycle_send:
        yield
        return

    trace = pop_message_trace()
    try:
        yield
    finally:
        # ``@contextmanager`` re-raises an exception from the with-body at the
        # ``yield``; without ``finally`` the popped trace would never be
        # restored and later cycle detection would run on an empty trace.
        set_message_trace(trace)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def pop_message_trace():
    """Return the current message trace (or ``None``) and reset it to ``None``."""
    trace = _message_trace_var.get(None)
    _message_trace_var.set(None)
    return trace
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def set_message_trace(message_trace):
    """Store ``message_trace`` as the message trace of the current context."""
    _message_trace_var.set(message_trace)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# Initialise the debug options once, at import time, from ``DEBUG_OSCAR``.
reload_debug_opts_from_env()
|
xoscar/driver.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from abc import ABC, abstractmethod
|
|
17
|
+
from numbers import Number
|
|
18
|
+
from typing import Dict, Type
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class BaseActorDriver(ABC):
    """Abstract base class that every backend actor driver must subclass."""

    @classmethod
    @abstractmethod
    def setup_cluster(cls, address_to_resources: Dict[str, Dict[str, Number]]):
        """
        Set up the cluster according to the given resources.

        Parameters
        ----------
        address_to_resources : dict
            Resources required for each node. Each value is itself a dict of
            resource name to amount, e.g. ``{'CPU': 3, 'GPU': 1}``.
        """
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Registry of driver classes, keyed by the scheme they were registered under.
_backend_driver_cls: Dict[str, Type[BaseActorDriver]] = {}


def register_backend_driver(scheme: str, cls: Type[BaseActorDriver]):
    """
    Register ``cls`` as the actor driver for ``scheme``.

    An existing registration for the same scheme is overwritten.

    Parameters
    ----------
    scheme : str
        Key under which the driver is stored.
    cls : type
        Driver class; must be a subclass of ``BaseActorDriver``.
    """
    assert issubclass(cls, BaseActorDriver)
    _backend_driver_cls[scheme] = cls
|
xoscar/errors.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class XoscarError(Exception):
    """Base class of every exception defined in this module."""

    pass


# The classes below add no behaviour of their own; they only give distinct
# types that callers can catch individually or through ``XoscarError``.


class ReconstructWorkerError(XoscarError):
    pass


class ActorPoolNotStarted(XoscarError):
    pass


class ActorNotExist(XoscarError):
    pass


class ActorAlreadyExist(XoscarError):
    pass


class NoIdleSlot(XoscarError):
    pass


class NoFreeSlot(XoscarError):
    pass


class SlotStateError(XoscarError):
    pass


class ServerClosed(XoscarError):
    pass


class CannotCancelTask(XoscarError):
    pass


class SendMessageFailed(XoscarError):
    pass


class Return(XoscarError):
    """
    Exception that carries an arbitrary payload in ``value``.

    NOTE(review): presumably raised to hand a result back through exception
    propagation rather than to signal a failure — confirm against callers.
    """

    def __init__(self, value):
        # payload made available to whoever catches the exception
        self.value = value
|
xoscar/libcpp.pxd
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
# derived from copyright 1999-2022 Alibaba Group Holding Ltd.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
# complementary header for C++ STL libs not included in Cython
|
|
17
|
+
|
|
18
|
+
from libc.stdint cimport uint_fast64_t
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Declaration of the C++ standard library's 64-bit Mersenne Twister engine
# (``std::mt19937_64`` from ``<random>``). Every member is declared
# ``except +`` so a C++ exception is translated into a Python exception.
cdef extern from "<random>" namespace "std" nogil:
    cdef cppclass mt19937_64:
        # integer type produced by the engine
        ctypedef uint_fast64_t result_type

        # default-seeded engine
        mt19937_64() except +
        # engine seeded with an explicit value
        mt19937_64(result_type seed) except +
        # draw the next pseudo-random value
        result_type operator()() except +
        # smallest / largest value the engine can produce
        result_type min() except +
        result_type max() except +
        # advance the internal state by ``z`` draws without returning them
        void discard(size_t z) except +
        # re-seed the engine
        void seed(result_type seed) except +
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .api import (
|
|
16
|
+
Metrics,
|
|
17
|
+
Percentile,
|
|
18
|
+
init_metrics,
|
|
19
|
+
record_time_cost_percentile,
|
|
20
|
+
shutdown_metrics,
|
|
21
|
+
)
|
xoscar/metrics/api.py
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
import time
|
|
19
|
+
import weakref
|
|
20
|
+
from contextlib import contextmanager
|
|
21
|
+
from enum import Enum
|
|
22
|
+
from queue import PriorityQueue
|
|
23
|
+
from typing import Any, Callable, NamedTuple
|
|
24
|
+
|
|
25
|
+
from .backends.console import console_metric
|
|
26
|
+
from .backends.metric import AbstractMetric
|
|
27
|
+
from .backends.prometheus import prometheus_metric
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
# True once ``init_metrics()`` has completed; reset by ``shutdown_metrics()``.
_init = False
# Name of the active backend; always a key of ``_backends_cls``.
_metric_backend = "console"
# Backend name -> imported object whose ``Counter`` / ``Gauge`` / ``Meter`` /
# ``Histogram`` attributes are used to build metrics.
_backends_cls = {
    "console": console_metric,
    "prometheus": prometheus_metric,
}

# Placeholder metrics created before ``init_metrics()`` was called. They are
# held weakly, so a placeholder nobody references any more is not initialised.
_metrics_to_be_initialized: weakref.WeakSet = weakref.WeakSet()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def init_metrics(backend="console", config: dict[str, Any] | None = None):
    """
    Select the metric backend and initialise all pending metrics.

    A call made while metrics are already initialised returns immediately.
    For the ``"prometheus"`` backend an HTTP server is started on
    ``config["port"]`` or, when that is missing or falsy, on a port obtained
    from ``get_next_port()``; if the import of ``prometheus_client`` (or of
    the port helper) fails, only a warning is logged. Afterwards every
    placeholder metric created before initialisation gets its real backend
    metric attached.

    Parameters
    ----------
    backend : str
        Backend name; a falsy value means ``"console"``.
    config : dict, optional
        Backend configuration; only the ``"port"`` key is read here.

    Raises
    ------
    NotImplementedError
        If ``backend`` is not a known backend name.
    """
    global _init, _metric_backend

    if _init is True:
        return

    backend = backend or "console"
    if backend not in _backends_cls:
        raise NotImplementedError(f"Do not support metric backend {backend}")
    _metric_backend = backend

    if backend == "prometheus":
        try:
            from prometheus_client import start_http_server

            from ..utils import get_next_port

            requested_port = config.get("port", 0) if config else 0
            port = requested_port or get_next_port()
            start_http_server(port)
            logger.warning(
                "Finished startup prometheus http server and port is %d", port
            )
        except ImportError:
            logger.warning(
                "Failed to start prometheus http server because there is no prometheus_client"
            )

    _init = True
    backend_module = _backends_cls[_metric_backend]
    for pending in _metrics_to_be_initialized:
        metric_cls = getattr(backend_module, pending.type)
        pending.set_metric(
            metric_cls(pending.name, pending.description, pending.tag_keys)
        )
    logger.debug("Finished initialize the metrics of backend: %s.", _metric_backend)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def shutdown_metrics():
    """
    Reset the metric subsystem to its uninitialised state.

    The backend goes back to ``"console"`` and the initialised flag is
    cleared, so a later ``init_metrics()`` call runs again.
    """
    global _init, _metric_backend

    # Remember the backend before resetting it; logging after the reset
    # would always report "console" instead of what was actually shut down.
    previous_backend = _metric_backend
    _metric_backend = "console"
    _init = False
    logger.debug("Shutdown metrics of backend: %s.", previous_backend)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class _MetricWrapper(AbstractMetric):
    """
    Placeholder for a metric created before ``init_metrics()`` was called.

    It remembers the metric's name, description, tag keys and type name and
    forwards to the real backend metric once ``set_metric`` has attached one.
    """

    _metric: AbstractMetric | None
    _log_not_init_error: bool

    def __init__(
        self,
        name: str,
        description: str = "",
        tag_keys: tuple[str, ...] | None = None,
        metric_type: str = "Counter",
    ):
        self._name = name
        self._description = description
        self._tag_keys = tag_keys or ()
        self._type = metric_type
        # real backend metric, attached later through ``set_metric``
        self._metric = None
        # becomes True once the "not initialized" warning has been logged
        self._log_not_init_error = False

    @property
    def type(self):
        """Name of the backend metric class to build for this placeholder."""
        return self._type

    @property
    def value(self):
        """Value of the attached metric; asserts that one is attached."""
        assert (
            self._metric is not None
        ), "Metric is not initialized, please call `init_metrics()` before using metrics."
        return self._metric.value

    def set_metric(self, metric):
        """Attach the real backend metric; ``metric`` must not be ``None``."""
        assert metric is not None, "Argument metric is None, please check it."
        self._metric = metric

    def record(self, value=1, tags: dict[str, str] | None = None):
        """
        Forward ``value`` and ``tags`` to the attached metric.

        While no metric is attached the call is dropped; a warning is logged
        for the first dropped call only.
        """
        if self._metric is not None:
            self._metric.record(value, tags)
            return
        if self._log_not_init_error:
            return
        self._log_not_init_error = True
        logger.warning(
            "Metric is not initialized, please call `init_metrics()` before using metrics."
        )
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def gen_metric(func):
    """
    Decorate a metric factory so it also works before ``init_metrics()``.

    Once metrics are initialised the wrapped factory is called directly.
    Before that, a ``_MetricWrapper`` placeholder is returned and remembered
    so that ``init_metrics()`` can attach the real metric later; the backend
    class name is derived from the factory name (``counter`` -> ``Counter``).

    Parameters
    ----------
    func : callable
        Factory taking ``(name, description, tag_keys)``.

    Returns
    -------
    callable
        Wrapper accepting ``(name, descriptions="", tag_keys=None)``. The
        keyword ``description`` is accepted as well, because that is the
        parameter name the wrapped factories declare; it takes precedence
        over ``descriptions`` when both are given.
    """
    # Local import: ``functools`` is not imported at module level.
    import functools

    @functools.wraps(func)
    def wrapper(
        name,
        descriptions: str = "",
        tag_keys: tuple[str, ...] | None = None,
        *,
        description: str | None = None,
    ):
        # Without this alias, ``Metrics.counter("x", description="d")``
        # failed with ``TypeError`` although the factory names its parameter
        # ``description``.
        if description is not None:
            descriptions = description

        if _init is True:
            return func(name, descriptions, tag_keys)

        logger.info(
            "Metric %s will be initialized when invoking `init_metrics()`.", name
        )
        metric = _MetricWrapper(
            name, descriptions, tag_keys, func.__name__.capitalize()
        )
        _metrics_to_be_initialized.add(metric)
        return metric

    return wrapper
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class Metrics:
    """
    Factory for the supported metric types.

    Each factory builds the metric from the active backend. When called
    before ``init_metrics()``, a placeholder is returned instead and is
    initialised later.

    Note:
        Counter, Meter and Histogram are not thread safe.

    Examples
    --------
    >>> c1 = Metrics.counter('counter1', 'A counter')
    >>> c1.record(1)

    >>> c2 = Metrics.counter('counter2', 'A counter', ('service', 'tenant'))
    >>> c2.record(1, {'service': 'indigen', 'tenant': 'test'})

    >>> g1 = Metrics.gauge('gauge1')
    >>> g1.record(1)

    >>> m1 = Metrics.meter('meter1')
    >>> m1.record(1)

    >>> h1 = Metrics.histogram('histogram1')
    >>> h1.record(1)
    """

    @staticmethod
    @gen_metric
    def counter(name, description: str = "", tag_keys: tuple[str, ...] | None = None):
        """Build a ``Counter`` from the active backend."""
        logger.debug(
            "Initializing a counter with name: %s, tag keys: %s, backend: %s",
            name,
            tag_keys,
            _metric_backend,
        )
        backend = _backends_cls[_metric_backend]
        return backend.Counter(name, description, tag_keys)

    @staticmethod
    @gen_metric
    def gauge(name, description: str = "", tag_keys: tuple[str, ...] | None = None):
        """Build a ``Gauge`` from the active backend."""
        logger.debug(
            "Initializing a gauge whose name: %s, tag keys: %s, backend: %s",
            name,
            tag_keys,
            _metric_backend,
        )
        backend = _backends_cls[_metric_backend]
        return backend.Gauge(name, description, tag_keys)

    @staticmethod
    @gen_metric
    def meter(name, description: str = "", tag_keys: tuple[str, ...] | None = None):
        """Build a ``Meter`` from the active backend."""
        logger.debug(
            "Initializing a meter whose name: %s, tag keys: %s, backend: %s",
            name,
            tag_keys,
            _metric_backend,
        )
        backend = _backends_cls[_metric_backend]
        return backend.Meter(name, description, tag_keys)

    @staticmethod
    @gen_metric
    def histogram(name, description: str = "", tag_keys: tuple[str, ...] | None = None):
        """Build a ``Histogram`` from the active backend."""
        logger.debug(
            "Initializing a histogram whose name: %s, tag keys: %s, backend: %s",
            name,
            tag_keys,
            _metric_backend,
        )
        backend = _backends_cls[_metric_backend]
        return backend.Histogram(name, description, tag_keys)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class Percentile:
    """
    Windowed, approximate upper-percentile tracker.

    Samples are fed in through ``record_data``. A min-heap keeps the
    ``capacity`` largest samples of the current window; after ``window``
    samples the smallest retained one — i.e. the ``capacity``-th largest
    sample of the window — is passed to ``callback`` and a new window starts.
    With ``capacity = window * (1 - p)`` this approximates the p-th
    percentile (see ``build_p99`` / ``build_p95`` / ``build_p90``).

    Parameters
    ----------
    capacity : int
        Number of largest samples retained per window; must be positive.
    window : int
        Number of samples per report; must be positive.
    callback : callable
        Called with the percentile value at the end of every window.

    Raises
    ------
    ValueError
        If ``capacity`` or ``window`` is not positive.
    """

    class PercentileType(Enum):
        P99 = 1
        P95 = 2
        P90 = 3

    _min_heap: PriorityQueue

    def __init__(self, capacity: int, window: int, callback: Callable[[float], None]):
        # Validate first so that a rejected object is never half-initialised.
        if capacity <= 0 or window <= 0:
            raise ValueError(
                f"capacity or window expect to get a positive integer, "
                f"but capacity got: {capacity} and window got: {window}"
            )
        self._capacity = capacity
        self._window = window
        self._callback = callback
        self._min_heap = PriorityQueue()
        self._cur_num = 0

    def record_data(self, value):
        """Add one sample; report and reset when the window is complete."""
        heap = self._min_heap
        if heap.qsize() < self._capacity:
            heap.put(value)
        else:
            # The heap is full: evict its smallest entry unless the new
            # sample is even smaller. The values used to be negated before
            # insertion, which made the heap retain the *smallest* samples
            # and report the low tail instead of the high one.
            smallest_kept = heap.get_nowait()
            heap.put(value if value > smallest_kept else smallest_kept)

        self._cur_num += 1
        if self._cur_num % self._window == 0:
            self._callback(heap.get_nowait())
            self._cur_num = 0
            self._min_heap = PriorityQueue()

    @classmethod
    def build_p99(cls, callback: Callable[[float], None], window: int):
        """Build a tracker retaining the top 1% of each window."""
        return cls(int(window * 0.01), window, callback)

    @classmethod
    def build_p95(cls, callback: Callable[[float], None], window: int):
        """Build a tracker retaining the top 5% of each window."""
        return cls(int(window * 0.05), window, callback)

    @classmethod
    def build_p90(cls, callback: Callable[[float], None], window: int):
        """Build a tracker retaining the top 10% of each window."""
        return cls(int(window * 0.1), window, callback)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# Percentile type -> ``Percentile`` classmethod building the matching tracker;
# each builder is called as ``builder(callback, window)``.
_percentile_builder = {
    Percentile.PercentileType.P99: Percentile.build_p99,
    Percentile.PercentileType.P95: Percentile.build_p95,
    Percentile.PercentileType.P90: Percentile.build_p90,
}
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
class PercentileArg(NamedTuple):
    """Arguments describing one tracker for ``record_time_cost_percentile``."""

    # selects the builder in ``_percentile_builder``
    percentile_type: Percentile.PercentileType
    # passed to the builder as the tracker's report callback
    callback: Callable[[float], None]
    # passed to the builder as the number of samples per report window
    window: int
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
@contextmanager
def record_time_cost_percentile(percentile_args: list[PercentileArg]):
    """
    Time the with-body and feed the elapsed seconds to percentile trackers.

    One tracker is built per entry of ``percentile_args``; after the body has
    finished, the wall-clock duration (``time.time()`` difference) is recorded
    in each of them. If the body raises, nothing is recorded.

    NOTE(review): the trackers are created anew on every entry, so each one
    only ever receives a single sample and a window larger than one is never
    completed — confirm whether the trackers were meant to persist across
    calls.

    Parameters
    ----------
    percentile_args : list
        ``(percentile_type, callback, window)`` triples.
    """
    trackers = []
    for kind, on_report, window_size in percentile_args:
        trackers.append(_percentile_builder[kind](on_report, window_size))
    started_at = time.time()

    yield

    elapsed = time.time() - started_at
    for tracker in trackers:
        tracker.record_data(elapsed)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|