unique_toolkit 0.0.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. unique_toolkit-0.5.0/CHANGELOG.md +38 -0
  2. unique_toolkit-0.5.0/PKG-INFO +135 -0
  3. unique_toolkit-0.5.0/README.md +74 -0
  4. unique_toolkit-0.5.0/pyproject.toml +44 -0
  5. unique_toolkit-0.5.0/unique_toolkit/app/init_logging.py +31 -0
  6. unique_toolkit-0.5.0/unique_toolkit/app/init_sdk.py +41 -0
  7. unique_toolkit-0.5.0/unique_toolkit/app/performance/async_executor.py +186 -0
  8. unique_toolkit-0.5.0/unique_toolkit/app/performance/async_wrapper.py +28 -0
  9. unique_toolkit-0.5.0/unique_toolkit/app/schemas.py +54 -0
  10. unique_toolkit-0.5.0/unique_toolkit/app/verification.py +58 -0
  11. unique_toolkit-0.5.0/unique_toolkit/chat/schemas.py +30 -0
  12. unique_toolkit-0.5.0/unique_toolkit/chat/service.py +380 -0
  13. unique_toolkit-0.5.0/unique_toolkit/chat/state.py +60 -0
  14. unique_toolkit-0.5.0/unique_toolkit/chat/utils.py +25 -0
  15. unique_toolkit-0.5.0/unique_toolkit/content/schemas.py +90 -0
  16. unique_toolkit-0.5.0/unique_toolkit/content/service.py +356 -0
  17. unique_toolkit-0.5.0/unique_toolkit/content/utils.py +188 -0
  18. unique_toolkit-0.5.0/unique_toolkit/embedding/schemas.py +5 -0
  19. unique_toolkit-0.5.0/unique_toolkit/embedding/service.py +89 -0
  20. unique_toolkit-0.5.0/unique_toolkit/language_model/infos.py +305 -0
  21. unique_toolkit-0.5.0/unique_toolkit/language_model/schemas.py +168 -0
  22. unique_toolkit-0.5.0/unique_toolkit/language_model/service.py +261 -0
  23. unique_toolkit-0.5.0/unique_toolkit/language_model/utils.py +44 -0
  24. unique_toolkit-0.0.2/PKG-INFO +0 -33
  25. unique_toolkit-0.0.2/README.md +0 -14
  26. unique_toolkit-0.0.2/pyproject.toml +0 -71
  27. {unique_toolkit-0.0.2 → unique_toolkit-0.5.0}/LICENSE +0 -0
  28. {unique_toolkit-0.0.2 → unique_toolkit-0.5.0}/unique_toolkit/__init__.py +0 -0
@@ -0,0 +1,38 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.5.0] - 2024-07-23
9
+ ### Added
10
+ - Added `unique_toolkit.app` module with the following components:
11
+ - `init_logging.py` for initializing the logger.
12
+ - `init_sdk.py` for initializing the SDK with environment variables.
13
+ - `schemas.py` containing the Event schema.
14
+ - `verification.py` for verifying the endpoint secret and constructing the event.
15
+
16
+ - Added `unique_toolkit.chat` module with the following components:
17
+ - `state.py` containing the `ChatState` class.
18
+ - `service.py` containing the `ChatService` class for managing chat interactions.
19
+ - `schemas.py` containing relevant schemas such as `ChatMessage`.
20
+ - `utils.py` with utility functions for chat interactions.
21
+
22
+ - Added `unique_toolkit.content` module with the following components:
23
+ - `service.py` containing the `ContentService` class for interacting with content.
24
+ - `schemas.py` containing relevant schemas such as `Content` and `ContentChunk`.
25
+ - `utils.py` with utility functions for manipulating content objects.
26
+
27
+ - Added `unique_toolkit.embedding` module with the following components:
28
+ - `service.py` containing the `EmbeddingService` class for working with embeddings.
29
+ - `schemas.py` containing relevant schemas such as `Embeddings`.
30
+
31
+ - Added `unique_toolkit.language_model` module with the following components:
32
+ - `infos.py` containing information on language models deployed on the Unique platform.
33
+ - `service.py` containing the `LanguageModelService` class for interacting with language models.
34
+ - `schemas.py` containing relevant schemas such as `LanguageModelResponse`.
35
+ - `utils.py` with utility functions for parsing language model output.
36
+
37
+ ## [0.0.2] - 2024-07-10
38
+ - Initial release of `unique_toolkit`.
@@ -0,0 +1,135 @@
1
+ Metadata-Version: 2.1
2
+ Name: unique_toolkit
3
+ Version: 0.5.0
4
+ Summary:
5
+ License: MIT
6
+ Author: Martin Fadler
7
+ Author-email: martin.fadler@unique.ch
8
+ Requires-Python: >=3.11,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: numpy (>=2.0.1,<3.0.0)
14
+ Requires-Dist: pydantic (>=2.8.2,<3.0.0)
15
+ Requires-Dist: pyhumps (>=3.8.0,<4.0.0)
16
+ Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
17
+ Requires-Dist: regex (>=2024.5.15,<2025.0.0)
18
+ Requires-Dist: requests (>=2.32.3,<3.0.0)
19
+ Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
20
+ Requires-Dist: typing-extensions (>=4.9.0,<5.0.0)
21
+ Requires-Dist: unique-sdk (>=0.8.10,<0.9.0)
22
+ Description-Content-Type: text/markdown
23
+
24
+ # Unique Toolkit
25
+
26
+ This package provides high-level abstractions and methods on top of `unique_sdk` to ease application development for the Unique Platform.
27
+
28
+ The Toolkit is structured along the following domains:
29
+ - `unique_toolkit.chat`
30
+ - `unique_toolkit.content`
31
+ - `unique_toolkit.embedding`
32
+ - `unique_toolkit.language_model`
33
+
34
+ Each domain comprises a service class (in `service.py`) which encapsulates the basic functionalities to interact with the domain entities, the schemas
35
+ (in `schemas.py`) used in the service and required for interacting with the service functions, utility functions (in `utils.py`) which give additional
36
+ functionality to interact with the domain entities (all domains except Embedding) and other domain specific functionalities which are explained in the respective domain documentation.
37
+
38
+ In addition, the `app` module provides functions to initialize and secure apps and perform parallel requests (only with an async app like Flask) that will interact with the Unique platform.
39
+
40
+ ## Changelog
41
+
42
+ See the [CHANGELOG.md](https://github.com/Unique-AG/ai/blob/main/unique_toolkit/CHANGELOG.md) file for details on changes and version history.
43
+
44
+ # Domains
45
+
46
+ ## App
47
+
48
+ The `unique_toolkit.app` module encompasses functions for initializing and securing apps that will interact with the Unique platform.
49
+
50
+ - `init_logging.py` can be used to initialize the logger either with the unique dictConfig or with any other dictConfig.
51
+ - `init_sdk.py` can be used to initialize the sdk using the correct env variables and retrieving the endpoint secret.
52
+ - `schemas.py` contains the Event schema which can be used to parse and validate the unique.chat.external-module.chosen event.
53
+ - `verification.py` can be used to verify the endpoint secret and construct the event.
54
+
55
+ ## Chat
56
+
57
+ The `unique_toolkit.chat` module encompasses all chat related functionality.
58
+
59
+ - `state.py` comprises the ChatState which is used to store the current state of the chat interaction and the user information.
60
+ - `service.py` comprises the ChatService and provides an interface to manage and load the chat history and interact with the chat ui, e.g., creating a new assistant message.
61
+ - `schemas.py` comprises all relevant schemas, e.g., ChatMessage, used in the ChatService.
62
+ - `utils.py` comprises utility functions to use and convert ChatMessage objects in assistants, e.g., convert_chat_history_to_injectable_string converts the chat history to a string that can be injected into a prompt.
63
+
64
+ ## Content
65
+
66
+ The `unique_toolkit.content` module encompasses all content related functionality. Content can be any type of textual data that is stored in the Knowledgebase on the Unique platform. During the ingestion of the content, the content is parsed, split into chunks, indexed, and stored in the database.
67
+
68
+ - `service.py` comprises the ContentService and provides an interface to interact with the content, e.g., search content, search content chunks, upload and download content.
69
+ - `schemas.py` comprises all relevant schemas, e.g., Content and ContentChunk, used in the ContentService.
70
+ - `utils.py` comprises utility functions to manipulate Content and ContentChunk objects, e.g., sort_content_chunks and merge_content_chunks.
71
+
72
+ ## Embedding
73
+
74
+ The `unique_toolkit.embedding` module encompasses all embedding related functionality. Embeddings are used to represent textual data in a high-dimensional space. The embeddings can be used to calculate the similarity between two texts, for instance.
75
+
76
+ - `service.py` encompasses the EmbeddingService and provides an interface to interact with the embeddings, e.g., embed text and calculate the similarity between two texts.
77
+ - `schemas.py` comprises all relevant schemas, e.g., Embeddings, used in the EmbeddingService.
78
+
79
+ ## Language Model
80
+
81
+ The `unique_toolkit.language_model` module encompasses all language model related functionality and information on the different language models deployed through the
82
+ Unique platform.
83
+
84
+ - `infos.py` comprises the information on all language models deployed through the Unique platform. We recommend using the LanguageModel class, initialized with the LanguageModelName, e.g., LanguageModel(LanguageModelName.AZURE_GPT_35_TURBO_16K) to get the information on the specific language model like the name, version, token limits or retirement date.
85
+ - `service.py` comprises the LanguageModelService and provides an interface to interact with the language models, e.g., complete or stream_complete.
86
+ - `schemas.py` comprises all relevant schemas, e.g., LanguageModelResponse, used in the LanguageModelService.
87
+ - `utils.py` comprises utility functions to parse the output of the language model, e.g., convert_string_to_json finds and parses the last json object in a string.
88
+
89
+ # Development instructions
90
+
91
+ 1. Install poetry on your system (through `brew` or `pipx`).
92
+
93
+ 2. Install `pyenv` and install python 3.11. `pyenv` is recommended as otherwise poetry uses the python version used to install itself and not the user preferred python version.
94
+
95
+ 3. If you then run `python --version` in your terminal, you should be able to see python version as specified in `.python-version`.
96
+
97
+ 4. Then finally run `poetry install` to install the package and all dependencies.
98
+ # Changelog
99
+
100
+ All notable changes to this project will be documented in this file.
101
+
102
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
103
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
104
+
105
+ ## [0.5.0] - 2024-07-23
106
+ ### Added
107
+ - Added `unique_toolkit.app` module with the following components:
108
+ - `init_logging.py` for initializing the logger.
109
+ - `init_sdk.py` for initializing the SDK with environment variables.
110
+ - `schemas.py` containing the Event schema.
111
+ - `verification.py` for verifying the endpoint secret and constructing the event.
112
+
113
+ - Added `unique_toolkit.chat` module with the following components:
114
+ - `state.py` containing the `ChatState` class.
115
+ - `service.py` containing the `ChatService` class for managing chat interactions.
116
+ - `schemas.py` containing relevant schemas such as `ChatMessage`.
117
+ - `utils.py` with utility functions for chat interactions.
118
+
119
+ - Added `unique_toolkit.content` module with the following components:
120
+ - `service.py` containing the `ContentService` class for interacting with content.
121
+ - `schemas.py` containing relevant schemas such as `Content` and `ContentChunk`.
122
+ - `utils.py` with utility functions for manipulating content objects.
123
+
124
+ - Added `unique_toolkit.embedding` module with the following components:
125
+ - `service.py` containing the `EmbeddingService` class for working with embeddings.
126
+ - `schemas.py` containing relevant schemas such as `Embeddings`.
127
+
128
+ - Added `unique_toolkit.language_model` module with the following components:
129
+ - `infos.py` containing information on language models deployed on the Unique platform.
130
+ - `service.py` containing the `LanguageModelService` class for interacting with language models.
131
+ - `schemas.py` containing relevant schemas such as `LanguageModelResponse`.
132
+ - `utils.py` with utility functions for parsing language model output.
133
+
134
+ ## [0.0.2] - 2024-07-10
135
+ - Initial release of `unique_toolkit`.
@@ -0,0 +1,74 @@
1
+ # Unique Toolkit
2
+
3
+ This package provides high-level abstractions and methods on top of `unique_sdk` to ease application development for the Unique Platform.
4
+
5
+ The Toolkit is structured along the following domains:
6
+ - `unique_toolkit.chat`
7
+ - `unique_toolkit.content`
8
+ - `unique_toolkit.embedding`
9
+ - `unique_toolkit.language_model`
10
+
11
+ Each domain comprises a service class (in `service.py`) which encapsulates the basic functionalities to interact with the domain entities, the schemas
12
+ (in `schemas.py`) used in the service and required for interacting with the service functions, utility functions (in `utils.py`) which give additional
13
+ functionality to interact with the domain entities (all domains except Embedding) and other domain specific functionalities which are explained in the respective domain documentation.
14
+
15
+ In addition, the `app` module provides functions to initialize and secure apps and perform parallel requests (only with an async app like Flask) that will interact with the Unique platform.
16
+
17
+ ## Changelog
18
+
19
+ See the [CHANGELOG.md](https://github.com/Unique-AG/ai/blob/main/unique_toolkit/CHANGELOG.md) file for details on changes and version history.
20
+
21
+ # Domains
22
+
23
+ ## App
24
+
25
+ The `unique_toolkit.app` module encompasses functions for initializing and securing apps that will interact with the Unique platform.
26
+
27
+ - `init_logging.py` can be used to initialize the logger either with the unique dictConfig or with any other dictConfig.
28
+ - `init_sdk.py` can be used to initialize the sdk using the correct env variables and retrieving the endpoint secret.
29
+ - `schemas.py` contains the Event schema which can be used to parse and validate the unique.chat.external-module.chosen event.
30
+ - `verification.py` can be used to verify the endpoint secret and construct the event.
31
+
32
+ ## Chat
33
+
34
+ The `unique_toolkit.chat` module encompasses all chat related functionality.
35
+
36
+ - `state.py` comprises the ChatState which is used to store the current state of the chat interaction and the user information.
37
+ - `service.py` comprises the ChatService and provides an interface to manage and load the chat history and interact with the chat ui, e.g., creating a new assistant message.
38
+ - `schemas.py` comprises all relevant schemas, e.g., ChatMessage, used in the ChatService.
39
+ - `utils.py` comprises utility functions to use and convert ChatMessage objects in assistants, e.g., convert_chat_history_to_injectable_string converts the chat history to a string that can be injected into a prompt.
40
+
41
+ ## Content
42
+
43
+ The `unique_toolkit.content` module encompasses all content related functionality. Content can be any type of textual data that is stored in the Knowledgebase on the Unique platform. During the ingestion of the content, the content is parsed, split into chunks, indexed, and stored in the database.
44
+
45
+ - `service.py` comprises the ContentService and provides an interface to interact with the content, e.g., search content, search content chunks, upload and download content.
46
+ - `schemas.py` comprises all relevant schemas, e.g., Content and ContentChunk, used in the ContentService.
47
+ - `utils.py` comprises utility functions to manipulate Content and ContentChunk objects, e.g., sort_content_chunks and merge_content_chunks.
48
+
49
+ ## Embedding
50
+
51
+ The `unique_toolkit.embedding` module encompasses all embedding related functionality. Embeddings are used to represent textual data in a high-dimensional space. The embeddings can be used to calculate the similarity between two texts, for instance.
52
+
53
+ - `service.py` encompasses the EmbeddingService and provides an interface to interact with the embeddings, e.g., embed text and calculate the similarity between two texts.
54
+ - `schemas.py` comprises all relevant schemas, e.g., Embeddings, used in the EmbeddingService.
55
+
56
+ ## Language Model
57
+
58
+ The `unique_toolkit.language_model` module encompasses all language model related functionality and information on the different language models deployed through the
59
+ Unique platform.
60
+
61
+ - `infos.py` comprises the information on all language models deployed through the Unique platform. We recommend using the LanguageModel class, initialized with the LanguageModelName, e.g., LanguageModel(LanguageModelName.AZURE_GPT_35_TURBO_16K) to get the information on the specific language model like the name, version, token limits or retirement date.
62
+ - `service.py` comprises the LanguageModelService and provides an interface to interact with the language models, e.g., complete or stream_complete.
63
+ - `schemas.py` comprises all relevant schemas, e.g., LanguageModelResponse, used in the LanguageModelService.
64
+ - `utils.py` comprises utility functions to parse the output of the language model, e.g., convert_string_to_json finds and parses the last json object in a string.
65
+
66
+ # Development instructions
67
+
68
+ 1. Install poetry on your system (through `brew` or `pipx`).
69
+
70
+ 2. Install `pyenv` and install python 3.11. `pyenv` is recommended as otherwise poetry uses the python version used to install itself and not the user preferred python version.
71
+
72
+ 3. If you then run `python --version` in your terminal, you should be able to see python version as specified in `.python-version`.
73
+
74
+ 4. Then finally run `poetry install` to install the package and all dependencies.
@@ -0,0 +1,44 @@
1
+ [tool.poetry]
2
+ name = "unique_toolkit"
3
+ version = "0.5.0"
4
+ description = ""
5
+ authors = [
6
+ "Martin Fadler <martin.fadler@unique.ch>",
7
+ "Sadique Sheik <sadique@unique.ch>",
8
+ "Fabian Schläpfer <fabian@unique.ch>",
9
+ "Pascal Hauri <pascal@unique.ch>",
10
+ ]
11
+ readme = ["README.md", "CHANGELOG.md"]
12
+ license = "MIT"
13
+
14
+ [tool.poetry.dependencies]
15
+ python = "^3.11"
16
+ typing-extensions = "^4.9.0"
17
+ pydantic = "^2.8.2"
18
+ pyhumps = "^3.8.0"
19
+ unique-sdk = "^0.8.10"
20
+ numpy = "^2.0.1"
21
+ python-dotenv = "^1.0.1"
22
+ requests = "^2.32.3"
23
+ regex = "^2024.5.15"
24
+ tiktoken = "^0.7.0"
25
+
26
+
27
+ [tool.poetry.group.dev.dependencies]
28
+ ruff = "0.5.0"
29
+ pytest = "^7.4.3"
30
+ tox = "^4.11.4"
31
+ pyright = "^1.1.341"
32
+ pytest-cov = "^4.1.0"
33
+ pre-commit = "^3.7.1"
34
+ pytest-asyncio = "^0.23.8"
35
+
36
+ [build-system]
37
+ requires = ["poetry-core"]
38
+ build-backend = "poetry.core.masonry.api"
39
+
40
+ [tool.ruff]
41
+ target-version = "py311"
42
+
43
+ [tool.ruff.lint]
44
+ extend-select = ["I"]
@@ -0,0 +1,31 @@
1
+ from logging import Formatter
2
+ from logging.config import dictConfig
3
+ from time import gmtime
4
+
5
+
6
class UTCFormatter(Formatter):
    """Log formatter whose timestamps are rendered with `time.gmtime` (UTC)."""

    # `Formatter` calls `converter` to turn the record's creation time into a
    # struct_time; overriding it with gmtime replaces the default converter.
    converter = gmtime
8
+
9
+
10
# Default logging configuration in `logging.config.dictConfig` schema version 1:
# the root logger emits DEBUG and above to a single console StreamHandler that
# formats records with UTCFormatter.
unique_log_config = {
    "version": 1,
    "root": {"level": "DEBUG", "handlers": ["console"]},
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "level": "DEBUG",
            "formatter": "utc",
        }
    },
    "formatters": {
        "utc": {
            # "()" tells dictConfig to instantiate this factory instead of the
            # stock logging.Formatter.
            "()": UTCFormatter,
            "format": "%(asctime)s: %(message)s",
            "datefmt": "%Y-%m-%d %H:%M:%S",
        },
    },
}
28
+
29
+
30
def init_logging(config: dict = unique_log_config):
    """Configure the `logging` module from a dictConfig-style dictionary.

    Args:
        config: Logging configuration passed straight to
            `logging.config.dictConfig`. Defaults to `unique_log_config`.

    Returns:
        Whatever `dictConfig` returns.
    """
    return dictConfig(config)
@@ -0,0 +1,41 @@
1
+ import os
2
+
3
+ import unique_sdk
4
+
5
+
6
def get_env(var_name, default=None, strict=False):
    """Look up an environment variable, with an optional fallback.

    An unset variable and a variable set to the empty string are treated the
    same way: both count as "no value".

    Args:
        var_name (str): Name of the environment variable.
        default (str, optional): Value returned when the variable has no value
            and ``strict`` is false. Defaults to None.
        strict (bool, optional): When true, a missing or empty variable raises
            instead of falling back to ``default``. Defaults to False.

    Raises:
        ValueError: If ``strict`` is true and the variable has no value.

    Returns:
        The variable's value when it is non-empty, otherwise ``default``.
    """
    value = os.environ.get(var_name)
    if value:
        return value
    if strict:
        raise ValueError(f"{var_name} is not set")
    return default
25
+
26
+
27
def init_sdk(strict_all_vars=False):
    """Populate the `unique_sdk` module settings from the environment.

    Reads ``API_KEY``, ``APP_ID`` and ``API_BASE`` (in that order) and assigns
    them to ``unique_sdk.api_key``, ``unique_sdk.app_id`` and
    ``unique_sdk.api_base``.

    Args:
        strict_all_vars (bool, optional): Forwarded as ``strict`` to every
            lookup, so a missing variable raises a ValueError instead of
            falling back to its default. Defaults to False.
    """
    # (attribute on unique_sdk, environment variable, fallback value)
    settings = (
        ("api_key", "API_KEY", "dummy"),
        ("app_id", "APP_ID", "dummy"),
        ("api_base", "API_BASE", None),
    )
    for attribute, env_name, fallback in settings:
        setattr(
            unique_sdk,
            attribute,
            get_env(env_name, default=fallback, strict=strict_all_vars),
        )
37
+
38
def get_endpoint_secret():
    """Return the ``ENDPOINT_SECRET`` environment variable, or None when it is unset."""
    return os.environ.get("ENDPOINT_SECRET")
@@ -0,0 +1,186 @@
1
+ import asyncio
2
+ import contextlib
3
+ import logging
4
+ import threading
5
+ import time
6
+ from concurrent.futures import ThreadPoolExecutor
7
+ from math import ceil
8
+ from typing import (
9
+ AsyncContextManager,
10
+ Awaitable,
11
+ Callable,
12
+ Optional,
13
+ Sequence,
14
+ TypeVar,
15
+ Union,
16
+ )
17
+
18
+ T = TypeVar("T")
19
+ Result = Union[T, BaseException]
20
+
21
+
22
+ class AsyncExecutor:
23
+ """
24
+ A class for executing asynchronous tasks concurrently, with optional threading support.
25
+
26
+ This class provides methods to run multiple asynchronous tasks in parallel, with
27
+ the ability to limit the number of concurrent tasks and distribute work across
28
+ multiple threads if needed.
29
+
30
+ Attributes:
31
+ logger (logging.Logger): Logger instance for recording execution information.
32
+ context_manager (Callable[[], AsyncContextManager]): A factory function that returns
33
+ an async context manager to be used for each task execution, e.g., quart.current_app.app_context().
34
+
35
+ """
36
+
37
+ def __init__(
38
+ self,
39
+ logger: Optional[logging.Logger] = None,
40
+ context_manager: Optional[Callable[[], AsyncContextManager]] = None,
41
+ ) -> None:
42
+ self.logger = logger or logging.getLogger(__name__)
43
+ self.context_manager = context_manager or (lambda: contextlib.nullcontext())
44
+
45
+ async def run_async_tasks(
46
+ self,
47
+ tasks: Sequence[Awaitable[T]],
48
+ max_tasks: int,
49
+ ) -> list[Result]:
50
+ """
51
+ Executes the a set of given async tasks and returns the results.
52
+
53
+ Args:
54
+ tasks (list[Awaitable[T]]): list of async callables to execute in parallel.
55
+ max_tasks (int): Maximum number of tasks for the asyncio Semaphore.
56
+
57
+ Returns:
58
+ list[Result]: list of results from the executed tasks.
59
+ """
60
+
61
+ async def logging_wrapper(task: Awaitable[T], task_id: int) -> Result:
62
+ thread = threading.current_thread()
63
+ start_time = time.time()
64
+
65
+ self.logger.info(
66
+ f"Thread {thread.name} (ID: {thread.ident}) starting task {task_id}"
67
+ )
68
+
69
+ try:
70
+ async with self.context_manager():
71
+ result = await task
72
+ return result
73
+ except Exception as e:
74
+ self.logger.error(
75
+ f"Thread {thread.name} (ID: {thread.ident}) - Task {task_id} failed with error: {e}"
76
+ )
77
+ return e
78
+ finally:
79
+ end_time = time.time()
80
+ duration = end_time - start_time
81
+ self.logger.debug(
82
+ f"Thread {thread.name} (ID: {thread.ident}) - Task {task_id} finished in {duration:.2f} seconds"
83
+ )
84
+
85
+ sem = asyncio.Semaphore(max_tasks)
86
+
87
+ async def sem_task(task: Awaitable[T], task_id: int) -> Result:
88
+ async with sem:
89
+ return await logging_wrapper(task, task_id)
90
+
91
+ wrapped_tasks: list[Awaitable[Result]] = [
92
+ sem_task(task, i) for i, task in enumerate(tasks)
93
+ ]
94
+
95
+ results: list[Result] = await asyncio.gather(
96
+ *wrapped_tasks, return_exceptions=True
97
+ )
98
+
99
+ return results
100
+
101
+ async def run_async_tasks_in_threads(
102
+ self,
103
+ tasks: Sequence[Awaitable[T]],
104
+ max_threads: int,
105
+ max_tasks: int,
106
+ ) -> list[Result[T]]:
107
+ """
108
+ Executes the given async tasks in multiple threads and returns the results.
109
+
110
+ Args:
111
+ tasks (list[Awaitable[T]]): list of async callables to execute in parallel.
112
+ max_threads (int): Maximum number of threads.
113
+ max_tasks (int): Maximum number of tasks per thread run in parallel.
114
+
115
+ Returns:
116
+ list[Result]: list of results from the executed tasks.
117
+ """
118
+
119
+ async def run_in_thread(task_chunk: list[Awaitable[T]]) -> list[Result]:
120
+ loop = asyncio.new_event_loop()
121
+ asyncio.set_event_loop(loop)
122
+ async with self.context_manager():
123
+ return await self.run_async_tasks(task_chunk, max_tasks)
124
+
125
+ def thread_worker(
126
+ task_chunk: list[Awaitable[T]], chunk_id: int
127
+ ) -> list[Result]:
128
+ thread = threading.current_thread()
129
+ self.logger.info(
130
+ f"Thread {thread.name} (ID: {thread.ident}) starting chunk {chunk_id} with {len(task_chunk)} tasks"
131
+ )
132
+
133
+ start_time = time.time()
134
+ loop = asyncio.new_event_loop()
135
+ asyncio.set_event_loop(loop)
136
+
137
+ try:
138
+ results = loop.run_until_complete(run_in_thread(task_chunk))
139
+ end_time = time.time()
140
+ duration = end_time - start_time
141
+ self.logger.info(
142
+ f"Thread {thread.name} (ID: {thread.ident}) finished chunk {chunk_id} in {duration:.2f} seconds"
143
+ )
144
+ return results
145
+ except Exception as e:
146
+ self.logger.error(
147
+ f"Thread {thread.name} (ID: {thread.ident}) encountered an error in chunk {chunk_id}: {str(e)}"
148
+ )
149
+ raise
150
+ finally:
151
+ loop.close()
152
+
153
+ start_time = time.time()
154
+ # Calculate the number of tasks per thread
155
+ tasks_per_thread: int = ceil(len(tasks) / max_threads)
156
+
157
+ # Split tasks into chunks
158
+ task_chunks: list[Sequence[Awaitable[T]]] = [
159
+ tasks[i : i + tasks_per_thread]
160
+ for i in range(0, len(tasks), tasks_per_thread)
161
+ ]
162
+
163
+ self.logger.info(
164
+ f"Splitting {len(tasks)} tasks into {len(task_chunks)} chunks across {max_threads} threads"
165
+ )
166
+
167
+ # Use ThreadPoolExecutor to manage threads
168
+ with ThreadPoolExecutor(max_workers=max_threads) as executor:
169
+ # Submit each chunk of tasks to a thread
170
+ future_results: list[list[Result]] = list(
171
+ executor.map(
172
+ thread_worker,
173
+ task_chunks,
174
+ range(len(task_chunks)), # chunk_id
175
+ )
176
+ )
177
+
178
+ # Flatten the results from all threads
179
+ results: list[Result] = [item for sublist in future_results for item in sublist]
180
+ end_time = time.time()
181
+ duration = end_time - start_time
182
+ self.logger.info(
183
+ f"All threads completed. Total results: {len(results)}. Duration: {duration:.2f} seconds"
184
+ )
185
+
186
+ return results
@@ -0,0 +1,28 @@
1
+ import asyncio
2
+ import warnings
3
+ from functools import wraps
4
+ from typing import Any, Callable, Coroutine, TypeVar
5
+
6
T = TypeVar("T")


def to_async(func: Callable[..., T]) -> Callable[..., Coroutine[Any, Any, T]]:
    """Turn a synchronous callable into a coroutine function.

    Each call of the returned wrapper hands ``func`` and its arguments to
    ``asyncio.to_thread`` and awaits the outcome.

    Args:
        func: The synchronous callable to wrap.

    Returns:
        A coroutine function with the metadata of ``func`` that resolves to
        ``func``'s return value.
    """

    @wraps(func)
    async def wrapper(*args, **kwargs) -> T:
        outcome = await asyncio.to_thread(func, *args, **kwargs)
        return outcome

    return wrapper
15
+
16
+
17
def async_warning(func):
    """Decorate a coroutine function so every call emits a RuntimeWarning.

    The warning tells the caller that ``func`` is not purely async; the call is
    then awaited and its result returned unchanged.

    Args:
        func: The coroutine function to wrap.

    Returns:
        A coroutine function with the metadata of ``func``.
    """
    message = (
        f"The function '{func.__name__}' is not purely async. It uses a thread pool executor underneath, "
        "which may impact performance for CPU-bound operations."
    )

    @wraps(func)
    async def wrapper(*args, **kwargs):
        # stacklevel=2 attributes the warning to the wrapper's caller.
        warnings.warn(message, RuntimeWarning, stacklevel=2)
        return await func(*args, **kwargs)

    return wrapper
@@ -0,0 +1,54 @@
1
+ from enum import StrEnum
2
+ from typing import Any
3
+
4
+ from humps import camelize
5
+ from pydantic import BaseModel, ConfigDict
6
+
7
# Shared pydantic config for all event models: field aliases are generated with
# `humps.camelize`, so the snake_case attributes below are read from camelCase
# keys; `populate_by_name` additionally accepts the snake_case field names.
model_config = ConfigDict(
    alias_generator=camelize, populate_by_name=True, arbitrary_types_allowed=True
)
11
+
12
+
13
class EventName(StrEnum):
    """Names of the events this module knows how to parse."""

    EXTERNAL_MODULE_CHOSEN = "unique.chat.external-module.chosen"
15
+
16
+
17
# User-message part of an event payload (see EventPayload.user_message).
class EventUserMessage(BaseModel):
    model_config = model_config

    id: str
    text: str
    # NOTE(review): kept as a plain string; presumably a timestamp — confirm format.
    created_at: str
23
+
24
+
25
# Assistant-message part of an event payload (see EventPayload.assistant_message).
class EventAssistantMessage(BaseModel):
    model_config = model_config

    id: str
    # NOTE(review): kept as a plain string; presumably a timestamp — confirm format.
    created_at: str
30
+
31
+
32
# Payload carried by an Event; `name` must be one of the EventName values.
class EventPayload(BaseModel):
    model_config = model_config

    name: EventName
    description: str
    # Free-form settings; keys and value types are not constrained here.
    configuration: dict[str, Any]
    chat_id: str
    assistant_id: str
    user_message: EventUserMessage
    assistant_message: EventAssistantMessage
    # Optional; defaults to None when absent from the input.
    text: str | None = None
43
+
44
+
45
# Top-level event envelope: identifiers plus the nested EventPayload.
class Event(BaseModel):
    model_config = model_config

    id: str
    # Unconstrained string here, unlike `payload.name`, which is an EventName.
    event: str
    user_id: str
    company_id: str
    payload: EventPayload
    # NOTE(review): integer here but string on the message models; presumably an
    # epoch timestamp — confirm.
    created_at: int | None = None
    version: str | None = None