speedy-utils 1.1.47__tar.gz → 1.1.48__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/PKG-INFO +1 -1
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/pyproject.toml +1 -1
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/__init__.py +1 -3
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/chat_format/__init__.py +0 -2
- speedy_utils-1.1.48/src/llm_utils/chat_format/display.py +384 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/llm.py +21 -10
- speedy_utils-1.1.47/src/llm_utils/chat_format/display.py +0 -465
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.githooks/pre-push +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/copilot-instructions.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/prompts/improveParallelErrorHandling.prompt.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/caching-utilities/SKILL.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/caching-utilities/examples/caching_example.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/dataset-processing-multiprocessing/CONFIG_REFERENCE.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/dataset-processing-multiprocessing/SKILL.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/dataset-processing-multiprocessing/example_tokenize_pack.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/edit-llm-inference-style/SKILL.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/io-utilities/SKILL.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/io-utilities/examples/io_example.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/llm-integration/SKILL.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/llm-integration/examples/llm_example.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/multi-threading-processing/SKILL.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/ray-distributed-computing/SKILL.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/skill-creation/SKILL.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/vision-utilities/SKILL.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/skills/vision-utilities/examples/vision_example.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.github/workflows/publish.yml +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.gitignore +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/.pre-commit-config.yaml +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/AGENTS.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/README.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/bumpversion.sh +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/debug/debug_generate_response.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/debug/debug_n_param.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/debug/debug_n_structure.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/debug/integration_test.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/debug/test_decode_api.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/debug/test_endpoints.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/debug/test_generate.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/debug/test_generate_endpoint.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/docs/GENERATE_QUICKREF.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/docs/IMPLEMENTATION.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/docs/QUICKSTART.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/docs/SKILL.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/docs/TOKENIZATION.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/docs/TOKENIZATION_IMPLEMENTATION.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/docs/zero_copy_sharing.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/examples/generate_example.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/examples/llm_ray_example.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/examples/pytorch_large_model.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/examples/shared_kwargs_example.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/examples/temperature_range_example.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/examples/test_parallel_gpu.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/examples/test_share_ray.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/examples/tokenization_example.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/examples/vision_utils_example.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/experiments/exp1/dockerfile +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/experiments/exp1/run_in_docker.sh +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/experiments/exp1/test.png +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/experiments/test_read_image.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/notebooks/README.ipynb +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/notebooks/llm_utils/llm_as_a_judge.ipynb +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/notebooks/parallel_gpu_pool.ipynb +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/notebooks/ray_tutorial.ipynb +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/notebooks/test_multi_thread.ipynb +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/ruff.toml +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/scripts/bug.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/scripts/bug_simple.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/scripts/debug_import_time.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/scripts/deploy.sh +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/scripts/imports.sh +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/scripts/test.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/scripts/test_both_backends.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/scripts/test_error_handling.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/scripts/test_import_time_vision.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/scripts/test_locals.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/scripts/test_ray_locals.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/scripts/test_ray_mp.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/setup.cfg +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/datasets_utils/convert_to_arrow.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/chat_format/transform.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/chat_format/utils.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/group_messages.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/llm_ray.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/__init__.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/async_lm/__init__.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/async_lm/_utils.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/async_lm/async_llm_task.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/async_lm/async_lm.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/async_lm/async_lm_base.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/base_prompt_builder.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/llm_signature.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/lm_base.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/mixins.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/openai_memoize.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/signature.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/lm/utils.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/scripts/README.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/scripts/fast_vllm.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/scripts/vllm_serve.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/vector_cache/__init__.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/vector_cache/cli.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/vector_cache/core.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/vector_cache/types.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/llm_utils/vector_cache/utils.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/__imports.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/__init__.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/common/__init__.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/common/clock.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/common/function_decorator.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/common/logger.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/common/notebook_utils.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/common/patcher.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/common/report_manager.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/common/utils_cache.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/common/utils_error.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/common/utils_io.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/common/utils_misc.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/common/utils_print.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/multi_worker/__init__.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/multi_worker/_multi_process.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/multi_worker/_multi_process_ray.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/multi_worker/common.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/multi_worker/dataset_ray.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/multi_worker/dataset_sharding.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/multi_worker/parallel_gpu_pool.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/multi_worker/process.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/multi_worker/progress.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/multi_worker/thread.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/scripts/__init__.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/scripts/kill_mpython.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/scripts/mpython.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/vision_utils/README.md +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/vision_utils/__init__.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/vision_utils/io_utils.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/src/vision_utils/plot.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/test.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/import_all.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/import_time_report.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/integration_test.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/llm_utils/test_llm_mixins.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/sample_objects.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/test.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/test_logger.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/test_logger_format.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/test_memoize_typing.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/test_mpython.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/test_multithread_error_trace.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/test_process.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/test_process_update.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/test_pytorch_sharing.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/test_shared_kwargs.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/test_thread.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/tests/test_tokenization.py +0 -0
- {speedy_utils-1.1.47 → speedy_utils-1.1.48}/uv.lock +0 -0
src/llm_utils/__init__.py

@@ -1,3 +1,4 @@
+from llm_utils.llm_ray import LLMRay
 from llm_utils.lm import (
     LLM,
     AsyncLLMTask,
@@ -12,7 +13,6 @@ from llm_utils.lm import (
 from llm_utils.lm.base_prompt_builder import BasePromptBuilder
 from llm_utils.lm.lm_base import get_model_name
 from llm_utils.lm.openai_memoize import MOpenAI
-from llm_utils.llm_ray import LLMRay
 from llm_utils.vector_cache import VectorCache


@@ -37,7 +37,6 @@ from llm_utils.chat_format import (
     format_msgs,
     get_conversation_one_turn,
     show_chat,
-    show_chat_v2,
     show_string_diff,
     transform_messages,
     transform_messages_to_chatml,
@@ -54,7 +53,6 @@ __all__ = [
     "build_chatml_input",
     "format_msgs",
     "display_chat_messages_as_html",
-    "show_chat_v2",
     "AsyncLM",
     "AsyncLLMTask",
     "LLM",
src/llm_utils/chat_format/__init__.py

@@ -4,7 +4,6 @@ from .display import (
     get_conversation_one_turn,
     highlight_diff_chars,
     show_chat,
-    show_chat_v2,
     show_string_diff,
 )
 from .transform import (
@@ -32,5 +31,4 @@ __all__ = [
     "show_string_diff",
     "display_conversations",
     "display_chat_messages_as_html",
-    "show_chat_v2",
 ]
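Both `__init__.py` hunks drop `show_chat_v2` from the imports and from `__all__`, leaving `show_chat` (re-exported at the package root) as the single display entry point. A minimal migration sketch, assuming callers only need the rename; the new `show_chat` signature appears in `display.py` below:

```python
# Hedged sketch: show_chat_v2 is gone from the public API in 1.1.48;
# show_chat auto-detects notebook vs terminal rendering.
from llm_utils import show_chat

messages = [
    {'role': 'user', 'content': 'Hello!'},
    {'role': 'assistant', 'content': 'Hi!'},
]
show_chat(messages)  # replaces show_chat_v2(...) call sites
```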
src/llm_utils/chat_format/display.py (new file)

@@ -0,0 +1,384 @@
+from __future__ import annotations
+
+import json
+from difflib import SequenceMatcher
+from typing import Any
+
+from IPython.display import HTML, display
+
+
+def _preprocess_as_json(content: str) -> str:
+    """Preprocess content as JSON with proper formatting."""
+    try:
+        parsed = json.loads(content)
+        return json.dumps(parsed, indent=2, ensure_ascii=False)
+    except (json.JSONDecodeError, TypeError):
+        return content
+
+
+def _preprocess_as_markdown(content: str) -> str:
+    """Preprocess content as markdown with proper formatting."""
+    lines = content.split('\n')
+    processed_lines = []
+
+    for line in lines:
+        # Convert **bold** to span with bold styling
+        while '**' in line:
+            first_pos = line.find('**')
+            if first_pos == -1:
+                break
+            second_pos = line.find('**', first_pos + 2)
+            if second_pos == -1:
+                break
+            before = line[:first_pos]
+            bold_text = line[first_pos + 2 : second_pos]
+            after = line[second_pos + 2 :]
+            line = f'{before}<span style="font-weight: bold;">{bold_text}</span>{after}'
+
+        # Convert *italic* to span with italic styling
+        while '*' in line and line.count('*') >= 2:
+            first_pos = line.find('*')
+            if first_pos == -1:
+                break
+            second_pos = line.find('*', first_pos + 1)
+            if second_pos == -1:
+                break
+            before = line[:first_pos]
+            italic_text = line[first_pos + 1 : second_pos]
+            after = line[second_pos + 1 :]
+            line = (
+                f'{before}<span style="font-style: italic;">{italic_text}</span>{after}'
+            )
+
+        # Convert # headers to bold headers
+        if line.strip().startswith('#'):
+            level = len(line) - len(line.lstrip('#'))
+            header_text = line.lstrip('# ').strip()
+            line = f'<span style="font-weight: bold; font-size: 1.{min(4, level)}em;">{header_text}</span>'
+
+        processed_lines.append(line)
+
+    return '\n'.join(processed_lines)
+
+
+def _truncate_text(text: str, max_length: int, head_ratio: float = 0.3) -> str:
+    """
+    Truncate text if it exceeds max_length, showing head and tail with skip indicator.
+
+    Args:
+        text: Text to truncate
+        max_length: Maximum length before truncation
+        head_ratio: Ratio of max_length to show at the head (default 0.3)
+
+    Returns:
+        Original text if within limit, otherwise truncated with [SKIP n chars] indicator
+    """
+    if len(text) <= max_length:
+        return text
+
+    head_len = int(max_length * head_ratio)
+    tail_len = max_length - head_len
+    skip_len = len(text) - head_len - tail_len
+
+    return f'{text[:head_len]}\n...[SKIP {skip_len} chars]...\n{text[-tail_len:]}'
+
+
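A worked example of the `_truncate_text` arithmetic above (illustrative only; the values follow directly from the function body):

```python
# For 100 chars of input with max_length=10 and the default head_ratio=0.3:
text = 'x' * 100
head_len = int(10 * 0.3)              # 3 chars kept from the head
tail_len = 10 - head_len              # 7 chars kept from the tail
skip_len = 100 - head_len - tail_len  # 90 chars elided
# _truncate_text(text, 10) -> 'xxx\n...[SKIP 90 chars]...\nxxxxxxx'
```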
+def _format_reasoning_content(
+    reasoning: str, max_reasoning_length: int | None = None
+) -> str:
+    """
+    Format reasoning content with <think> tags.
+
+    Args:
+        reasoning: The reasoning content
+        max_reasoning_length: Max length before truncation (None = no truncation)
+
+    Returns:
+        Formatted reasoning with <think> tags
+    """
+    if max_reasoning_length is not None:
+        reasoning = _truncate_text(reasoning, max_reasoning_length)
+    return f'<think>\n{reasoning}\n</think>'
+
+
+def _escape_html(content: str) -> str:
+    """Escape HTML special characters and convert whitespace for display."""
+    return (
+        content.replace('&', '&amp;')
+        .replace('<', '&lt;')
+        .replace('>', '&gt;')
+        .replace('\n', '<br>')
+        .replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
+        .replace(' ', '&nbsp;')
+    )
+
+
+def _is_notebook() -> bool:
+    """Detect if running in a notebook environment."""
+    try:
+        from IPython.core.getipython import get_ipython
+
+        ipython = get_ipython()
+        return ipython is not None and 'IPKernelApp' in ipython.config
+    except (ImportError, AttributeError):
+        return False
+
+
+# Color configurations
+ROLE_COLORS_HTML = {
+    'system': 'red',
+    'user': 'darkorange',
+    'assistant': 'green',
+}
+
+ROLE_COLORS_TERMINAL = {
+    'system': '\033[91m',  # Red
+    'user': '\033[38;5;208m',  # Orange
+    'assistant': '\033[92m',  # Green
+}
+
+ROLE_LABELS = {
+    'system': 'System Instruction:',
+    'user': 'User:',
+    'assistant': 'Assistant:',
+}
+
+TERMINAL_RESET = '\033[0m'
+TERMINAL_BOLD = '\033[1m'
+TERMINAL_GRAY = '\033[90m'
+TERMINAL_DIM = '\033[2m'  # Dim text for reasoning
+
+# HTML colors
+HTML_REASONING_COLOR = '#AAAAAA'  # Lighter gray for better readability
+
+
+def _build_assistant_content_parts(
+    msg: dict[str, Any], max_reasoning_length: int | None
+) -> tuple[str | None, str]:
+    """
+    Build display content parts for assistant message.
+
+    Returns:
+        Tuple of (reasoning_formatted, answer_content)
+        reasoning_formatted is None if no reasoning present
+    """
+    content = msg.get('content', '')
+    reasoning = msg.get('reasoning_content')
+
+    if reasoning:
+        formatted_reasoning = _format_reasoning_content(reasoning, max_reasoning_length)
+        return formatted_reasoning, content
+
+    return None, content
+
+
+def _show_chat_html(
+    messages: list[dict[str, Any]], max_reasoning_length: int | None
+) -> None:
+    """Display chat messages as HTML in notebook."""
+    html_parts = [
+        "<div style='font-family:monospace; line-height:1.6em; white-space:pre-wrap;'>"
+    ]
+    separator = "<div style='color:#888; margin:0.5em 0;'>───────────────────────────────────────────────────</div>"
+
+    for i, msg in enumerate(messages):
+        role = msg.get('role', 'unknown').lower()
+        color = ROLE_COLORS_HTML.get(role, 'black')
+        label = ROLE_LABELS.get(role, f'{role.capitalize()}:')
+
+        if role == 'assistant':
+            reasoning, answer = _build_assistant_content_parts(
+                msg, max_reasoning_length
+            )
+            html_parts.append(
+                f"<div><strong style='color:{color}'>{label}</strong><br>"
+            )
+            if reasoning:
+                escaped_reasoning = _escape_html(reasoning)
+                html_parts.append(
+                    f"<span style='color:{HTML_REASONING_COLOR}'>{escaped_reasoning}</span><br><br>"
+                )
+            if answer:
+                escaped_answer = _escape_html(answer)
+                html_parts.append(
+                    f"<span style='color:{color}'>{escaped_answer}</span>"
+                )
+            html_parts.append('</div>')
+        else:
+            content = msg.get('content', '')
+            escaped_content = _escape_html(content)
+            html_parts.append(
+                f"<div style='color:{color}'><strong>{label}</strong><br>{escaped_content}</div>"
+            )
+
+        if i < len(messages) - 1:
+            html_parts.append(separator)
+
+    html_parts.append('</div>')
+    display(HTML(''.join(html_parts)))
+
+
+def _show_chat_terminal(
+    messages: list[dict[str, Any]], max_reasoning_length: int | None
+) -> None:
+    """Display chat messages with ANSI colors in terminal."""
+    separator = f'{TERMINAL_GRAY}─────────────────────────────────────────────────────────{TERMINAL_RESET}'
+
+    for i, msg in enumerate(messages):
+        role = msg.get('role', 'unknown').lower()
+        color = ROLE_COLORS_TERMINAL.get(role, '')
+        label = ROLE_LABELS.get(role, f'{role.capitalize()}:')
+
+        print(f'{color}{TERMINAL_BOLD}{label}{TERMINAL_RESET}')
+
+        if role == 'assistant':
+            reasoning, answer = _build_assistant_content_parts(
+                msg, max_reasoning_length
+            )
+            if reasoning:
+                # Use lighter gray without dim for better readability
+                print(f'\033[38;5;246m{reasoning.strip()}{TERMINAL_RESET}')
+                if answer:
+                    print()  # Blank line between reasoning and answer
+            if answer:
+                print(f'{color}{answer.strip()}{TERMINAL_RESET}')
+        else:
+            content = msg.get('content', '')
+            print(f'{color}{content}{TERMINAL_RESET}')
+
+        if i < len(messages) - 1:
+            print(separator)
+
+
+def show_chat(
+    messages: list[dict[str, Any]], max_reasoning_length: int | None = 2000
+) -> None:
+    """
+    Display chat messages with colored formatting.
+
+    Automatically detects notebook vs terminal environment and formats accordingly.
+    Handles reasoning_content in assistant messages, formatting it with <think> tags.
+
+    Args:
+        messages: List of message dicts with 'role', 'content', and optionally 'reasoning_content'
+        max_reasoning_length: Max chars for reasoning before truncation (None = no limit)
+
+    Example:
+        >>> messages = [
+        ...     {"role": "system", "content": "You are helpful."},
+        ...     {"role": "user", "content": "Hello!"},
+        ...     {"role": "assistant", "content": "Hi!", "reasoning_content": "User greeted me..."},
+        ... ]
+        >>> show_chat(messages)
+    """
+    if _is_notebook():
+        _show_chat_html(messages, max_reasoning_length)
+    else:
+        _show_chat_terminal(messages, max_reasoning_length)
+
+
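Because `max_reasoning_length` defaults to 2000, a long `reasoning_content` is wrapped in `<think>` tags and pushed through `_truncate_text` before rendering. A small illustrative call (the message content here is made up):

```python
# Illustrative only: the reasoning trace below exceeds the limit, so the
# rendered output shows its head and tail around a '[SKIP n chars]' marker.
from llm_utils import show_chat

messages = [
    {'role': 'user', 'content': 'Summarize the report.'},
    {
        'role': 'assistant',
        'content': 'Here is the summary...',
        'reasoning_content': 'step one... ' * 500,  # ~6000 chars
    },
]
show_chat(messages, max_reasoning_length=500)
```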
+def get_conversation_one_turn(
+    system_msg: str | None = None,
+    user_msg: str | None = None,
+    assistant_msg: str | None = None,
+    assistant_prefix: str | None = None,
+    return_format: str = 'chatml',
+) -> Any:
+    """Build a one-turn conversation."""
+    messages: list[dict[str, str]] = []
+
+    if system_msg is not None:
+        messages.append({'role': 'system', 'content': system_msg})
+    if user_msg is not None:
+        messages.append({'role': 'user', 'content': user_msg})
+    if assistant_msg is not None:
+        messages.append({'role': 'assistant', 'content': assistant_msg})
+
+    if assistant_prefix is not None:
+        if return_format == 'chatml':
+            raise ValueError('Change return_format to "text" to use assistant_prefix')
+        if not messages or messages[-1]['role'] != 'user':
+            raise ValueError(
+                'Last message must be from user when using assistant_prefix'
+            )
+
+        from .transform import transform_messages
+
+        msg = transform_messages(messages, 'chatml', 'text', add_generation_prompt=True)
+        return str(msg) + assistant_prefix
+
+    if return_format != 'chatml':
+        raise ValueError(f'Unsupported return_format: {return_format}')
+
+    return messages
+
+
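The branches above give `get_conversation_one_turn` two usage modes: the default returns a ChatML list, while `assistant_prefix` requires `return_format='text'` and a trailing user message. A sketch of both (the exact rendered text depends on `transform_messages`):

```python
from llm_utils import get_conversation_one_turn

# Mode 1 (default): returns a list of ChatML message dicts.
conv = get_conversation_one_turn(
    system_msg='You are helpful.',
    user_msg='Hello!',
)

# Mode 2: prefill the assistant reply; return_format='chatml' would raise.
prompt = get_conversation_one_turn(
    user_msg='Continue the story:',
    assistant_prefix='Once upon a time',
    return_format='text',
)
# -> rendered chat-template string ending with 'Once upon a time'
```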
+def highlight_diff_chars(text1: str, text2: str) -> str:
+    """Return a string with deletions in red and additions in green."""
+    matcher = SequenceMatcher(None, text1, text2)
+    html_parts: list[str] = []
+
+    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
+        if tag == 'equal':
+            html_parts.append(text1[i1:i2])
+        elif tag == 'replace':
+            if i1 != i2:
+                html_parts.append(
+                    f'<span style="background-color:#ffd6d6; color:#b20000;">{text1[i1:i2]}</span>'
+                )
+            if j1 != j2:
+                html_parts.append(
+                    f'<span style="background-color:#d6ffd6; color:#006600;">{text2[j1:j2]}</span>'
+                )
+        elif tag == 'delete':
+            html_parts.append(
+                f'<span style="background-color:#ffd6d6; color:#b20000;">{text1[i1:i2]}</span>'
+            )
+        elif tag == 'insert':
+            html_parts.append(
+                f'<span style="background-color:#d6ffd6; color:#006600;">{text2[j1:j2]}</span>'
+            )
+
+    return ''.join(html_parts)
+
+
+def show_string_diff(old: str, new: str) -> None:
+    """Display a visual diff between two strings (old -> new)."""
+    display(HTML(highlight_diff_chars(old, new)))
+
+
+def display_conversations(data1: Any, data2: Any) -> None:
+    """Display two conversations side by side. Deprecated."""
+    import warnings
+
+    warnings.warn(
+        'display_conversations is deprecated and will be removed.',
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    print('=== Conversation 1 ===')
+    show_chat(data1)
+    print('\n=== Conversation 2 ===')
+    show_chat(data2)
+
+
+def display_chat_messages_as_html(*args, **kwargs):
+    """Deprecated alias for show_chat."""
+    import warnings
+
+    warnings.warn(
+        'display_chat_messages_as_html is deprecated, use show_chat instead.',
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return show_chat(*args, **kwargs)
+
+
+__all__ = [
+    'show_chat',
+    'get_conversation_one_turn',
+    'highlight_diff_chars',
+    'show_string_diff',
+    'display_conversations',
+    'display_chat_messages_as_html',
+]
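`show_string_diff` is the notebook-facing wrapper; `highlight_diff_chars` returns the raw HTML and can be used on its own. A short usage sketch:

```python
from llm_utils.chat_format import highlight_diff_chars, show_string_diff

# In a notebook this renders inline: deletions in red, insertions in green.
show_string_diff('the quick brown fox', 'the quick red fox')

# Outside IPython, the HTML string itself is still usable.
html = highlight_diff_chars('colour', 'color')
print(html)  # span-wrapped markup around the changed characters
```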
src/llm_utils/lm/llm.py

@@ -9,12 +9,12 @@ from typing import Any, Dict, List, Optional, cast

 from httpx import Timeout
 from loguru import logger
-from openai import AuthenticationError, BadRequestError, OpenAI, RateLimitError
+from openai import AuthenticationError, BadRequestError, OpenAI, RateLimitError, APITimeoutError
 from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel

-from speedy_utils.common.utils_io import jdumps
 from speedy_utils import clean_traceback
+from speedy_utils.common.utils_io import jdumps

 from .base_prompt_builder import BasePromptBuilder
 from .mixins import (
@@ -173,34 +173,45 @@ class LLM(
             )
             # Store raw response from client
             self.last_ai_response = completion
+        except APITimeoutError as exc:
+            error_msg = f'OpenAI API timeout ({api_kwargs['timeout']}) error: {exc} for model {model_name}'
+            logger.error(error_msg)
+            raise
         except (AuthenticationError, RateLimitError, BadRequestError) as exc:
             error_msg = f'OpenAI API error ({type(exc).__name__}): {exc}'
             logger.error(error_msg)
             raise
+        except ValueError as exc:
+            logger.error(f'ValueError during API call: {exc}')
+            raise
         except Exception as e:
             is_length_error = 'Length' in str(e) or 'maximum context length' in str(e)
             if is_length_error:
                 raise ValueError(
                     f'Input too long for model {model_name}. Error: {str(e)[:100]}...'
                 ) from e
-            # Re-raise all other exceptions
             raise
         # print(completion)

         results: list[dict[str, Any]] = []
         for choice in completion.choices:
+            assistant_message = [{'role': 'assistant', 'content': choice.message.content}]
+            try:
+                reasoning_content = choice.message.reasoning
+            except:
+                reasoning_content = None
+            if reasoning_content:
+                assistant_message[0]['reasoning_content'] = reasoning_content
+
             choice_messages = cast(
                 Messages,
-                messages +
+                messages + assistant_message,
             )
             result_dict = {
                 'parsed': choice.message.content,
                 'messages': choice_messages,
             }

-            # Add reasoning content if this is a reasoning model
-            if self.is_reasoning_model and hasattr(choice.message, 'reasoning_content'):
-                result_dict['reasoning_content'] = choice.message.reasoning_content

             results.append(result_dict)
         return results
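With this hunk, timeouts surface as a distinct `APITimeoutError` (logged with the configured timeout), and each returned choice now carries the appended assistant turn, including any `reasoning_content`, in its `messages` list. A hedged caller-side sketch; the `llm(messages)` call style is an assumption, not shown in this diff:

```python
from typing import Any

from openai import APITimeoutError


def call_with_retry(llm: Any, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Retry once on timeout; per this hunk the timeout is already logged."""
    try:
        return llm(messages)  # hypothetical call style for the LLM wrapper
    except APITimeoutError:
        return llm(messages)


def print_reasoning(results: list[dict[str, Any]]) -> None:
    """Per this hunk, the appended assistant message may carry reasoning_content."""
    for r in results:
        last = r['messages'][-1]
        if 'reasoning_content' in last:
            print(last['reasoning_content'])
```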
@@ -394,12 +405,12 @@ class LLM(
     ) -> list[dict[str, Any]]:
         """Inspect the message history of a specific response choice."""
         if hasattr(self, '_last_conversations'):
-            from llm_utils import
+            from llm_utils import show_chat

             conv = self._last_conversations[idx]
             if k_last_messages > 0:
                 conv = conv[-k_last_messages:]
-            return
+            return show_chat(conv)
         raise ValueError('No message history available. Make a call first.')

     def __inner_call__(
@@ -442,7 +453,7 @@ class LLM(
         is_reasoning_model: bool = False,
         lora_path: str | None = None,
         vllm_cmd: str | None = None,
-        vllm_timeout: int =
+        vllm_timeout: int = 0.1,
         vllm_reuse: bool = True,
         timeout: float | Timeout | None = None,
         **model_kwargs,
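For reference, these keyword arguments sit in the `LLM` constructor signature; a hedged sketch in which only the parameter names and defaults visible in this hunk are taken from the source, and everything else about construction is assumed:

```python
# Hedged sketch: how the LLM class consumes these is not shown here.
vllm_kwargs = dict(
    is_reasoning_model=False,
    lora_path=None,
    vllm_cmd=None,
    vllm_timeout=0.1,  # default shown in this hunk
    vllm_reuse=True,
    timeout=None,
)
# llm = LLM(..., **vllm_kwargs)  # remaining required arguments are unknowns
```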
|