strands-sglang 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. strands_sglang-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  2. strands_sglang-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  3. strands_sglang-0.1.0/.github/workflows/publish.yml +50 -0
  4. strands_sglang-0.1.0/.github/workflows/test.yml +65 -0
  5. strands_sglang-0.1.0/.gitignore +221 -0
  6. strands_sglang-0.1.0/.pre-commit-config.yaml +37 -0
  7. strands_sglang-0.1.0/CHANGELOG.md +115 -0
  8. strands_sglang-0.1.0/LICENSE +201 -0
  9. strands_sglang-0.1.0/PKG-INFO +223 -0
  10. strands_sglang-0.1.0/README.md +181 -0
  11. strands_sglang-0.1.0/examples/math_agent.py +110 -0
  12. strands_sglang-0.1.0/examples/retokenization_drift/README.md +59 -0
  13. strands_sglang-0.1.0/examples/retokenization_drift/main.py +132 -0
  14. strands_sglang-0.1.0/pyproject.toml +77 -0
  15. strands_sglang-0.1.0/src/strands_sglang/__init__.py +42 -0
  16. strands_sglang-0.1.0/src/strands_sglang/client.py +278 -0
  17. strands_sglang-0.1.0/src/strands_sglang/sglang.py +477 -0
  18. strands_sglang-0.1.0/src/strands_sglang/token.py +158 -0
  19. strands_sglang-0.1.0/src/strands_sglang/tool_limiter.py +102 -0
  20. strands_sglang-0.1.0/src/strands_sglang/tool_parser.py +248 -0
  21. strands_sglang-0.1.0/tests/README.md +103 -0
  22. strands_sglang-0.1.0/tests/__init__.py +13 -0
  23. strands_sglang-0.1.0/tests/conftest.py +58 -0
  24. strands_sglang-0.1.0/tests/integration/conftest.py +81 -0
  25. strands_sglang-0.1.0/tests/integration/test_agent_math500.py +985 -0
  26. strands_sglang-0.1.0/tests/integration/test_sglang_integration.py +302 -0
  27. strands_sglang-0.1.0/tests/integration/test_tool_iteration_limiter.py +393 -0
  28. strands_sglang-0.1.0/tests/unit/test_client.py +214 -0
  29. strands_sglang-0.1.0/tests/unit/test_messages.py +377 -0
  30. strands_sglang-0.1.0/tests/unit/test_sglang.py +324 -0
  31. strands_sglang-0.1.0/tests/unit/test_token.py +310 -0
  32. strands_sglang-0.1.0/tests/unit/test_tool_parser.py +401 -0
@@ -0,0 +1,38 @@
1
+ ---
2
+ name: Bug report
3
+ about: Create a report to help us improve
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Describe the bug**
11
+ A clear and concise description of what the bug is.
12
+
13
+ **To Reproduce**
14
+ Steps to reproduce the behavior:
15
+ 1. Go to '...'
16
+ 2. Click on '....'
17
+ 3. Scroll down to '....'
18
+ 4. See error
19
+
20
+ **Expected behavior**
21
+ A clear and concise description of what you expected to happen.
22
+
23
+ **Screenshots**
24
+ If applicable, add screenshots to help explain your problem.
25
+
26
+ **Desktop (please complete the following information):**
27
+ - OS: [e.g. iOS]
28
+ - Browser: [e.g. chrome, safari]
29
+ - Version: [e.g. 22]
30
+
31
+ **Smartphone (please complete the following information):**
32
+ - Device: [e.g. iPhone6]
33
+ - OS: [e.g. iOS8.1]
34
+ - Browser: [e.g. stock browser, safari]
35
+ - Version: [e.g. 22]
36
+
37
+ **Additional context**
38
+ Add any other context about the problem here.
@@ -0,0 +1,20 @@
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for this project
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Is your feature request related to a problem? Please describe.**
11
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12
+
13
+ **Describe the solution you'd like**
14
+ A clear and concise description of what you want to happen.
15
+
16
+ **Describe alternatives you've considered**
17
+ A clear and concise description of any alternative solutions or features you've considered.
18
+
19
+ **Additional context**
20
+ Add any other context or screenshots about the feature request here.
@@ -0,0 +1,50 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ build:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v4
12
+
13
+ - name: Set up Python
14
+ uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.12"
17
+
18
+ - name: Install build tools
19
+ run: pip install build twine
20
+
21
+ - name: Build package
22
+ run: python -m build
23
+
24
+ - name: Check package
25
+ run: twine check dist/*
26
+
27
+ - name: Upload build artifacts
28
+ uses: actions/upload-artifact@v4
29
+ with:
30
+ name: dist
31
+ path: dist/
32
+
33
+ publish-pypi:
34
+ needs: build
35
+ runs-on: ubuntu-latest
36
+ environment: pypi
37
+ permissions:
38
+ id-token: write # Required for trusted publishing
39
+
40
+ steps:
41
+ - name: Download build artifacts
42
+ uses: actions/download-artifact@v4
43
+ with:
44
+ name: dist
45
+ path: dist/
46
+
47
+ - name: Publish to PyPI
48
+ uses: pypa/gh-action-pypi-publish@release/v1
49
+ # Uses trusted publishing (OIDC) - no API token needed
50
+ # Configure at: https://pypi.org/manage/project/strands-sglang/settings/publishing/
@@ -0,0 +1,65 @@
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main, mainline]
6
+ pull_request:
7
+ branches: [main, mainline]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - name: Set up Python
16
+ uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.12"
19
+
20
+ - name: Install ruff
21
+ run: pip install ruff
22
+
23
+ - name: Run ruff check
24
+ run: ruff check src/
25
+
26
+ - name: Run ruff format check
27
+ run: ruff format --check src/
28
+
29
+ test:
30
+ runs-on: ubuntu-latest
31
+ strategy:
32
+ fail-fast: false
33
+ matrix:
34
+ python-version: ["3.10", "3.11", "3.12"]
35
+
36
+ steps:
37
+ - uses: actions/checkout@v4
38
+
39
+ - name: Set up Python ${{ matrix.python-version }}
40
+ uses: actions/setup-python@v5
41
+ with:
42
+ python-version: ${{ matrix.python-version }}
43
+
44
+ - name: Cache pip dependencies
45
+ uses: actions/cache@v4
46
+ with:
47
+ path: ~/.cache/pip
48
+ key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}
49
+ restore-keys: |
50
+ ${{ runner.os }}-pip-${{ matrix.python-version }}-
51
+
52
+ - name: Install dependencies
53
+ run: |
54
+ python -m pip install --upgrade pip
55
+ pip install -e ".[dev]"
56
+
57
+ - name: Run unit tests
58
+ run: pytest tests/unit/ -v --cov=src/strands_sglang --cov-report=xml
59
+
60
+ - name: Upload coverage
61
+ uses: codecov/codecov-action@v4
62
+ if: matrix.python-version == '3.12'
63
+ with:
64
+ files: ./coverage.xml
65
+ fail_ci_if_error: false
@@ -0,0 +1,221 @@
1
+ # Created by https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python,jupyternotebooks
3
+
4
+ ### JupyterNotebooks ###
5
+ # gitignore template for Jupyter Notebooks
6
+ # website: http://jupyter.org/
7
+
8
+ .ipynb_checkpoints
9
+ */.ipynb_checkpoints/*
10
+
11
+ # IPython
12
+ profile_default/
13
+ ipython_config.py
14
+
15
+ # Remove previous ipynb_checkpoints
16
+ # git rm -r .ipynb_checkpoints/
17
+
18
+ ### Python ###
19
+ # Byte-compiled / optimized / DLL files
20
+ __pycache__/
21
+ *.py[cod]
22
+ *$py.class
23
+
24
+ # C extensions
25
+ *.so
26
+
27
+ # Distribution / packaging
28
+ .Python
29
+ build/
30
+ develop-eggs/
31
+ dist/
32
+ downloads/
33
+ eggs/
34
+ .eggs/
35
+ lib/
36
+ lib64/
37
+ parts/
38
+ sdist/
39
+ var/
40
+ wheels/
41
+ share/python-wheels/
42
+ *.egg-info/
43
+ .installed.cfg
44
+ *.egg
45
+ MANIFEST
46
+
47
+ # PyInstaller
48
+ # Usually these files are written by a python script from a template
49
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
50
+ *.manifest
51
+ *.spec
52
+
53
+ # Installer logs
54
+ pip-log.txt
55
+ pip-delete-this-directory.txt
56
+
57
+ # Unit test / coverage reports
58
+ htmlcov/
59
+ .tox/
60
+ .nox/
61
+ .coverage
62
+ .coverage.*
63
+ .cache
64
+ nosetests.xml
65
+ coverage.xml
66
+ *.cover
67
+ *.py,cover
68
+ .hypothesis/
69
+ .pytest_cache/
70
+ cover/
71
+
72
+ # Translations
73
+ *.mo
74
+ *.pot
75
+
76
+ # Django stuff:
77
+ *.log
78
+ local_settings.py
79
+ db.sqlite3
80
+ db.sqlite3-journal
81
+
82
+ # Flask stuff:
83
+ instance/
84
+ .webassets-cache
85
+
86
+ # Scrapy stuff:
87
+ .scrapy
88
+
89
+ # Sphinx documentation
90
+ docs/_build/
91
+
92
+ # PyBuilder
93
+ .pybuilder/
94
+ target/
95
+
96
+ # Jupyter Notebook
97
+
98
+ # IPython
99
+
100
+ # pyenv
101
+ # For a library or package, you might want to ignore these files since the code is
102
+ # intended to run in multiple environments; otherwise, check them in:
103
+ # .python-version
104
+
105
+ # pipenv
106
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
107
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
108
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
109
+ # install all needed dependencies.
110
+ #Pipfile.lock
111
+
112
+ # poetry
113
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
114
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
115
+ # commonly ignored for libraries.
116
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
117
+ #poetry.lock
118
+
119
+ # pdm
120
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
121
+ #pdm.lock
122
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
123
+ # in version control.
124
+ # https://pdm.fming.dev/#use-with-ide
125
+ .pdm.toml
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .venv
140
+ env/
141
+ venv/
142
+ ENV/
143
+ env.bak/
144
+ venv.bak/
145
+
146
+ # Spyder project settings
147
+ .spyderproject
148
+ .spyproject
149
+
150
+ # Rope project settings
151
+ .ropeproject
152
+
153
+ # mkdocs documentation
154
+ /site
155
+
156
+ # mypy
157
+ .mypy_cache/
158
+ .dmypy.json
159
+ dmypy.json
160
+
161
+ # Pyre type checker
162
+ .pyre/
163
+
164
+ # pytype static type analyzer
165
+ .pytype/
166
+
167
+ # Cython debug symbols
168
+ cython_debug/
169
+
170
+ # PyCharm
171
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
172
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
173
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
174
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
175
+ #.idea/
176
+
177
+ ### Python Patch ###
178
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
179
+ poetry.toml
180
+
181
+ # ruff
182
+ .ruff_cache/
183
+
184
+ # LSP config files
185
+ pyrightconfig.json
186
+
187
+ # End of https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks
188
+
189
+ # Ignore uv lock files for now (remove this entry if uv is adopted later)
190
+ uv.lock
191
+
192
+ # Ignore nohup.out
193
+ nohup.out
194
+
195
+ # Ignore data downloaded from AWS S3
196
+ # data*/
197
+ package_data_source/
198
+
199
+ # Ignore peru files
200
+ .hatch/
201
+ # Ignore PeruHatch local cache directory
202
+ /.hatch
203
+
204
+ # Ignore private directory
205
+ private/
206
+
207
+ # Ignore temp directory
208
+ temp/
209
+
210
+ # Ignore test notebooks
211
+ test*.ipynb
212
+ testing/
213
+
214
+ # Ignore uv.lock
215
+ uv.lock
216
+
217
+ # Ignore data folder
218
+ /data
219
+
220
+ # Ignore DS_Store
221
+ .DS_Store
@@ -0,0 +1,37 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.6.4
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix]
7
+ files: ^(src/strands_sglang/|scripts/|dev/).*\.py$
8
+ - id: ruff-format
9
+ files: ^(src/strands_sglang/|scripts/|dev/).*\.py$
10
+
11
+ - repo: https://github.com/pre-commit/pre-commit-hooks
12
+ rev: v4.5.0
13
+ hooks:
14
+ - id: check-yaml
15
+ - id: check-case-conflict
16
+ - id: detect-private-key
17
+ - id: check-added-large-files
18
+ args: ['--maxkb=1000']
19
+ - id: requirements-txt-fixer
20
+
21
+ - repo: https://github.com/compilerla/conventional-pre-commit
22
+ rev: v3.4.0
23
+ hooks:
24
+ - id: conventional-pre-commit
25
+ stages: [commit-msg]
26
+ args:
27
+ - feat
28
+ - fix
29
+ - docs
30
+ - style
31
+ - refactor
32
+ - perf
33
+ - test
34
+ - build
35
+ - ci
36
+ - chore
37
+ - revert
@@ -0,0 +1,115 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.1.0] - 2026-01-20
11
+
12
+ First beta release. The library is now considered stable for production use in agentic RL training.
13
+
14
+ ### Added
15
+
16
+ - **Tool Parse Error Tracking**: `SGLangModel` now tracks tool call parse errors via the `tool_parse_errors` attribute. This enables distinguishing between parse errors (malformed JSON, missing tool name) and execution errors (tool threw exception) during RL training metrics collection.
17
+
18
+ ```python
19
+ model = SGLangModel(tokenizer=tokenizer)
20
+ # After generation:
21
+ print(model.tool_parse_errors) # {"tool_name": count, ...}
22
+ ```
23
+
24
+ ### Changed
25
+
26
+ - **Beta Status**: Upgraded from Alpha to Beta. The API is now stable and suitable for production RL training workloads.
27
+
28
+ ## [0.0.3] - 2026-01-08
29
+
30
+ ### Added
31
+
32
+ - **Qwen3 Hybrid Thinking Mode Support**: Added `enable_thinking` config option for Qwen3 hybrid thinking models. This is passed to `apply_chat_template` to control whether the model uses its internal reasoning mode with `<think>` tokens. Default is `None` (not passed to template) to avoid affecting non-Qwen3 models.
33
+
34
+ ```python
35
+ # For Qwen3 hybrid models: enable thinking mode
36
+ model = SGLangModel(tokenizer=tokenizer, enable_thinking=True)
37
+
38
+ # For Qwen3 hybrid models: disable thinking for faster non-reasoning tasks
39
+ model = SGLangModel(tokenizer=tokenizer, enable_thinking=False)
40
+
41
+ # For non-Qwen3 models: don't set (default None, parameter not passed)
42
+ model = SGLangModel(tokenizer=tokenizer)
43
+ ```
44
+
45
+ - **Related Projects**: Added [strands-vllm](https://github.com/agents-community/strands-vllm) to README as a community vLLM provider inspired by this project.
46
+
47
+ ### Changed
48
+
49
+ - **Simplified TokenManager API**: Removed the `tokenizer` parameter from `TokenManager.__init__()` and removed the `decode()` method. The tokenizer was needed only for single-token decoding, which was never exercised in practice—all real usage calls `model.tokenizer.decode()` directly for batch decoding with options like `skip_special_tokens`.
50
+
51
+ - **Message Formatting Methods**: Converted `_format_message_content` and `format_request_messages` to `@classmethod` since they don't use instance state. This clarifies intent and allows calling without an instance.
52
+
53
+ ### Fixed
54
+
55
+ - **SLIME-Aligned Retry for Local Servers**: Changed retry behavior to match SLIME's aggressive retry philosophy for local SGLang servers during RL training:
56
+ - 400 errors are now **retried** (can be transient during weight reloading, memory pressure)
57
+ - Only truly non-retryable: 401 (auth), 403 (forbidden), 404 (not found)
58
+ - 400 errors that match context-length patterns are still not retried (retrying won't help)
59
+ - References: [OpenAI Python SDK](https://github.com/openai/openai-python) retries 408/409/429/5xx; [SLIME](https://github.com/THUDM/slime) retries ALL errors
60
+
61
+ - **Improved Context Length Detection**: Expanded patterns to detect context/prompt length errors in 400 responses:
62
+ - Now matches: "exceed", "too long", "max model len", "maximum length", "context length"
63
+ - These are converted to `ContextWindowOverflowException` (TRUNCATED, not ABORTED)
64
+ - Added logging for unexpected 400 errors to aid debugging
65
+
66
+ ## [0.0.2] - 2026-01-07
67
+
68
+ ### Added
69
+
70
+ - **`SGLangClient` Class** (`client.py`): High-level async HTTP client for SGLang server, aligned with [Slime's http_utils.py](https://github.com/THUDM/slime/blob/main/slime/utils/http_utils.py) for RL training stability:
71
+ - Connection pooling (default 1000 max connections, with matching keepalive)
72
+ - Aggressive retry: 60 attempts with 1s delay (like Slime)
73
+ - Infinite timeout by default for long generations (`timeout=None`)
74
+ - Non-streaming POST for better parallelism at scale
75
+
76
+ - **`SGLangClient.from_slime_args()` Factory Method**: Create client directly from Slime training args with auto-computed `max_connections`:
77
+
78
+ ```python
79
+ client = SGLangClient.from_slime_args(args)
80
+ model = SGLangModel(tokenizer=tokenizer, client=client)
81
+ ```
82
+
83
+ - **Slime-Aligned Retry Logic**: Aggressive retry on most errors (blacklist approach):
84
+ - Retries all 5xx server errors
85
+ - Retries 408 (Request Timeout) and 429 (Rate Limit) per OpenAI best practices
86
+ - Retries connection errors (`ConnectError`, `PoolTimeout`, `ReadTimeout`)
87
+ - Only non-retryable: permanent client errors (400, 401, 403, 404, 422, etc.)
88
+
89
+ - **Conventional Commits**: Added `commit-msg` hook via `conventional-pre-commit` to enforce [Conventional Commits](https://www.conventionalcommits.org/) format.
90
+
91
+ ### Changed
92
+
93
+ - **Default Port**: Changed from 8000 to 30000 to match SGLang's default.
94
+ - **`SGLangModel` Now Uses `SGLangClient`**: The model uses `SGLangClient` for HTTP communication, providing retry logic and better error handling.
95
+ - **Improved Error Handling**: SGLang HTTP errors now properly raise `ContextWindowOverflowException` for context length errors and `ModelThrottledException` for rate limiting (429/503).
96
+ - **BREAKING: Non-Streaming Only**: `SGLangClient.generate()` now returns `dict[str, Any]` directly instead of an `AsyncGenerator`. This provides ~20x better parallelism for RL training at scale by releasing connections immediately after response.
97
+
98
+ ### Removed
99
+
100
+ - **Streaming Support**: Removed all streaming/SSE code. Non-streaming POST is now the only mode, aligned with Slime's `http_utils.py` for optimal RL training performance. Streaming held connections open during generation, causing serialization at high concurrency.
101
+ - **`stream` Config Option**: Removed from `SGLangConfig` as streaming is no longer supported.
102
+
103
+ ### Fixed
104
+
105
+ - Default `max_new_tokens` increased for thinking models that require longer outputs.
106
+
107
+ ## [0.0.1] - 2026-01-03
108
+
109
+ ### Added
110
+
111
+ - Initial release with SGLang native `/generate` API support.
112
+ - Token-In/Token-Out (TITO) tracking via `TokenManager`.
113
+ - Hermes/Qwen tool call parsing with `HermesToolCallParser`.
114
+ - `ToolIterationLimiter` hook for clean trajectory truncation.
115
+ - Integration with Strands Agents SDK.