thinhost 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thinhost-0.1.0/PKG-INFO +50 -0
- thinhost-0.1.0/README.md +16 -0
- thinhost-0.1.0/pyproject.toml +3 -0
- thinhost-0.1.0/setup.cfg +4 -0
- thinhost-0.1.0/setup.py +35 -0
- thinhost-0.1.0/thinhost/__init__.py +0 -0
- thinhost-0.1.0/thinhost/llm_base_service.py +93 -0
- thinhost-0.1.0/thinhost/llm_base_service_test.py +264 -0
- thinhost-0.1.0/thinhost/proxy_service_model.py +50 -0
- thinhost-0.1.0/thinhost/simple_upload_host.py +1157 -0
- thinhost-0.1.0/thinhost/simple_upload_host_tests.py +556 -0
- thinhost-0.1.0/thinhost/simple_web_server.py +99 -0
- thinhost-0.1.0/thinhost.egg-info/PKG-INFO +50 -0
- thinhost-0.1.0/thinhost.egg-info/SOURCES.txt +15 -0
- thinhost-0.1.0/thinhost.egg-info/dependency_links.txt +1 -0
- thinhost-0.1.0/thinhost.egg-info/requires.txt +9 -0
- thinhost-0.1.0/thinhost.egg-info/top_level.txt +1 -0
thinhost-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: thinhost
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Thin HTTP hosts for Decelium microsite workflows
|
|
5
|
+
Home-page: https://github.com/Decelium/decelium_simple_host
|
|
6
|
+
Author: Justin Girard
|
|
7
|
+
Author-email: justingirard@decelium.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Requires-Python: >=3.7
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: nodejobs>=0.3.0
|
|
16
|
+
Requires-Dist: flask
|
|
17
|
+
Requires-Dist: flask-cors
|
|
18
|
+
Requires-Dist: requests
|
|
19
|
+
Requires-Dist: httpx
|
|
20
|
+
Requires-Dist: psutil
|
|
21
|
+
Requires-Dist: python-dotenv
|
|
22
|
+
Requires-Dist: urllib3
|
|
23
|
+
Requires-Dist: werkzeug
|
|
24
|
+
Dynamic: author
|
|
25
|
+
Dynamic: author-email
|
|
26
|
+
Dynamic: classifier
|
|
27
|
+
Dynamic: description
|
|
28
|
+
Dynamic: description-content-type
|
|
29
|
+
Dynamic: home-page
|
|
30
|
+
Dynamic: license
|
|
31
|
+
Dynamic: requires-dist
|
|
32
|
+
Dynamic: requires-python
|
|
33
|
+
Dynamic: summary
|
|
34
|
+
|
|
35
|
+
# thinhost
|
|
36
|
+
|
|
37
|
+
Thin HTTP hosts used in the Decelium microsite workflow.
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
pip install thinhost
|
|
41
|
+
|
|
42
|
+
## What's included
|
|
43
|
+
- Simple web server: `thinhost.simple_web_server`
|
|
44
|
+
- Upload host: `thinhost.simple_upload_host` (requires `decelium_core_data`)
|
|
45
|
+
- LLM proxy: `thinhost.llm_base_service` (requires extra deps)
|
|
46
|
+
|
|
47
|
+
## Run
|
|
48
|
+
python -m thinhost.simple_web_server
|
|
49
|
+
python -m thinhost.simple_upload_host
|
|
50
|
+
python -m thinhost.llm_base_service
|
thinhost-0.1.0/README.md
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# thinhost
|
|
2
|
+
|
|
3
|
+
Thin HTTP hosts used in the Decelium microsite workflow.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
pip install thinhost
|
|
7
|
+
|
|
8
|
+
## What's included
|
|
9
|
+
- Simple web server: `thinhost.simple_web_server`
|
|
10
|
+
- Upload host: `thinhost.simple_upload_host` (requires `decelium_core_data`)
|
|
11
|
+
- LLM proxy: `thinhost.llm_base_service` (requires extra deps)
|
|
12
|
+
|
|
13
|
+
## Run
|
|
14
|
+
python -m thinhost.simple_web_server
|
|
15
|
+
python -m thinhost.simple_upload_host
|
|
16
|
+
python -m thinhost.llm_base_service
|
thinhost-0.1.0/setup.cfg
ADDED
thinhost-0.1.0/setup.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Packaging script for the ``thinhost`` distribution."""
from pathlib import Path

from setuptools import setup, find_packages

# Resolve README.md relative to this file, not the current working
# directory, so `pip install .` / sdist builds launched from elsewhere
# (e.g. pip's temporary build dir) still find the long description.
long_description = (Path(__file__).parent / "README.md").read_text(encoding="utf-8")

setup(
    name="thinhost",
    version="0.1.0",
    description="Thin HTTP hosts for Decelium microsite workflows",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="Justin Girard",
    author_email="justingirard@decelium.com",
    url="https://github.com/Decelium/decelium_simple_host",
    license="MIT",
    classifiers=[
        "Development Status :: 2 - Pre-Alpha",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3",
    ],
    packages=find_packages(),
    install_requires=[
        "nodejobs>=0.3.0",
        "flask",
        "flask-cors",
        "requests",
        "httpx",
        "psutil",
        "python-dotenv",
        "urllib3",
        "werkzeug",
    ],
    python_requires=">=3.7",
)
|
|
File without changes
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
|
|
2
|
+
# Support running both as a flat script (plain import) and as part of the
# package (relative import). Narrow the fallback to ImportError: a bare
# `except:` would also swallow SystemExit/KeyboardInterrupt and mask real
# errors raised while importing simple_upload_host.
try:
    from simple_upload_host import UploadService
except ImportError:
    from .simple_upload_host import UploadService

import os, sys
import httpx
from flask import Response, request
from processing_graph.ProcessingNode import ProcessingNode
|
|
11
|
+
class LLMProxyService(UploadService):
    """UploadService variant that also proxies OpenAI-compatible routes.

    Registers ``/v1/chat/completions``, ``/v1/completions``, ``/v1/models``
    and a ``/v1`` health endpoint on the Flask app, forwarding requests to
    ``api.openai.com`` with the API key taken from the host spec.
    """

    @classmethod
    def _after_launch_endpoints(cls, app, hostSpec):
        """Register the OpenAI proxy routes on ``app`` after launch.

        Raises RuntimeError when the host spec does not provide an
        OpenAI API key under ``f_open_ai_key``.
        """
        hostSpec = cls.tHostSpec(hostSpec)
        openai_key = hostSpec.get(hostSpec.f_open_ai_key, None)
        if not openai_key:
            raise RuntimeError('openai_api_key is not set in hostSpec')

        # One shared upstream client; the Authorization header is attached
        # here once so the per-request handlers only forward the JSON body.
        # NOTE(review): the client is never closed — it lives for the
        # lifetime of the process.
        client = httpx.Client(
            base_url='https://api.openai.com',
            headers={'Authorization': f'Bearer {openai_key}'}
        )

        def _proxy(path: str):
            # Forward the raw request body to `path` upstream. The JSON is
            # parsed only to detect "stream": true; the original bytes are
            # what gets forwarded.
            body = request.get_data()
            payload = request.get_json(silent=True) or {}

            if payload.get("stream", False):
                # Streaming mode: relay upstream SSE bytes chunk-by-chunk.
                # NOTE(review): upstream status/error codes are not
                # propagated in this branch — the response is always 200.
                def generate():
                    with client.stream(
                        "POST", path,
                        content=body,
                        headers={"Content-Type":"application/json"},
                        timeout=None
                    ) as resp:
                        for chunk in resp.iter_bytes():
                            yield chunk
                return Response(generate(), content_type="text/event-stream")

            # Non-streaming: mirror upstream body, status and content type.
            resp = client.post(path, content=body, headers={"Content-Type":"application/json"})
            return Response(resp.content, status=resp.status_code,
                            content_type=resp.headers.get("content-type"))


        def _proxy_models():
            # GET passthrough for the model listing endpoint.
            resp = client.get('/v1/models')
            return Response(
                resp.content,
                status=resp.status_code,
                content_type=resp.headers.get("content-type")
            )

        app.add_url_rule(
            '/v1/models',
            endpoint='proxy_models',
            view_func=_proxy_models,
            methods=['GET']
        )

        # Static health probe so clients can check liveness without
        # touching the upstream API.
        app.add_url_rule(
            '/v1',
            endpoint='health',
            view_func=lambda: Response(
                '{"status":"ok"}',
                status=200,
                content_type='application/json'
            ),
            methods=['GET']
        )

        # register both endpoints
        app.add_url_rule(
            '/v1/chat/completions',
            endpoint='proxy_chat',
            view_func=lambda: _proxy('/v1/chat/completions'),
            methods=['POST']
        )
        app.add_url_rule(
            '/v1/completions',
            endpoint='proxy_completions',
            view_func=lambda: _proxy('/v1/completions'),
            methods=['POST']
        )
|
|
86
|
+
if __name__ == "__main__":
    # Load environment variables from .env first — presumably so the
    # <<OPENAI_API_KEY>> style placeholders in host specs can resolve.
    from dotenv import load_dotenv
    load_dotenv()
    LLMProxyService.run_cli()
    # python llm_base_service.py launch [[server=./test_specs/hostSpec.json]] -- For a quick test
    # python llm_base_service.py start "[[server=./test_specs/hostSpec.json]]" -- deploy job to nodejobs
    # open-webui serve UI layer (rests on service)
    # TODO wrap up with retry and uptime monitor
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import unittest
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import time
|
|
6
|
+
import requests
|
|
7
|
+
from dotenv import load_dotenv
|
|
8
|
+
from llm_base_service import LLMProxyService # your subclass with proxy support
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import json
|
|
11
|
+
from typing import Any, Dict
|
|
12
|
+
from decelium_wallet.commands.BaseData import BaseData
|
|
13
|
+
|
|
14
|
+
from proxy_service_model import ProxyServiceModel
|
|
15
|
+
'''
|
|
16
|
+
some proxy_service_model content:
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
class ProxyServiceModel(BaseModel):
|
|
20
|
+
endpoint: str # e.g. "http://localhost:8003"
|
|
21
|
+
api_key: str
|
|
22
|
+
model_id: str
|
|
23
|
+
|
|
24
|
+
def generate(self,
|
|
25
|
+
messages: List[Dict[str, str]],
|
|
26
|
+
**kwargs: Any) -> str:
|
|
27
|
+
url = f"{self.endpoint}/v1/chat/completions"
|
|
28
|
+
assert self.endpoint != None
|
|
29
|
+
assert self.model_id != None
|
|
30
|
+
assert messages != None
|
|
31
|
+
|
|
32
|
+
headers = {
|
|
33
|
+
"Authorization": f"Bearer {self.api_key}",
|
|
34
|
+
"Content-Type": "application/json",
|
|
35
|
+
}
|
|
36
|
+
payload = {"model": self.model_id, "messages": messages, **kwargs}
|
|
37
|
+
print(f"URL OUT : {type(url)}----{url}")
|
|
38
|
+
r = requests.post(url, json=payload, headers=headers, timeout=60)
|
|
39
|
+
r.raise_for_status()
|
|
40
|
+
return r.json()["choices"][0]["message"]["content"]
|
|
41
|
+
|
|
42
|
+
def stream(self,
|
|
43
|
+
messages: List[Dict[str, str]],
|
|
44
|
+
**kwargs: Any):
|
|
45
|
+
url = f"{self.endpoint}/v1/chat/completions"
|
|
46
|
+
headers = {
|
|
47
|
+
"Authorization": f"Bearer {self.api_key}",
|
|
48
|
+
"Content-Type": "application/json",
|
|
49
|
+
}
|
|
50
|
+
payload = {"model": self.model_id,
|
|
51
|
+
"messages": messages,
|
|
52
|
+
"stream": True,
|
|
53
|
+
**kwargs}
|
|
54
|
+
r = requests.post(url, json=payload,
|
|
55
|
+
headers=headers,
|
|
56
|
+
stream=True,
|
|
57
|
+
timeout=None)
|
|
58
|
+
r.raise_for_status()
|
|
59
|
+
for line in r.iter_lines():
|
|
60
|
+
if line.startswith(b"data:"):
|
|
61
|
+
data = line.removeprefix(b"data:").strip()
|
|
62
|
+
if data == b"[DONE]":
|
|
63
|
+
break
|
|
64
|
+
yield data.decode()
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
'''
|
|
68
|
+
class LMStudioSchemaEntry(BaseData):
    """One field of an LM Studio model config schema: a declared type name
    plus an optional default value."""
    # e.g. "string"
    type: str
    # NOTE(review): the (Any, None) tuple annotation appears to be the
    # BaseData convention for an optional field — confirm against BaseData.
    default: (Any, None)
|
|
71
|
+
|
|
72
|
+
class LMStudioConfigSchema(BaseData):
    """Config schema for the LM Studio proxy model; every value is an
    LMStudioSchemaEntry (type + default)."""
    endpoint: LMStudioSchemaEntry
    api_key: LMStudioSchemaEntry
    model_id: LMStudioSchemaEntry
    def get_keys(self): # Make sure all other keys are entries as well.
        """Return (required, optional) key specs.

        Deliberately replaces the base required-keys mapping with the
        wildcard "*" so ANY key — declared or not — must validate as an
        LMStudioSchemaEntry.
        """
        req,opt = super().get_keys()
        req = {"*":LMStudioSchemaEntry}
        return req,opt
|
|
80
|
+
|
|
81
|
+
class LMStudioModel(BaseData):
    """Registration record for an LM Studio model directory
    (~/.lmstudio/models/<name>/model.json)."""
    # directory / registry name of the model
    name: str
    # human-readable label
    display_name: str
    # filename of the entry script copied next to model.json
    entrypoint: str
    config_schema: LMStudioConfigSchema
|
|
86
|
+
|
|
87
|
+
class TestLLMProxyService(unittest.TestCase):
    """Integration tests for LLMProxyService.

    NOTE(review): these are live tests — they start the proxy server on
    127.0.0.1:8003 and (except test_register) require a valid OpenAI key
    resolved from the environment; they are not unit tests.
    """

    @staticmethod
    def get_model_config():
        """Build the LM Studio registration record used by all tests.

        Defaults point the model at the local proxy (http://localhost:8003)
        with the <<OPENAI_API_KEY>> placeholder as api_key.
        """

        model = LMStudioModel({
            LMStudioModel.name: "lama-service",
            LMStudioModel.display_name:"LamaService Proxy",
            LMStudioModel.entrypoint: "model.py",
            LMStudioModel.config_schema: LMStudioConfigSchema({
                LMStudioConfigSchema.endpoint: {
                    LMStudioSchemaEntry.type: "string",
                    LMStudioSchemaEntry.default: "http://localhost:8003"
                },
                LMStudioConfigSchema.api_key: {
                    LMStudioSchemaEntry.type: "string",
                    LMStudioSchemaEntry.default: "<<OPENAI_API_KEY>>"
                },
                LMStudioConfigSchema.model_id: {
                    LMStudioSchemaEntry.type: "string",
                    LMStudioSchemaEntry.default: "gpt-3.5-turbo"
                }
            })
        })
        return model




    def test_register(self):
        """Write model.json into ~/.lmstudio/models/<name> and copy the
        entrypoint script there; verify both land on disk."""

        model = TestLLMProxyService.get_model_config()

        base_dir = Path.home() / ".lmstudio" / "models" / model[LMStudioModel.name]
        base_dir.mkdir(parents=True, exist_ok=True)

        model_json = base_dir / "model.json"
        with model_json.open("w") as f:
            json.dump(dict(model), f, indent=2)

        # verify registration
        self.assertTrue(model_json.exists())
        data = json.loads(model_json.read_text())
        self.assertEqual(data.get("name"), model[LMStudioModel.name])
        self.assertEqual(data.get("entrypoint"), model[LMStudioModel.entrypoint])
        # from proxy_service_model import ProxyServiceModel copy tp model.py
        src = Path(__file__).parent / "proxy_service_model.py"
        dst = base_dir / model[LMStudioModel.entrypoint]
        shutil.copy(src, dst)
        self.assertTrue(dst.exists())


    def test_chat_completions_via_model(self):
        """End-to-end: start the proxy, then drive it through
        ProxyServiceModel.generate() and .stream()."""

        # set up and start the proxy server
        hostSpec = {
            "protocol": "http",
            "host": "127.0.0.1",
            "port": "8003",
            "url": "/do_upload",
            "command_uri": "/cmd",
            "upload_dir": "./tmp/upload",
            "settings_dir": "./tmp/settings",
            "tokens": "<<DSH_SERVER_TOKEN_LIST>>",
            "open_ai_key": "<<OPENAI_API_KEY>>",
            "published": []
        }
        hostSpec = LLMProxyService.tHostSpec(hostSpec)
        # Clean state from any previous run.
        shutil.rmtree(hostSpec["upload_dir"], ignore_errors=True)
        shutil.rmtree(hostSpec["settings_dir"], ignore_errors=True)

        # Stop a possible stale instance, then start fresh.
        LLMProxyService.run(__command=["stop"], **hostSpec)
        time.sleep(0.2)
        LLMProxyService.run(__command=["start"], **hostSpec)
        time.sleep(0.5)

        # build config for the ProxyServiceModel from our schema
        model_cfg = TestLLMProxyService.get_model_config()
        entries = dict(model_cfg.config_schema)
        print(json.dumps(entries,indent=4))
        #return
        sc = LMStudioConfigSchema
        # Use the schema defaults as the live configuration values.
        conf = {
            sc.endpoint: LMStudioSchemaEntry(entries[sc.endpoint]).default,
            sc.api_key: LMStudioSchemaEntry(entries[sc.api_key]).default,
            sc.model_id: LMStudioSchemaEntry(entries[sc.model_id]).default
        }
        assert conf["endpoint"]
        assert conf["api_key"]
        assert conf["model_id"]
        proxy_model = ProxyServiceModel(**conf)

        # non-streaming check
        reply = proxy_model.generate([{"role": "user", "content": "Hello, world!"}])
        self.assertIsInstance(reply, str)
        self.assertTrue(reply.strip())

        # streaming check
        stream_iter = proxy_model.stream([{"role": "user", "content": "Hello again!"}])
        collected = ""
        for chunk in stream_iter:
            try:
                data = json.loads(chunk)
            except json.JSONDecodeError:
                continue
            # Accumulate the incremental delta text from each SSE chunk.
            for choice in data.get("choices", []):
                text = choice.get("delta", {}).get("content")
                if text:
                    collected += text

        self.assertTrue(len(collected.strip()) > 3, f"Stream did not yield enough content ->[{collected}]")
        print(f"!!! {collected}")
        # The generator must be exhausted once the stream completes.
        with self.assertRaises(StopIteration):
            next(stream_iter)

        # shut down
        LLMProxyService.run(__command=["stop"], **hostSpec)

    def test_chat_completions_direct(self):
        """End-to-end: start the proxy and hit its HTTP endpoints directly
        (chat completions, model listing, health)."""
        load_dotenv()

        hostSpec = {
            "protocol": "http",
            "host": "127.0.0.1",
            "port": "8003",
            "url": "/do_upload",
            "command_uri": "/cmd",
            "upload_dir": "./tmp/upload",
            "settings_dir": "./tmp/settings",
            "tokens": "<<DSH_SERVER_TOKEN_LIST>>",
            "open_ai_key": "<<OPENAI_API_KEY>>",
            "published": []
        }
        hostSpec = LLMProxyService.tHostSpec(hostSpec)
        # clean before start
        shutil.rmtree(hostSpec["upload_dir"], ignore_errors=True)
        shutil.rmtree(hostSpec["settings_dir"], ignore_errors=True)

        # start server
        LLMProxyService.run(__command=["stop"], **hostSpec)
        time.sleep(1)
        LLMProxyService.run(__command=["start"], **hostSpec)
        time.sleep(0.2)

        # hit chat completions proxy
        resp = requests.post(
            f"http://{hostSpec['host']}:{hostSpec['port']}/v1/chat/completions",
            json={
                "model": "gpt-3.5-turbo",
                "messages": [{"role": "user", "content": "Hello, world!"}]
            }
        )
        print(resp.json())
        self.assertEqual(resp.status_code, 200)
        data = resp.json()
        self.assertIn("choices", data)

        # hit models proxy
        resp = requests.get(f"http://{hostSpec['host']}:{hostSpec['port']}/v1/models")
        self.assertEqual(resp.status_code, 200)
        models = resp.json()
        self.assertIsInstance(models, dict)
        self.assertIn("data", models)
        self.assertIsInstance(models["data"], list)

        # hit health endpoint
        resp = requests.get(f"http://{hostSpec['host']}:{hostSpec['port']}/v1")
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp.json(), {"status": "ok"})
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
if __name__ == "__main__":
    # Only the direct HTTP test runs by default; the commented-out
    # alternatives select the other live tests.
    #unittest.main()
    from dotenv import load_dotenv
    load_dotenv()
    #unittest.main(defaultTest="TestLLMProxyService.test_chat_completions_via_model")
    unittest.main(defaultTest="TestLLMProxyService.test_chat_completions_direct")

    #unittest.main(defaultTest="TestLLMProxyService.test_register")
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from lmstudio import BaseModel
|
|
2
|
+
import requests
|
|
3
|
+
from typing import List, Dict, Any
|
|
4
|
+
|
|
5
|
+
class ProxyServiceModel(BaseModel):
    """LM Studio model shim that forwards chat completions to an
    OpenAI-compatible proxy endpoint (e.g. http://localhost:8003)."""
    endpoint: str # e.g. "http://localhost:8003"
    api_key: str
    model_id: str

    def _require_config(self, messages) -> None:
        # Validate with real exceptions instead of `assert x != None`:
        # asserts are stripped under `python -O` and `is None` is the
        # correct identity check.
        if self.endpoint is None:
            raise ValueError("endpoint is not configured")
        if self.model_id is None:
            raise ValueError("model_id is not configured")
        if messages is None:
            raise ValueError("messages must not be None")

    def generate(self,
                 messages: List[Dict[str, str]],
                 **kwargs: Any) -> str:
        """Send a non-streaming chat completion; return the reply text.

        Raises ValueError when not configured, and requests.HTTPError
        for non-2xx upstream responses.
        """
        self._require_config(messages)
        url = f"{self.endpoint}/v1/chat/completions"

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {"model": self.model_id, "messages": messages, **kwargs}
        r = requests.post(url, json=payload, headers=headers, timeout=60)
        r.raise_for_status()
        return r.json()["choices"][0]["message"]["content"]

    def stream(self,
               messages: List[Dict[str, str]],
               **kwargs: Any):
        """Yield raw SSE data payloads (decoded str) from a streaming
        chat completion, stopping at the [DONE] sentinel.

        Raises ValueError when not configured, and requests.HTTPError
        for non-2xx upstream responses.
        """
        self._require_config(messages)
        url = f"{self.endpoint}/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {"model": self.model_id,
                   "messages": messages,
                   "stream": True,
                   **kwargs}
        # `with` ensures the streaming connection is released even if the
        # consumer abandons the generator mid-stream.
        with requests.post(url, json=payload,
                           headers=headers,
                           stream=True,
                           timeout=None) as r:
            r.raise_for_status()
            for line in r.iter_lines():
                if line.startswith(b"data:"):
                    data = line.removeprefix(b"data:").strip()
                    if data == b"[DONE]":
                        break
                    yield data.decode()
|