tiny-lfm-builtin 0.0.1__cp38-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tiny_lfm.py
ADDED
@@ -0,0 +1,185 @@
+import os, sys
+import tiny_lfm_builtin
+from typing import List, Dict, Optional, Union, Generator
+import urllib.request
+
+# Configuration
+MODEL_URL = "https://huggingface.co/cnmoro/LFM2-350M-Q4_0-GGUF/resolve/main/model-q4.gguf"
+CACHE_DIR = os.path.join(os.path.expanduser("~"), ".cache", "tiny_lfm_builtin")
+MODEL_FILENAME = "model-q4.gguf"
+
+class TinyLFM:
+    def __init__(self):
+        """
+        Initialize the Liquid LFM model.
+
+        The GGUF weights are loaded from the local cache directory and are
+        downloaded automatically on first use; no arguments are required.
+        """
+        os.makedirs(CACHE_DIR, exist_ok=True)
+        model_path = os.path.join(CACHE_DIR, MODEL_FILENAME)
+
+        if not os.path.exists(model_path):
+            self._download_model(model_path)
+
+        if not os.path.exists(model_path):
+            raise FileNotFoundError(f"Model file not found at: {model_path}")
+
+        print(f"Loading LFM Engine from {model_path}...")
+        self._engine = tiny_lfm_builtin.LiquidLFM(model_path)
+        print("Engine loaded. KV Cache is active.")
+
+    def _download_model(self, dest_path: str):
+        print("Model not found locally.")
+        print(f"Downloading LFM2-350M (approx 200MB) to {dest_path}...")
+
+        def _progress(count, block_size, total_size):
+            percent = min(int(count * block_size * 100 / total_size), 100) if total_size > 0 else 0
+            sys.stdout.write(f"\rDownload: {percent}%")
+            sys.stdout.flush()
+
+        try:
+            urllib.request.urlretrieve(MODEL_URL, dest_path, reporthook=_progress)
+            print("\nDownload complete.")
+        except KeyboardInterrupt:
+            print("\nDownload cancelled.")
+            if os.path.exists(dest_path): os.remove(dest_path)
+            sys.exit(1)
+        except Exception as e:
+            print(f"\nError downloading model: {e}")
+            if os.path.exists(dest_path): os.remove(dest_path)
+            raise
+
+    def chat(self,
+             messages: List[Dict[str, str]],
+             max_tokens: Optional[int] = None,
+             stream: bool = True) -> Union[str, Generator[str, None, None]]:
+        """
+        Regular chat generation. Maintains history automatically via the input list.
+        KV caching is handled automatically by the Rust engine based on prefix matching.
+
+        Args:
+            messages: List of dicts, e.g. [{"role": "user", "content": "..."}]
+            max_tokens: Maximum new tokens to generate.
+            stream: If True, returns a generator. If False, returns the full string.
+        """
+        streamer = self._engine.generate(messages, max_tokens) if max_tokens else self._engine.generate(messages)
+
+        if stream:
+            return self._stream_wrapper(streamer)
+        else:
+            return "".join(list(streamer))
+
+    def completion(self,
+                   prompt: str,
+                   system_prompt: Optional[str] = None,
+                   assistant_start: Optional[str] = None,
+                   stop: Optional[Union[str, List[str]]] = None,
+                   max_tokens: Optional[int] = None,
+                   stream: bool = True) -> Union[str, Generator[str, None, None]]:
+        """
+        Raw completion with 'prompt hacking' capabilities.
+        Allows pre-filling the assistant's response to guide output (e.g., forcing JSON).
+
+        Args:
+            prompt: The user's input/query.
+            system_prompt: Optional system instruction.
+            assistant_start: Text to pre-fill the assistant's response with.
+                The model will continue generating from this point.
+            stop: A string or list of strings that should stop generation.
+            max_tokens: Max new tokens.
+            stream: Yield tokens as they arrive.
+        """
+        # 1. Construct the raw prompt manually to allow template hacking
+        full_prompt = ""
+
+        if system_prompt:
+            full_prompt += f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
+
+        full_prompt += f"<|im_start|>user\n{prompt}<|im_end|>\n"
+        full_prompt += "<|im_start|>assistant\n"
+
+        if assistant_start:
+            # Append the pre-fill without an EOS token, so the model continues it
+            full_prompt += assistant_start
+
+        # 2. Call the Rust engine.
+        # The engine will automatically check if 'full_prompt' shares a prefix
+        # with the previous generation and reuse the KV cache.
+        streamer = self._engine.completion(full_prompt, max_tokens) if max_tokens else self._engine.completion(full_prompt)
+
+        # 3. Handle Python-side stop sequences
+        stop_sequences = []
+        if stop:
+            stop_sequences = [stop] if isinstance(stop, str) else stop
+
+        generator = self._stop_aware_iterator(streamer, stop_sequences)
+
+        if stream:
+            return generator
+        else:
+            return "".join(list(generator))
+
+    def save_cache(self, session_name: str):
+        """Saves the current KV cache to disk."""
+        self._engine.save_session(session_name)
+
+    def load_cache(self, session_name: str):
+        """Loads a KV cache from disk."""
+        self._engine.load_session(session_name)
+
+    def _stream_wrapper(self, rust_streamer) -> Generator[str, None, None]:
+        """Simple wrapper to yield from the Rust streamer."""
+        for token in rust_streamer:
+            yield token
+
+    def _stop_aware_iterator(self, rust_streamer, stop_sequences: List[str]) -> Generator[str, None, None]:
+        """
+        Wraps the Rust streamer to implement custom stop sequences in Python.
+        Note: The Rust engine handles the standard EOS token (<|im_end|>) internally.
+        """
+        generated_text = ""
+
+        for token in rust_streamer:
+            yield token
+            generated_text += token
+
+            # Check for stop sequences
+            if stop_sequences:
+                for seq in stop_sequences:
+                    if seq in generated_text:
+                        return  # Stop generation immediately
+
+if __name__ == "__main__":
+    try:
+        lfm = TinyLFM()
+
+        print("\n--- 1. Regular Chat Streaming ---")
+        history = [{"role": "user", "content": "What is 2+2?"}]
+        for token in lfm.chat(history):
+            print(token, end="", flush=True)
+        print("\n")
+
+        print("--- 2. Prompt Hacking (JSON Mode) ---")
+        # Scenario: We want to extract keywords as a JSON list.
+
+        sys_p = "You are a data extraction tool. Output only JSON."
+        user_p = "Extract keywords from: 'Liquid AI released LFM2, a powerful edge model.'"
+        pre_fill = "Sure, here are the keywords in JSON format:\n```json\n[\n"
+
+        stream = lfm.completion(
+            prompt=user_p,
+            system_prompt=sys_p,
+            assistant_start=pre_fill,
+            stop="]",  # Stop when it tries to close the block
+            stream=True
+        )
+
+        for token in stream:
+            print(token, end="", flush=True)
+        print("\n")
+
+    except FileNotFoundError as e:
+        print(e)
+    except Exception as e:
+        print(f"An error occurred: {e}")
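The module's own __main__ demo above covers streaming chat and assistant pre-filling, but it never exercises the session helpers. The sketch below shows how save_cache/load_cache and non-streaming chat might be combined; it is illustrative only and assumes the wheel's top-level layout shown in the RECORD section below (so the wrapper imports as tiny_lfm) and that save_session/load_session accept an arbitrary session name, neither of which is documented beyond the code itself.

# Sketch only: the import path and the session-name semantics are assumptions.
from tiny_lfm import TinyLFM

lfm = TinyLFM()

# Non-streaming chat returns the full reply as a string.
history = [{"role": "user", "content": "Summarise KV caching in one sentence."}]
reply = lfm.chat(history, max_tokens=64, stream=False)
print(reply)

# Persist the current KV cache; a later process can reload it and, via the
# engine's prefix matching, skip re-processing the shared conversation prefix.
lfm.save_cache("demo_session")

lfm2 = TinyLFM()
lfm2.load_cache("demo_session")
history += [{"role": "assistant", "content": reply},
            {"role": "user", "content": "Now in five words."}]
for token in lfm2.chat(history):  # default stream=True yields tokens as they arrive
    print(token, end="", flush=True)
print()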
tiny_lfm_builtin\tiny_lfm_builtin.pyd
ADDED
Binary file
tiny_lfm_builtin-0.0.1.dist-info\METADATA
ADDED
@@ -0,0 +1,15 @@
+Metadata-Version: 2.4
+Name: tiny_lfm_builtin
+Version: 0.0.1
+Classifier: Programming Language :: Rust
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Operating System :: Microsoft :: Windows
+Classifier: Operating System :: MacOS
+Summary: LiquidAI-LFM2-350M embedded in a python package (200mb); inference with rust; completely encapsulated
+Author-email: Carlo Moro <cnmoro@gmail.com>
+Requires-Python: >=3.8
+Project-URL: Repository, https://github.com/cnmoro/tiny-lfm-builtin
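The cp38-abi3-win_amd64 tag in the wheel filename means a single binary built against the stable ABI covers CPython 3.8 and newer on 64-bit Windows, matching Requires-Python above. A quick post-install smoke test follows; it is a sketch that assumes the index name matches the wheel filename (tiny-lfm-builtin) and the module layout listed in the RECORD below.

# Hypothetical check after installing with pip install tiny-lfm-builtin:
# confirm that both the compiled extension and the pure-Python wrapper import,
# and show where the ~200 MB GGUF file will be cached, without downloading it.
import tiny_lfm_builtin                     # the Rust extension (tiny_lfm_builtin.pyd)
from tiny_lfm import CACHE_DIR, MODEL_URL   # constants defined in tiny_lfm.py above

print("extension loaded from:", tiny_lfm_builtin.__file__)
print("model cache directory:", CACHE_DIR)
print("model source URL:", MODEL_URL)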
tiny_lfm_builtin-0.0.1.dist-info\RECORD
ADDED
@@ -0,0 +1,6 @@
+tiny_lfm.py,sha256=O9a2Ov-Lo6jYez3WgUAITylnFVH7rad8amTsBTY9Gxs,7374
+tiny_lfm_builtin\__init__.py,sha256=AC4g_i8_eQZutzQZXoyUQqtAaiC3zw1Z5SEuGjbF3kY,147
+tiny_lfm_builtin\tiny_lfm_builtin.pyd,sha256=7gzLftdDhpsbofu6NZrhCLvA0FSAsABfc4ff617IlB0,10788352
+tiny_lfm_builtin-0.0.1.dist-info\METADATA,sha256=O2ul0RlrjaJD-tFMsyQiRr6kLLDE5Yble2xt754U7oQ,725
+tiny_lfm_builtin-0.0.1.dist-info\WHEEL,sha256=gPqN4EsdiAyGvmfrYy_ONrF276O8o0hPitI2CKZrEFA,95
+tiny_lfm_builtin-0.0.1.dist-info\RECORD,,
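Each RECORD row has the form path,sha256=<digest>,size, where the digest is the URL-safe base64 encoding of the file's SHA-256 hash with trailing padding stripped, as the wheel format specifies. A short sketch for recomputing one of these digests against an installed file:

# Recompute a RECORD-style digest; for an intact install, record_digest("tiny_lfm.py")
# should match the value listed for tiny_lfm.py in the RECORD above.
import base64
import hashlib

def record_digest(path: str) -> str:
    with open(path, "rb") as f:
        raw = hashlib.sha256(f.read()).digest()
    return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

if __name__ == "__main__":
    print(record_digest("tiny_lfm.py"))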