smol-html 0.1.5__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {smol_html-0.1.5 → smol_html-0.1.6}/PKG-INFO +35 -6
- {smol_html-0.1.5 → smol_html-0.1.6}/README.md +61 -32
- {smol_html-0.1.5 → smol_html-0.1.6}/pyproject.toml +1 -1
- {smol_html-0.1.5 → smol_html-0.1.6}/.gitignore +0 -0
- {smol_html-0.1.5 → smol_html-0.1.6}/LICENSE +0 -0
- {smol_html-0.1.5 → smol_html-0.1.6}/smol.png +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: smol-html
|
3
|
-
Version: 0.1.5
|
3
|
+
Version: 0.1.6
|
4
4
|
Summary: Small, dependable HTML cleaner/minifier with sensible defaults
|
5
5
|
Project-URL: Homepage, https://github.com/NosibleAI/smol-html
|
6
6
|
Project-URL: Repository, https://github.com/NosibleAI/smol-html
|
@@ -125,11 +125,17 @@ out = cleaner.make_smol(raw_html="<p>Hi</p>")
|
|
125
125
|
|
126
126
|
## Compressed Bytes Output
|
127
127
|
|
128
|
-
Produce compressed bytes using Brotli with `make_smol_bytes`
|
128
|
+
Produce compressed bytes using Brotli with `make_smol_bytes`.
|
129
129
|
|
130
|
+
- By default, the compressed bytes are URL-safe Base64 encoded (`base64_encode=True`).
|
131
|
+
- When Base64 is enabled (the default), you must Base64-decode the output before Brotli-decompressing.
|
132
|
+
- You can disable Base64 by passing `base64_encode=False` and decompress directly.
|
133
|
+
|
134
|
+
Default (Base64-encoded) output:
|
130
135
|
|
131
136
|
```python
|
132
137
|
from smol_html import SmolHtmlCleaner
|
138
|
+
import base64
|
133
139
|
import brotli # only needed if you want to decompress here in the example
|
134
140
|
|
135
141
|
html = """
|
@@ -145,15 +151,31 @@ cleaner = SmolHtmlCleaner()
|
|
145
151
|
# Get compressed bytes (quality 11 is strong compression)
|
146
152
|
compressed = cleaner.make_smol_bytes(raw_html=html, compression_level=11)
|
147
153
|
|
148
|
-
#
|
149
|
-
|
154
|
+
# Because Base64 is enabled by default, decode before decompressing
|
155
|
+
decoded = base64.urlsafe_b64decode(compressed)
|
156
|
+
decompressed = brotli.decompress(decoded).decode("utf-8")
|
150
157
|
print(decompressed)
|
151
158
|
|
152
|
-
# Or write compressed output directly to a file
|
153
|
-
with open("page.html.br", "wb") as f:
|
159
|
+
# Or write Base64-encoded compressed output directly to a file
|
160
|
+
with open("page.html.br.b64", "wb") as f:
|
154
161
|
f.write(compressed)
|
155
162
|
```
|
156
163
|
|
164
|
+
Disable Base64 and decompress directly:
|
165
|
+
|
166
|
+
```python
|
167
|
+
from smol_html import SmolHtmlCleaner
|
168
|
+
import brotli
|
169
|
+
|
170
|
+
cleaner = SmolHtmlCleaner()
|
171
|
+
compressed_raw = cleaner.make_smol_bytes(
|
172
|
+
raw_html="<p>Hi</p>",
|
173
|
+
compression_level=11,
|
174
|
+
base64_encode=False,
|
175
|
+
)
|
176
|
+
print(brotli.decompress(compressed_raw).decode("utf-8"))
|
177
|
+
```
|
178
|
+
|
157
179
|
## Parameter Reference
|
158
180
|
|
159
181
|
To improve readability, the reference is split into two tables:
|
@@ -213,3 +235,10 @@ To improve readability, the reference is split into two tables:
|
|
213
235
|
| `remove_unknown_tags` | `bool` | `True` |
|
214
236
|
| `safe_attrs_only` | `bool` | `True` |
|
215
237
|
| `safe_attrs` | `set[str]` | curated set |
|
238
|
+
|
239
|
+
### `make_smol_bytes` Options
|
240
|
+
|
241
|
+
| Parameter | Type | Default |
|
242
|
+
|---|---|---|
|
243
|
+
| `compression_level` | `int` | `4` |
|
244
|
+
| `base64_encode` | `bool` | `True` |
|
@@ -93,36 +93,58 @@ cleaner = SmolHtmlCleaner(
|
|
93
93
|
out = cleaner.make_smol(raw_html="<p>Hi</p>")
|
94
94
|
```
|
95
95
|
|
96
|
-
## Compressed Bytes Output
|
97
|
-
|
98
|
-
Produce compressed bytes using Brotli with `make_smol_bytes`
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
"""
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
96
|
+
## Compressed Bytes Output
|
97
|
+
|
98
|
+
Produce compressed bytes using Brotli with `make_smol_bytes`.
|
99
|
+
|
100
|
+
- By default, the compressed bytes are URL-safe Base64 encoded (`base64_encode=True`).
|
101
|
+
- When Base64 is enabled (the default), you must Base64-decode the output before Brotli-decompressing.
|
102
|
+
- You can disable Base64 by passing `base64_encode=False` and decompress directly.
|
103
|
+
|
104
|
+
Default (Base64-encoded) output:
|
105
|
+
|
106
|
+
```python
|
107
|
+
from smol_html import SmolHtmlCleaner
|
108
|
+
import base64
|
109
|
+
import brotli # only needed if you want to decompress here in the example
|
110
|
+
|
111
|
+
html = """
|
112
|
+
<html>
|
113
|
+
<body>
|
114
|
+
<div> Hello <span> world </span> </div>
|
115
|
+
</body>
|
116
|
+
</html>
|
117
|
+
"""
|
118
|
+
|
119
|
+
cleaner = SmolHtmlCleaner()
|
120
|
+
|
121
|
+
# Get compressed bytes (quality 11 is strong compression)
|
122
|
+
compressed = cleaner.make_smol_bytes(raw_html=html, compression_level=11)
|
123
|
+
|
124
|
+
# Because Base64 is enabled by default, decode before decompressing
|
125
|
+
decoded = base64.urlsafe_b64decode(compressed)
|
126
|
+
decompressed = brotli.decompress(decoded).decode("utf-8")
|
127
|
+
print(decompressed)
|
128
|
+
|
129
|
+
# Or write Base64-encoded compressed output directly to a file
|
130
|
+
with open("page.html.br.b64", "wb") as f:
|
131
|
+
f.write(compressed)
|
132
|
+
```
|
133
|
+
|
134
|
+
Disable Base64 and decompress directly:
|
135
|
+
|
136
|
+
```python
|
137
|
+
from smol_html import SmolHtmlCleaner
|
138
|
+
import brotli
|
139
|
+
|
140
|
+
cleaner = SmolHtmlCleaner()
|
141
|
+
compressed_raw = cleaner.make_smol_bytes(
|
142
|
+
raw_html="<p>Hi</p>",
|
143
|
+
compression_level=11,
|
144
|
+
base64_encode=False,
|
145
|
+
)
|
146
|
+
print(brotli.decompress(compressed_raw).decode("utf-8"))
|
147
|
+
```
|
126
148
|
|
127
149
|
## Parameter Reference
|
128
150
|
|
@@ -157,7 +179,7 @@ To improve readability, the reference is split into two tables:
|
|
157
179
|
| `safe_attrs_only` | Only allow attributes listed in `safe_attrs`. | Set `False` if you need to keep arbitrary attributes. |
|
158
180
|
| `safe_attrs` | Allowed HTML attributes when `safe_attrs_only=True`. | Extend to keep additional attributes you trust. |
|
159
181
|
|
160
|
-
### Types and Defaults
|
182
|
+
### Types and Defaults
|
161
183
|
|
162
184
|
| Parameter | Type | Default |
|
163
185
|
|---|---|---|
|
@@ -182,4 +204,11 @@ To improve readability, the reference is split into two tables:
|
|
182
204
|
| `kill_tags` | `set[str] | None` | `None` |
|
183
205
|
| `remove_unknown_tags` | `bool` | `True` |
|
184
206
|
| `safe_attrs_only` | `bool` | `True` |
|
185
|
-
| `safe_attrs` | `set[str]` | curated set |
|
207
|
+
| `safe_attrs` | `set[str]` | curated set |
|
208
|
+
|
209
|
+
### `make_smol_bytes` Options
|
210
|
+
|
211
|
+
| Parameter | Type | Default |
|
212
|
+
|---|---|---|
|
213
|
+
| `compression_level` | `int` | `4` |
|
214
|
+
| `base64_encode` | `bool` | `True` |
|
File without changes
|
File without changes
|
File without changes
|