tigrcorn-http 0.3.16.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tigrcorn_http/etag.py ADDED
@@ -0,0 +1,133 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from dataclasses import dataclass
5
+
6
+
7
+ @dataclass(frozen=True, slots=True)
8
+ class EntityTag:
9
+ value: str
10
+ weak: bool = False
11
+
12
+ def to_bytes(self) -> bytes:
13
+ return format_etag(self.value, weak=self.weak)
14
+
15
+
16
+ @dataclass(frozen=True, slots=True)
17
+ class EntityTagList:
18
+ any_value: bool
19
+ items: tuple[EntityTag, ...] = ()
20
+
21
+
22
+ def _normalize_opaque_tag(value: bytes | str) -> str:
23
+ if isinstance(value, bytes):
24
+ text = value.decode('latin1')
25
+ else:
26
+ text = value
27
+ return text.replace('\\', '\\\\').replace('"', '\\"')
28
+
29
+
30
def format_etag(value: bytes | str, *, weak: bool = False) -> bytes:
    """Serialize *value* as a double-quoted entity tag.

    The value is escaped by ``_normalize_opaque_tag`` and encoded as latin-1.
    When *weak* is true the result carries the ``W/`` prefix.
    """
    quoted = b'"' + _normalize_opaque_tag(value).encode('latin1') + b'"'
    return b'W/' + quoted if weak else quoted
34
+
35
+
36
def generate_entity_tag(payload: bytes, *, weak: bool = False) -> bytes:
    """Build a serialized entity tag from a BLAKE2s digest of *payload*.

    The digest is 16 bytes long and rendered as hexadecimal text before
    being passed to ``format_etag``.
    """
    hasher = hashlib.blake2s(payload, digest_size=16)
    return format_etag(hasher.hexdigest(), weak=weak)
39
+
40
+
41
def parse_entity_tag(raw: bytes | str | None) -> EntityTag | None:
    """Parse one serialized entity tag.

    Returns ``None`` when *raw* is ``None`` or is not wrapped in double
    quotes (after an optional ``W/`` or ``w/`` weak prefix). Surrounding
    whitespace is ignored. The text between the outer quotes is kept
    verbatim; backslash escapes are not decoded.
    """
    if raw is None:
        return None
    encoded = raw.encode('latin1') if isinstance(raw, str) else bytes(raw)
    data = encoded.strip()

    # The weak marker only counts when it is directly followed by a quote.
    weak = data[:3] in (b'W/"', b'w/"')
    if weak:
        data = data[2:]

    properly_quoted = len(data) >= 2 and data.startswith(b'"') and data.endswith(b'"')
    if not properly_quoted:
        return None
    return EntityTag(data[1:-1].decode('latin1', 'strict'), weak=weak)
57
+
58
+
59
def parse_entity_tag_list(raw: bytes | str | None) -> EntityTagList | None:
    """Parse a comma-separated list of entity tags.

    Returns:
        ``None`` when *raw* is ``None``, when a quoted tag is never closed,
        or when any non-empty element is rejected by ``parse_entity_tag``.
        Otherwise an ``EntityTagList``: ``any_value=True`` for the bare
        ``*`` wildcard, else the parsed tags in order (possibly none).

    Commas inside a quoted tag do not split it, and a backslash inside
    quotes protects the following byte. Empty list elements (leading,
    repeated or trailing commas) are skipped, since recipients are expected
    to ignore them in list-valued fields.
    """
    if raw is None:
        return None
    encoded = raw.encode('latin1') if isinstance(raw, str) else bytes(raw)
    data = encoded.strip()
    if not data:
        return EntityTagList(any_value=False, items=())
    if data == b'*':
        return EntityTagList(any_value=True, items=())

    backslash, quote, comma = 0x5C, 0x22, 0x2C

    items: list[EntityTag] = []
    token = bytearray()
    in_quotes = False
    escape = False
    for byte in data:
        if in_quotes:
            token.append(byte)
            if escape:
                # The previous byte was a backslash: take this one literally.
                escape = False
            elif byte == backslash:
                escape = True
            elif byte == quote:
                in_quotes = False
            continue
        if byte == comma:
            element = bytes(token).strip()
            token.clear()
            if not element:
                # Empty list element: ignore instead of rejecting the header.
                continue
            item = parse_entity_tag(element)
            if item is None:
                return None
            items.append(item)
            continue
        if byte == quote:
            in_quotes = True
        token.append(byte)

    if in_quotes:
        # Unterminated quoted tag.
        return None
    final = bytes(token).strip()
    if final:
        item = parse_entity_tag(final)
        if item is None:
            return None
        items.append(item)
    return EntityTagList(any_value=False, items=tuple(items))
108
+
109
+
110
def strong_compare(left: EntityTag | None, right: EntityTag | None) -> bool:
    """Return True only when both tags exist, neither is weak, and values match."""
    if left is None or right is None:
        return False
    both_strong = not (left.weak or right.weak)
    return both_strong and left.value == right.value
116
+
117
+
118
def weak_compare(left: EntityTag | None, right: EntityTag | None) -> bool:
    """Return True when both tags exist and their values match, ignoring weakness."""
    if left is not None and right is not None:
        return left.value == right.value
    return False
122
+
123
+
124
# Public API of the entity-tag module.
__all__ = [
    'EntityTag',
    'EntityTagList',
    'format_etag',
    'generate_entity_tag',
    'parse_entity_tag',
    'parse_entity_tag_list',
    'strong_compare',
    'weak_compare',
]
tigrcorn_http/py.typed ADDED
@@ -0,0 +1 @@
1
+
tigrcorn_http/range.py ADDED
@@ -0,0 +1,293 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+ from tigrcorn_asgi.send import FileBodySegment, MemoryBodySegment
8
+ from tigrcorn_http.conditional import parse_http_date
9
+ from tigrcorn_http.etag import parse_entity_tag, strong_compare
10
+ from tigrcorn_protocols.http1.serializer import response_allows_body
11
+ from tigrcorn_core.utils.headers import append_if_missing, get_header, replace_header
12
+
13
+
14
# Response headers as an ordered list of (name, value) byte pairs.
HeaderList = list[tuple[bytes, bytes]]
15
+
16
+
17
+ @dataclass(frozen=True, slots=True)
18
+ class ByteRange:
19
+ start: int
20
+ end: int
21
+
22
+
23
+ @dataclass(frozen=True, slots=True)
24
+ class RangeEvaluation:
25
+ status: int
26
+ headers: HeaderList
27
+ body: bytes
28
+ applied: bool = False
29
+ unsatisfied: bool = False
30
+
31
+
32
+ @dataclass(frozen=True, slots=True)
33
+ class FileRangePlan:
34
+ status: int
35
+ headers: HeaderList
36
+ body_length: int
37
+ parts: tuple[ByteRange, ...] = ()
38
+ boundary: bytes | None = None
39
+ applied: bool = False
40
+ unsatisfied: bool = False
41
+
42
+
43
def parse_range_header(value: bytes | str | None, *, resource_length: int) -> list[ByteRange] | None:
    """Resolve a ``Range`` header against a resource of *resource_length* bytes.

    Returns:
        ``None`` when the header is absent, uses a unit other than ``bytes``,
        or is malformed; callers in this module then serve the full body.
        An empty list when the header is well-formed but no range overlaps
        the resource. Otherwise the resolved ranges in request order, with
        end offsets clamped to the last byte. Overlapping or duplicate
        ranges are returned as-is.

    Positions must consist solely of ASCII digits (surrounding whitespace is
    tolerated). A bare ``int()`` would also accept forms such as ``+5``,
    ``1_0`` or non-ASCII digits, which are not valid byte positions.
    """
    if value is None:
        return None
    raw = value.decode('latin1') if isinstance(value, bytes) else value
    unit, sep, spec = raw.partition('=')
    if sep != '=' or unit.strip().lower() != 'bytes':
        return None

    def _position(text: str) -> int | None:
        # Accept only plain ASCII digit runs.
        digits = text.strip()
        if not (digits.isascii() and digits.isdigit()):
            return None
        try:
            return int(digits)
        except ValueError:
            # Very long digit strings can exceed the interpreter's int
            # conversion limit; treat them as malformed.
            return None

    last_index = resource_length - 1
    ranges: list[ByteRange] = []
    for part in spec.split(','):
        token = part.strip()
        first_raw, dash, last_raw = token.partition('-')
        if not dash:
            return None

        if not first_raw:
            # Suffix form "-N": the final N bytes of the resource.
            suffix_length = _position(last_raw)
            if suffix_length is None or suffix_length <= 0:
                return None
            if resource_length <= 0:
                continue
            start = max(resource_length - suffix_length, 0)
            end = last_index
        else:
            start = _position(first_raw)
            if start is None:
                return None
            if not last_raw:
                # Open-ended form "N-": through the end of the resource.
                end = last_index
            else:
                end = _position(last_raw)
                if end is None or start > end:
                    return None
                end = min(end, last_index)
            if start >= resource_length:
                # Syntactically valid but entirely past the end.
                continue
        ranges.append(ByteRange(start, end))
    return ranges
94
+
95
+
96
def _if_range_allows_range(request_headers: list[tuple[bytes, bytes]] | tuple[tuple[bytes, bytes], ...], response_headers: HeaderList) -> bool:
    """Decide whether an ``If-Range`` precondition permits a partial response.

    Returns True when the request has no ``If-Range`` header. A value that
    contains a double quote is treated as an entity tag and must strongly
    match the response ``ETag``. Any other value is treated as an HTTP date
    and must be exactly equal to the response ``Last-Modified`` date; an
    earlier or later date means the client's copy belongs to a different
    version, so the full representation must be sent instead.
    """
    if_range_raw = get_header(request_headers, b'if-range')
    if if_range_raw is None:
        return True
    if b'"' in if_range_raw:
        current = parse_entity_tag(get_header(response_headers, b'etag'))
        provided = parse_entity_tag(if_range_raw)
        return strong_compare(current, provided)
    current_last_modified = parse_http_date(get_header(response_headers, b'last-modified'))
    provided_date = parse_http_date(if_range_raw)
    if current_last_modified is None or provided_date is None:
        # No Last-Modified to compare against, or an unparseable date.
        return False
    # The date validator must match exactly, not merely be "not older".
    return current_last_modified == provided_date
109
+
110
+
111
def _multipart_boundary_for_ranges(*, total_length: int, response_headers: HeaderList) -> bytes:
    """Derive a deterministic multipart boundary for a response.

    The boundary is ``tigrcorn-`` followed by an 8-byte BLAKE2s hex digest
    of the response ``ETag`` value (empty when absent), a colon, and the
    decimal *total_length*.
    """
    etag = get_header(response_headers, b'etag') or b''
    seed = etag + b':' + str(total_length).encode('ascii')
    digest = hashlib.blake2s(seed, digest_size=8).hexdigest()
    return ('tigrcorn-' + digest).encode('ascii')
114
+
115
+
116
def _multipart_body(ranges: list[ByteRange], body: bytes, *, content_type: bytes | None) -> tuple[bytes, bytes]:
    """Assemble a ``multipart/byteranges`` payload from an in-memory body.

    The boundary is derived from a digest of *body* and its length.

    Returns:
        ``(boundary, payload)`` where *payload* holds one part per range
        followed by the closing delimiter.
    """
    size = len(body)
    body_digest = hashlib.blake2s(body, digest_size=8).hexdigest().encode('ascii')
    boundary = _multipart_boundary_for_ranges(total_length=size, response_headers=[(b'etag', body_digest)])
    delimiter = b'--' + boundary

    chunks: list[bytes] = []
    for item in ranges:
        header_lines = [delimiter]
        if content_type is not None:
            header_lines.append(b'Content-Type: ' + content_type)
        header_lines.append(f'Content-Range: bytes {item.start}-{item.end}/{size}'.encode('ascii'))
        payload = body[item.start : item.end + 1]
        chunks.append(b'\r\n'.join(header_lines) + b'\r\n\r\n' + payload + b'\r\n')
    chunks.append(delimiter + b'--\r\n')
    return boundary, b''.join(chunks)
128
+
129
+
130
+ def _multipart_part_prefix(item: ByteRange, *, total_length: int, boundary: bytes, content_type: bytes | None) -> bytes:
131
+ lines = [b'--' + boundary]
132
+ if content_type is not None:
133
+ lines.append(b'Content-Type: ' + content_type)
134
+ lines.append(b'Content-Range: bytes ' + f'{item.start}-{item.end}/{total_length}'.encode('ascii'))
135
+ return b'\r\n'.join(lines) + b'\r\n\r\n'
136
+
137
+
138
+ def _multipart_total_length(
139
+ ranges: tuple[ByteRange, ...],
140
+ *,
141
+ total_length: int,
142
+ boundary: bytes,
143
+ content_type: bytes | None,
144
+ ) -> int:
145
+ size = 0
146
+ for item in ranges:
147
+ size += len(_multipart_part_prefix(item, total_length=total_length, boundary=boundary, content_type=content_type))
148
+ size += (item.end - item.start + 1)
149
+ size += 2 # trailing CRLF
150
+ size += len(b'--' + boundary + b'--\r\n')
151
+ return size
152
+
153
+
154
def plan_file_byte_ranges(
    *,
    method: str,
    request_headers: list[tuple[bytes, bytes]] | tuple[tuple[bytes, bytes], ...],
    response_headers: HeaderList,
    resource_length: int,
    status: int,
) -> FileRangePlan:
    """Plan how a ``Range`` request applies to a file of *resource_length* bytes.

    No file data is read; only the status, headers and body length are
    computed. The response passes through unchanged (apart from lower-cased
    header names) unless the method is GET or HEAD, the status is 200 and
    the response has no ``Content-Encoding``. Eligible responses advertise
    ``Accept-Ranges: bytes``. A missing or unparseable ``Range`` header, or
    a failed ``If-Range`` check, also leaves the response unchanged.

    Returns:
        A 416 plan when no requested range is satisfiable, a 206 plan with
        ``Content-Range`` for a single range, or a 206
        ``multipart/byteranges`` plan for several ranges.
    """
    normalized = [(bytes(name).lower(), bytes(value)) for name, value in response_headers]

    def passthrough() -> FileRangePlan:
        return FileRangePlan(status=status, headers=normalized, body_length=resource_length)

    if method.upper() not in {'GET', 'HEAD'}:
        return passthrough()
    if not (status == 200 and response_allows_body(status)):
        return passthrough()
    if get_header(normalized, b'content-encoding') is not None:
        return passthrough()

    # Return value is not used; presumably the list is updated in place — confirm.
    append_if_missing(normalized, b'accept-ranges', b'bytes')
    requested = get_header(request_headers, b'range')
    if requested is None or not _if_range_allows_range(request_headers, normalized):
        return passthrough()

    resolved = parse_range_header(requested, resource_length=resource_length)
    if resolved is None:
        return passthrough()
    if not resolved:
        rejected = replace_header(normalized, b'content-range', f'bytes */{resource_length}'.encode('ascii'))
        rejected = replace_header(rejected, b'content-length', b'0')
        return FileRangePlan(status=416, headers=rejected, body_length=0, unsatisfied=True)

    # Drop stale framing headers; they are recomputed for the partial body.
    stale = {b'content-range', b'content-length'}
    partial_headers = [(name, value) for name, value in normalized if name not in stale]
    parts = tuple(resolved)

    if len(parts) == 1:
        (only,) = parts
        span = only.end - only.start + 1
        partial_headers.append((b'content-range', f'bytes {only.start}-{only.end}/{resource_length}'.encode('ascii')))
        partial_headers.append((b'content-length', str(span).encode('ascii')))
        return FileRangePlan(status=206, headers=partial_headers, body_length=span, parts=parts, applied=True)

    boundary = _multipart_boundary_for_ranges(total_length=resource_length, response_headers=partial_headers)
    # Captured before the header is replaced by the multipart type; each
    # part repeats the original content type.
    part_content_type = get_header(partial_headers, b'content-type')
    partial_headers = replace_header(partial_headers, b'content-type', b'multipart/byteranges; boundary=' + boundary)
    multipart_length = _multipart_total_length(
        parts,
        total_length=resource_length,
        boundary=boundary,
        content_type=part_content_type,
    )
    partial_headers.append((b'content-length', str(multipart_length).encode('ascii')))
    return FileRangePlan(
        status=206,
        headers=partial_headers,
        body_length=multipart_length,
        parts=parts,
        boundary=boundary,
        applied=True,
    )
199
+
200
+
201
def build_file_range_segments(
    *,
    path: str | Path,
    plan: FileRangePlan,
    total_length: int,
    source_content_type: bytes | None = None,
) -> tuple[MemoryBodySegment | FileBodySegment, ...]:
    """Turn a ``FileRangePlan`` into the body segments to transmit.

    Returns:
        * no segments for an unsatisfied (416) plan, whose body length is 0;
        * one whole-file segment when no range was applied;
        * one file segment for a single range;
        * for several ranges, a prefix / file slice / CRLF triple per range
          followed by the closing multipart delimiter.

    *source_content_type* is written into each multipart part header. It
    should be the same value the plan's content length was computed with,
    otherwise the emitted body will not match that length.
    """
    if plan.unsatisfied:
        # A 416 plan advertises an empty body; emitting the whole file here
        # would contradict its Content-Length of 0.
        return ()
    source_path = str(path)
    if not plan.applied or not plan.parts:
        return (FileBodySegment(source_path, 0, total_length),)
    if len(plan.parts) == 1:
        item = plan.parts[0]
        return (FileBodySegment(source_path, item.start, item.end - item.start + 1),)
    # Multi-range plans always carry a boundary.
    assert plan.boundary is not None
    segments: list[MemoryBodySegment | FileBodySegment] = []
    for item in plan.parts:
        segments.append(
            MemoryBodySegment(
                _multipart_part_prefix(
                    item,
                    total_length=total_length,
                    boundary=plan.boundary,
                    content_type=source_content_type,
                )
            )
        )
        segments.append(FileBodySegment(source_path, item.start, item.end - item.start + 1))
        segments.append(MemoryBodySegment(b'\r\n'))
    segments.append(MemoryBodySegment(b'--' + plan.boundary + b'--\r\n'))
    return tuple(segments)
231
+
232
+
233
def apply_byte_ranges(
    *,
    method: str,
    request_headers: list[tuple[bytes, bytes]] | tuple[tuple[bytes, bytes], ...],
    response_headers: HeaderList,
    body: bytes,
    status: int,
) -> RangeEvaluation:
    """Apply a ``Range`` request to an in-memory response body.

    The response passes through unchanged (apart from lower-cased header
    names) unless the method is GET or HEAD, the status is 200 and the
    response has no ``Content-Encoding``. Eligible responses advertise
    ``Accept-Ranges: bytes``. A missing or unparseable ``Range`` header, or
    a failed ``If-Range`` check, also leaves the response unchanged.

    Returns:
        A 416 result with an empty body when no requested range is
        satisfiable, a 206 result with the sliced body for a single range,
        or a 206 ``multipart/byteranges`` result for several ranges.
    """
    normalized = [(bytes(name).lower(), bytes(value)) for name, value in response_headers]

    def untouched() -> RangeEvaluation:
        return RangeEvaluation(status=status, headers=normalized, body=body)

    if method.upper() not in {'GET', 'HEAD'}:
        return untouched()
    if not (status == 200 and response_allows_body(status)):
        return untouched()
    if get_header(normalized, b'content-encoding') is not None:
        return untouched()

    # Return value is not used; presumably the list is updated in place — confirm.
    append_if_missing(normalized, b'accept-ranges', b'bytes')
    requested = get_header(request_headers, b'range')
    if requested is None or not _if_range_allows_range(request_headers, normalized):
        return untouched()

    total = len(body)
    resolved = parse_range_header(requested, resource_length=total)
    if resolved is None:
        return untouched()
    if not resolved:
        rejected = replace_header(normalized, b'content-range', f'bytes */{total}'.encode('ascii'))
        rejected = replace_header(rejected, b'content-length', b'0')
        return RangeEvaluation(status=416, headers=rejected, body=b'', unsatisfied=True)

    # Drop stale framing headers; they are recomputed for the partial body.
    stale = {b'content-range', b'content-length'}
    partial_headers = [(name, value) for name, value in normalized if name not in stale]

    if len(resolved) == 1:
        (only,) = resolved
        partial = body[only.start : only.end + 1]
        partial_headers.append((b'content-range', f'bytes {only.start}-{only.end}/{total}'.encode('ascii')))
        partial_headers.append((b'content-length', str(len(partial)).encode('ascii')))
        return RangeEvaluation(status=206, headers=partial_headers, body=partial, applied=True)

    boundary = _multipart_boundary_for_ranges(total_length=total, response_headers=partial_headers)
    # Captured before the header is replaced by the multipart type; each
    # part repeats the original content type.
    part_content_type = get_header(partial_headers, b'content-type')
    chunks: list[bytes] = []
    for item in resolved:
        chunks += [
            _multipart_part_prefix(item, total_length=total, boundary=boundary, content_type=part_content_type),
            body[item.start : item.end + 1],
            b'\r\n',
        ]
    chunks.append(b'--' + boundary + b'--\r\n')
    multipart = b''.join(chunks)

    partial_headers = replace_header(partial_headers, b'content-type', b'multipart/byteranges; boundary=' + boundary)
    partial_headers.append((b'content-length', str(len(multipart)).encode('ascii')))
    return RangeEvaluation(status=206, headers=partial_headers, body=multipart, applied=True)
283
+
284
+
285
# Public API of the byte-range module.
__all__ = [
    'ByteRange',
    'FileRangePlan',
    'build_file_range_segments',
    'RangeEvaluation',
    'apply_byte_ranges',
    'parse_range_header',
    'plan_file_byte_ranges',
]