streamlit-nightly 1.35.1.dev20240523__py2.py3-none-any.whl → 1.35.1.dev20240525__py2.py3-none-any.whl

This diff compares two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in the public registry.
Files changed (23)
  1. streamlit/__init__.py +4 -5
  2. streamlit/config.py +1 -15
  3. streamlit/elements/dialog_decorator.py +5 -3
  4. streamlit/elements/utils.py +3 -6
  5. streamlit/runtime/app_session.py +1 -2
  6. streamlit/runtime/caching/__init__.py +5 -0
  7. streamlit/runtime/caching/legacy_cache_api.py +164 -0
  8. streamlit/runtime/fragment.py +91 -80
  9. streamlit/runtime/runtime.py +0 -2
  10. streamlit/static/asset-manifest.json +2 -2
  11. streamlit/static/index.html +1 -1
  12. streamlit/static/static/js/{main.7e42f54d.js → main.e93f99a3.js} +2 -2
  13. streamlit/web/cli.py +1 -8
  14. {streamlit_nightly-1.35.1.dev20240523.dist-info → streamlit_nightly-1.35.1.dev20240525.dist-info}/METADATA +1 -1
  15. {streamlit_nightly-1.35.1.dev20240523.dist-info → streamlit_nightly-1.35.1.dev20240525.dist-info}/RECORD +20 -22
  16. streamlit/runtime/legacy_caching/__init__.py +0 -17
  17. streamlit/runtime/legacy_caching/caching.py +0 -810
  18. streamlit/runtime/legacy_caching/hashing.py +0 -1005
  19. /streamlit/static/static/js/{main.7e42f54d.js.LICENSE.txt → main.e93f99a3.js.LICENSE.txt} +0 -0
  20. {streamlit_nightly-1.35.1.dev20240523.data → streamlit_nightly-1.35.1.dev20240525.data}/scripts/streamlit.cmd +0 -0
  21. {streamlit_nightly-1.35.1.dev20240523.dist-info → streamlit_nightly-1.35.1.dev20240525.dist-info}/WHEEL +0 -0
  22. {streamlit_nightly-1.35.1.dev20240523.dist-info → streamlit_nightly-1.35.1.dev20240525.dist-info}/entry_points.txt +0 -0
  23. {streamlit_nightly-1.35.1.dev20240523.dist-info → streamlit_nightly-1.35.1.dev20240525.dist-info}/top_level.txt +0 -0
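
Items 7 and 16–18 above are the substance of this nightly: the entire `streamlit/runtime/legacy_caching` package, which implemented the original `st.cache` decorator and its code-hashing machinery, is deleted, while a new `streamlit/runtime/caching/legacy_cache_api.py` (judging by its name, the compatibility surface for the old decorator) is added. For code still on the legacy decorator, the modern replacements are `st.cache_data` and `st.cache_resource`, which keep the `hash_funcs` escape hatch that the deleted hashing module below documents. A hedged migration sketch; the type and hash function are illustrative, not taken from this diff:

```python
import streamlit as st

# Before (removed by this release):
#
#     @st.cache(hash_funcs={SomeUnhashableType: my_hash_func})
#     def load(obj): ...
#
# After: st.cache_data (or st.cache_resource for global resources).
# Both accept hash_funcs, keyed by a type or its fully qualified name.
@st.cache_data(hash_funcs={"builtins.complex": lambda z: (z.real, z.imag)})
def magnitudes(zs: list[complex]) -> list[float]:
    # Recomputed only when the custom-hashed arguments change.
    return [abs(z) for z in zs]
```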
streamlit/runtime/legacy_caching/hashing.py (file 18 above, deleted)
@@ -1,1005 +0,0 @@
- # Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2024)
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- """A hashing utility for code."""
-
- from __future__ import annotations
-
- import collections
- import enum
- import functools
- import hashlib
- import inspect
- import io
- import os
- import pickle
- import sys
- import tempfile
- import textwrap
- import threading
- import weakref
- from typing import Any, Callable, Dict, Pattern, Type, Union
-
- from streamlit import config, file_util, type_util, util
- from streamlit.errors import MarkdownFormattedException, StreamlitAPIException
- from streamlit.folder_black_list import FolderBlackList
- from streamlit.runtime.uploaded_file_manager import UploadedFile
- from streamlit.util import HASHLIB_KWARGS
-
- # If a dataframe has more than this many rows, we consider it large and hash a sample.
- _PANDAS_ROWS_LARGE = 100000
- _PANDAS_SAMPLE_SIZE = 10000
-
-
- # Similar to dataframes, we also sample large numpy arrays.
- _NP_SIZE_LARGE = 1000000
- _NP_SAMPLE_SIZE = 100000
-
-
- # Arbitrary item to denote where we found a cycle in a hashed object.
- # This allows us to hash self-referencing lists, dictionaries, etc.
- _CYCLE_PLACEHOLDER = b"streamlit-57R34ML17-hesamagicalponyflyingthroughthesky-CYCLE"
-
-
- # This needs to be initialized lazily to avoid calling config.get_option() and
- # thus initializing config options when this file is first imported.
- _FOLDER_BLACK_LIST = None
-
-
- # FFI objects (objects that interface with C libraries) can be any of these types:
- _FFI_TYPE_NAMES = [
-     "_cffi_backend.FFI",
-     "builtins.CompiledFFI",
- ]
-
- # KERAS objects can be any of these types:
- _KERAS_TYPE_NAMES = [
-     "keras.engine.training.Model",
-     "tensorflow.python.keras.engine.training.Model",
-     "tensorflow.python.keras.engine.functional.Functional",
- ]
-
-
- Context = collections.namedtuple("Context", ["globals", "cells", "varnames"])
-
-
- # Mapping of types or fully qualified names to hash functions. This is used to
- # override the behavior of the hasher inside Streamlit's caching mechanism:
- # when the hasher encounters an object, it will first check to see if its type
- # matches a key in this dict and, if so, will use the provided function to
- # generate a hash for it.
- HashFuncsDict = Dict[Union[str, Type[Any]], Callable[[Any], Any]]
-
-
- class HashReason(enum.Enum):
-     CACHING_FUNC_ARGS = 0
-     CACHING_FUNC_BODY = 1
-     CACHING_FUNC_OUTPUT = 2
-     CACHING_BLOCK = 3
-
-
- def update_hash(
-     val: Any,
-     hasher,
-     hash_reason: HashReason,
-     hash_source: Callable[..., Any],
-     context: Context | None = None,
-     hash_funcs: HashFuncsDict | None = None,
- ) -> None:
-     """Updates a hashlib hasher with the hash of val.
-
-     This is the main entrypoint to hashing.py.
-     """
-     hash_stacks.current.hash_reason = hash_reason
-     hash_stacks.current.hash_source = hash_source
-
-     ch = _CodeHasher(hash_funcs)
-     ch.update(hasher, val, context)
-
-
- class _HashStack:
-     """Stack of what has been hashed, for debug and circular reference detection.
-
-     This internally keeps 1 stack per thread.
-
-     Internally, this stores the ID of pushed objects rather than the objects
-     themselves because otherwise the "in" operator inside __contains__ would
-     fail for objects that don't return a boolean for "==" operator. For
-     example, arr == 10 where arr is a NumPy array returns another NumPy array.
-     This causes the "in" to crash since it expects a boolean.
-     """
-
-     def __init__(self):
-         self._stack: collections.OrderedDict[int, list[Any]] = collections.OrderedDict()
-
-         # The reason why we're doing this hashing, for debug purposes.
-         self.hash_reason: HashReason | None = None
-
-         # Either a function or a code block, depending on whether the reason is
-         # due to hashing part of a function (i.e. body, args, output) or an
-         # st.Cache codeblock.
-         self.hash_source: Callable[..., Any] | None = None
-
-     def __repr__(self) -> str:
-         return util.repr_(self)
-
-     def push(self, val: Any):
-         self._stack[id(val)] = val
-
-     def pop(self):
-         self._stack.popitem()
-
-     def __contains__(self, val: Any):
-         return id(val) in self._stack
-
-     def pretty_print(self):
-         def to_str(v):
-             try:
-                 return "Object of type {}: {}".format(type_util.get_fqn_type(v), str(v))
-             except Exception:
-                 return "<Unable to convert item to string>"
-
-         # IDEA: Maybe we should remove our internal "hash_funcs" from the
-         # stack. I'm not removing those now because even though those aren't
-         # useful to users I think they might be useful when we're debugging an
-         # issue sent by a user. So let's wait a few months and see if they're
-         # indeed useful...
-         return "\n".join(to_str(x) for x in reversed(self._stack.values()))
-
-
- class _HashStacks:
-     """Stacks of what has been hashed, with at most 1 stack per thread."""
-
-     def __init__(self):
-         self._stacks: weakref.WeakKeyDictionary[
-             threading.Thread, _HashStack
-         ] = weakref.WeakKeyDictionary()
-
-     def __repr__(self) -> str:
-         return util.repr_(self)
-
-     @property
-     def current(self) -> _HashStack:
-         current_thread = threading.current_thread()
-
-         stack = self._stacks.get(current_thread, None)
-
-         if stack is None:
-             stack = _HashStack()
-             self._stacks[current_thread] = stack
-
-         return stack
-
-
- hash_stacks = _HashStacks()
-
-
- class _Cells:
-     """
-     Class which is basically a dict that allows us to push/pop frames of data.
-
-     Python code objects are nested. In the following function:
-
-         @st.cache()
-         def func():
-             production = [[x + y for x in range(3)] for y in range(5)]
-             return production
-
-     func.__code__ is a code object, and contains (inside
-     func.__code__.co_consts) additional code objects for the list
-     comprehensions. Those objects have their own co_freevars and co_cellvars.
-
-     What we need to do as we're traversing this "tree" of code objects is to
-     save each code object's vars, hash it, and then restore the original vars.
-     """
-
-     _cell_delete_obj = object()
-
-     def __init__(self):
-         self.values = {}
-         self.stack = []
-         self.frames = []
-
-     def __repr__(self) -> str:
-         return util.repr_(self)
-
-     def _set(self, key, value):
-         """
-         Sets a value and saves the old value so it can be restored when
-         we pop the frame. A sentinel object, _cell_delete_obj, indicates that
-         the key was previously empty and should just be deleted.
-         """
-
-         # save the old value (or mark that it didn't exist)
-         self.stack.append((key, self.values.get(key, self._cell_delete_obj)))
-
-         # write the new value
-         self.values[key] = value
-
-     def pop(self):
-         """Pop off the last frame we created, and restore all the old values."""
-
-         idx = self.frames.pop()
-         for key, val in self.stack[idx:]:
-             if val is self._cell_delete_obj:
-                 del self.values[key]
-             else:
-                 self.values[key] = val
-         self.stack = self.stack[:idx]
-
-     def push(self, code, func=None):
-         """Create a new frame, and save all of `code`'s vars into it."""
-
-         self.frames.append(len(self.stack))
-
-         for var in code.co_cellvars:
-             self._set(var, var)
-
-         if code.co_freevars:
-             if func is not None:
-                 assert len(code.co_freevars) == len(func.__closure__)
-                 for var, cell in zip(code.co_freevars, func.__closure__):
-                     self._set(var, cell.cell_contents)
-             else:
-                 # List comprehension code objects also have freevars, but they
-                 # don't have a surrounding closure. In these cases we just use the name.
-                 for var in code.co_freevars:
-                     self._set(var, var)
-
-
- def _get_context(func) -> Context:
-     varnames = {}
-     if inspect.ismethod(func):
-         varnames = {"self": func.__self__}
-
-     return Context(globals=func.__globals__, cells=_Cells(), varnames=varnames)
-
-
- def _int_to_bytes(i: int) -> bytes:
-     num_bytes = (i.bit_length() + 8) // 8
-     return i.to_bytes(num_bytes, "little", signed=True)
-
-
- def _key(obj: Any | None) -> Any:
-     """Return key for memoization."""
-
-     if obj is None:
-         return None
-
-     def is_simple(obj):
-         return (
-             isinstance(obj, bytes)
-             or isinstance(obj, bytearray)
-             or isinstance(obj, str)
-             or isinstance(obj, float)
-             or isinstance(obj, int)
-             or isinstance(obj, bool)
-             or obj is None
-         )
-
-     if is_simple(obj):
-         return obj
-
-     if isinstance(obj, tuple):
-         if all(map(is_simple, obj)):
-             return obj
-
-     if isinstance(obj, list):
-         if all(map(is_simple, obj)):
-             return ("__l", tuple(obj))
-
-     if (
-         type_util.is_type(obj, "pandas.core.frame.DataFrame")
-         or type_util.is_type(obj, "numpy.ndarray")
-         or inspect.isbuiltin(obj)
-         or inspect.isroutine(obj)
-         or inspect.iscode(obj)
-     ):
-         return id(obj)
-
-     return NoResult
-
-
- class _CodeHasher:
-     """A hasher that can hash code objects including dependencies."""
-
-     def __init__(self, hash_funcs: HashFuncsDict | None = None):
-         # Can't use types as the keys in the internal _hash_funcs because
-         # we always remove user-written modules from memory when rerunning a
-         # script in order to reload it and grab the latest code changes.
-         # (See LocalSourcesWatcher.py:on_file_changed) This causes
-         # the type object to refer to different underlying class instances each run,
-         # so type-based comparisons fail. To solve this, we use the types converted
-         # to fully-qualified strings as keys in our internal dict.
-         self._hash_funcs: HashFuncsDict
-         if hash_funcs:
-             self._hash_funcs = {
-                 k if isinstance(k, str) else type_util.get_fqn(k): v
-                 for k, v in hash_funcs.items()
-             }
-         else:
-             self._hash_funcs = {}
-
-         self._hashes: dict[Any, bytes] = {}
-
-         # The number of the bytes in the hash.
-         self.size = 0
-
-     def __repr__(self) -> str:
-         return util.repr_(self)
-
-     def to_bytes(self, obj: Any, context: Context | None = None) -> bytes:
-         """Add memoization to _to_bytes and protect against cycles in data structures."""
-         tname = type(obj).__qualname__.encode()
-         key = (tname, _key(obj))
-
-         # Memoize if possible.
-         if key[1] is not NoResult:
-             if key in self._hashes:
-                 return self._hashes[key]
-
-         # Break recursive cycles.
-         if obj in hash_stacks.current:
-             return _CYCLE_PLACEHOLDER
-
-         hash_stacks.current.push(obj)
-
-         try:
-             # Hash the input
-             b = b"%s:%s" % (tname, self._to_bytes(obj, context))
-
-             # Hmmm... It's possible that the size calculation is wrong. When we
-             # call to_bytes inside _to_bytes things get double-counted.
-             self.size += sys.getsizeof(b)
-
-             if key[1] is not NoResult:
-                 self._hashes[key] = b
-
-         except (UnhashableTypeError, UserHashError, InternalHashError):
-             # Re-raise exceptions we hand-raise internally.
-             raise
-
-         except Exception as ex:
-             raise InternalHashError(ex, obj)
-
-         finally:
-             # In case an UnhashableTypeError (or other) error is thrown, clean up the
-             # stack so we don't get false positives in future hashing calls
-             hash_stacks.current.pop()
-
-         return b
-
-     def update(self, hasher, obj: Any, context: Context | None = None) -> None:
-         """Update the provided hasher with the hash of an object."""
-         b = self.to_bytes(obj, context)
-         hasher.update(b)
-
-     def _file_should_be_hashed(self, filename: str) -> bool:
-         global _FOLDER_BLACK_LIST
-
-         if not _FOLDER_BLACK_LIST:
-             _FOLDER_BLACK_LIST = FolderBlackList(
-                 config.get_option("server.folderWatchBlacklist")
-             )
-
-         filepath = os.path.abspath(filename)
-         file_is_blacklisted = _FOLDER_BLACK_LIST.is_blacklisted(filepath)
-         # Short circuiting for performance.
-         if file_is_blacklisted:
-             return False
-         return file_util.file_is_in_folder_glob(
-             filepath, self._get_main_script_directory()
-         ) or file_util.file_in_pythonpath(filepath)
-
-     def _to_bytes(self, obj: Any, context: Context | None) -> bytes:
-         """Hash objects to bytes, including code with dependencies.
-
-         Python's built in `hash` does not produce consistent results across
-         runs.
-         """
-
-         h = hashlib.new("md5", **HASHLIB_KWARGS)
-
-         if type_util.is_type(obj, "unittest.mock.Mock") or type_util.is_type(
-             obj, "unittest.mock.MagicMock"
-         ):
-             # Mock objects can appear to be infinitely
-             # deep, so we don't try to hash them at all.
-             return self.to_bytes(id(obj))
-
-         elif isinstance(obj, bytes) or isinstance(obj, bytearray):
-             return obj
-
-         elif type_util.get_fqn_type(obj) in self._hash_funcs:
-             # Escape hatch for unsupported objects
-             hash_func = self._hash_funcs[type_util.get_fqn_type(obj)]
-             try:
-                 output = hash_func(obj)
-             except Exception as ex:
-                 raise UserHashError(ex, obj, hash_func=hash_func)
-
-             return self.to_bytes(output)
-
-         elif isinstance(obj, str):
-             return obj.encode()
-
-         elif isinstance(obj, float):
-             return self.to_bytes(hash(obj))
-
-         elif isinstance(obj, int):
-             return _int_to_bytes(obj)
-
-         elif isinstance(obj, (list, tuple)):
-             for item in obj:
-                 self.update(h, item, context)
-             return h.digest()
-
-         elif isinstance(obj, dict):
-             for item in obj.items():
-                 self.update(h, item, context)
-             return h.digest()
-
-         elif obj is None:
-             return b"0"
-
-         elif obj is True:
-             return b"1"
-
-         elif obj is False:
-             return b"0"
-
-         elif type_util.is_type(obj, "pandas.core.frame.DataFrame") or type_util.is_type(
-             obj, "pandas.core.series.Series"
-         ):
-             import pandas as pd
-
-             if len(obj) >= _PANDAS_ROWS_LARGE:
-                 obj = obj.sample(n=_PANDAS_SAMPLE_SIZE, random_state=0)
-             try:
-                 return b"%s" % pd.util.hash_pandas_object(obj).sum()
-             except TypeError:
-                 # Use pickle if pandas cannot hash the object for example if
-                 # it contains unhashable objects.
-                 return b"%s" % pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
-
-         elif type_util.is_type(obj, "numpy.ndarray"):
-             self.update(h, obj.shape)
-
-             if obj.size >= _NP_SIZE_LARGE:
-                 import numpy as np
-
-                 state = np.random.RandomState(0)
-                 obj = state.choice(obj.flat, size=_NP_SAMPLE_SIZE)
-
-             self.update(h, obj.tobytes())
-             return h.digest()
-
-         elif inspect.isbuiltin(obj):
-             return bytes(obj.__name__.encode())
-
-         elif any(type_util.is_type(obj, typename) for typename in _FFI_TYPE_NAMES):
-             return self.to_bytes(None)
-
-         elif type_util.is_type(obj, "builtins.mappingproxy") or type_util.is_type(
-             obj, "builtins.dict_items"
-         ):
-             return self.to_bytes(dict(obj))
-
-         elif type_util.is_type(obj, "builtins.getset_descriptor"):
-             return bytes(obj.__qualname__.encode())
-
-         elif isinstance(obj, UploadedFile):
-             # UploadedFile is a BytesIO (thus IOBase) but has a name.
-             # It does not have a timestamp so this must come before
-             # temporary files
-             h = hashlib.new("md5", **HASHLIB_KWARGS)
-             self.update(h, obj.name)
-             self.update(h, obj.tell())
-             self.update(h, obj.getvalue())
-             return h.digest()
-
-         elif hasattr(obj, "name") and (
-             isinstance(obj, io.IOBase)
-             # Handle temporary files used during testing
-             or isinstance(obj, tempfile._TemporaryFileWrapper)
-         ):
-             # Hash files as name + last modification date + offset.
-             # NB: we're using hasattr("name") to differentiate between
-             # on-disk and in-memory StringIO/BytesIO file representations.
-             # That means that this condition must come *before* the next
-             # condition, which just checks for StringIO/BytesIO.
-             obj_name = getattr(obj, "name", "wonthappen")  # Just to appease MyPy.
-             self.update(h, obj_name)
-             self.update(h, os.path.getmtime(obj_name))
-             self.update(h, obj.tell())
-             return h.digest()
-
-         elif isinstance(obj, Pattern):
-             return self.to_bytes([obj.pattern, obj.flags])
-
-         elif isinstance(obj, io.StringIO) or isinstance(obj, io.BytesIO):
-             # Hash in-memory StringIO/BytesIO by their full contents
-             # and seek position.
-             self.update(h, obj.tell())
-             self.update(h, obj.getvalue())
-             return h.digest()
-
-         elif any(
-             type_util.get_fqn(x) == "sqlalchemy.pool.base.Pool"
-             for x in type(obj).__bases__
-         ):
-             # Get connect_args from the closure of the creator function. It includes
-             # arguments parsed from the URL and those passed in via `connect_args`.
-             # However if a custom `creator` function is passed in then we don't
-             # expect to get this data.
-             cargs = obj._creator.__closure__
-             cargs = [cargs[0].cell_contents, cargs[1].cell_contents] if cargs else None
-
-             # Sort kwargs since hashing dicts is sensitive to key order
-             if cargs:
-                 cargs[1] = dict(
-                     collections.OrderedDict(
-                         sorted(cargs[1].items(), key=lambda t: t[0])
-                     )
-                 )
-
-             reduce_data = obj.__reduce__()
-
-             # Remove thread related objects
-             for attr in [
-                 "_overflow_lock",
-                 "_pool",
-                 "_conn",
-                 "_fairy",
-                 "_threadconns",
-                 "logger",
-             ]:
-                 reduce_data[2].pop(attr, None)
-
-             return self.to_bytes([reduce_data, cargs])
-
-         elif type_util.is_type(obj, "sqlalchemy.engine.base.Engine"):
-             # Remove the url because it's overwritten by creator and connect_args
-             reduce_data = obj.__reduce__()
-             reduce_data[2].pop("url", None)
-             reduce_data[2].pop("logger", None)
-
-             return self.to_bytes(reduce_data)
-
-         elif type_util.is_type(obj, "numpy.ufunc"):
-             # For numpy.remainder, this returns remainder.
-             return bytes(obj.__name__.encode())
-
-         elif type_util.is_type(obj, "socket.socket"):
-             return self.to_bytes(id(obj))
-
-         elif any(
-             type_util.get_fqn(x) == "torch.nn.modules.module.Module"
-             for x in type(obj).__bases__
-         ):
-             return self.to_bytes(id(obj))
-
-         elif type_util.is_type(obj, "tensorflow.python.client.session.Session"):
-             return self.to_bytes(id(obj))
-
-         elif type_util.is_type(obj, "torch.Tensor") or type_util.is_type(
-             obj, "torch._C._TensorBase"
-         ):
-             return self.to_bytes([obj.detach().numpy(), obj.grad])
-
-         elif any(type_util.is_type(obj, typename) for typename in _KERAS_TYPE_NAMES):
-             return self.to_bytes(id(obj))
-
-         elif type_util.is_type(
-             obj,
-             "tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject",
-         ):
-             return self.to_bytes(id(obj))
-
-         elif inspect.isroutine(obj):
-             wrapped = getattr(obj, "__wrapped__", None)
-             if wrapped is not None:
-                 # Ignore the wrapper of wrapped functions.
-                 return self.to_bytes(wrapped)
-
-             if obj.__module__.startswith("streamlit"):
-                 # Ignore streamlit modules even if they are in the CWD
-                 # (e.g. during development).
-                 return self.to_bytes("{}.{}".format(obj.__module__, obj.__name__))
-
-             code = getattr(obj, "__code__", None)
-             assert code is not None
-             if self._file_should_be_hashed(code.co_filename):
-                 context = _get_context(obj)
-                 defaults = getattr(obj, "__defaults__", None)
-                 if defaults is not None:
-                     self.update(h, defaults, context)
-                 h.update(self._code_to_bytes(code, context, func=obj))
-             else:
-                 # Don't hash code that is not in the current working directory.
-                 self.update(h, obj.__module__)
-                 self.update(h, obj.__name__)
-             return h.digest()
-
-         elif inspect.iscode(obj):
-             if context is None:
-                 raise RuntimeError("context must be defined when hashing code")
-             return self._code_to_bytes(obj, context)
-
-         elif inspect.ismodule(obj):
-             # TODO: Figure out how to best show this kind of warning to the
-             # user. In the meantime, show nothing. This scenario is too common,
-             # so the current warning is quite annoying...
-             # st.warning(('Streamlit does not support hashing modules. '
-             #             'We did not hash `%s`.') % obj.__name__)
-             # TODO: Hash more than just the name for internal modules.
-             return self.to_bytes(obj.__name__)
-
-         elif inspect.isclass(obj):
-             # TODO: Figure out how to best show this kind of warning to the
-             # user. In the meantime, show nothing. This scenario is too common,
-             # (e.g. in every "except" statement) so the current warning is
-             # quite annoying...
-             # st.warning(('Streamlit does not support hashing classes. '
-             #             'We did not hash `%s`.') % obj.__name__)
-             # TODO: Hash more than just the name of classes.
-             return self.to_bytes(obj.__name__)
-
-         elif isinstance(obj, functools.partial):
-             # The return value of functools.partial is not a plain function:
-             # it's a callable object that remembers the original function plus
-             # the values you pickled into it. So here we need to special-case it.
-             h = hashlib.new("md5", **HASHLIB_KWARGS)
-             self.update(h, obj.args)
-             self.update(h, obj.func)
-             self.update(h, obj.keywords)
-             return h.digest()
-
-         else:
-             # As a last resort, hash the output of the object's __reduce__ method
-             try:
-                 reduce_data = obj.__reduce__()
-             except Exception as ex:
-                 raise UnhashableTypeError(ex, obj)
-
-             for item in reduce_data:
-                 self.update(h, item, context)
-             return h.digest()
-
-     def _code_to_bytes(self, code, context: Context, func=None) -> bytes:
-         h = hashlib.new("md5", **HASHLIB_KWARGS)
-
-         # Hash the bytecode.
-         self.update(h, code.co_code)
-
-         # Hash constants that are referenced by the bytecode but ignore names of lambdas.
-         consts = [
-             n
-             for n in code.co_consts
-             if not isinstance(n, str) or not n.endswith(".<lambda>")
-         ]
-         self.update(h, consts, context)
-
-         context.cells.push(code, func=func)
-         for ref in get_referenced_objects(code, context):
-             self.update(h, ref, context)
-         context.cells.pop()
-
-         return h.digest()
-
-     @staticmethod
-     def _get_main_script_directory() -> str:
-         """Get the absolute path to directory of the main script."""
-         import pathlib
-
-         import __main__
-
-         # This works because we set __main__.__file__ to the
-         # script path in ScriptRunner.
-         abs_main_path = pathlib.Path(__main__.__file__).resolve()
-         return str(abs_main_path.parent)
-
-
- def get_referenced_objects(code, context: Context) -> list[Any]:
-     # Top of the stack
-     tos: Any = None
-     lineno = None
-     refs: list[Any] = []
-
-     def set_tos(t):
-         nonlocal tos
-         if tos is not None:
-             # Hash tos so we support reading multiple objects
-             refs.append(tos)
-         tos = t
-
-     # Our goal is to find referenced objects. The problem is that co_names
-     # does not have full qualified names in it. So if you access `foo.bar`,
-     # co_names has `foo` and `bar` in it but it doesn't tell us that the
-     # code reads `bar` of `foo`. We are going over the bytecode to resolve
-     # from which object an attribute is requested.
-     # Read more about bytecode at https://docs.python.org/3/library/dis.html
-     import dis
-
-     for op in dis.get_instructions(code):
-         try:
-             # Sometimes starts_line is None, in which case let's just remember the
-             # previous start_line (if any). This way when there's an exception we at
-             # least can point users somewhat near the line where the error stems from.
-             if op.starts_line is not None:
-                 lineno = op.starts_line
-
-             if op.opname in ["LOAD_GLOBAL", "LOAD_NAME"]:
-                 if op.argval in context.globals:
-                     set_tos(context.globals[op.argval])
-                 else:
-                     set_tos(op.argval)
-             elif op.opname in ["LOAD_DEREF", "LOAD_CLOSURE"]:
-                 set_tos(context.cells.values[op.argval])
-             elif op.opname == "IMPORT_NAME":
-                 try:
-                     import importlib
-
-                     set_tos(importlib.import_module(op.argval))
-                 except ImportError:
-                     set_tos(op.argval)
-             elif op.opname in ["LOAD_METHOD", "LOAD_ATTR", "IMPORT_FROM"]:
-                 if tos is None:
-                     refs.append(op.argval)
-                 elif isinstance(tos, str):
-                     tos += "." + op.argval
-                 else:
-                     tos = getattr(tos, op.argval)
-             elif op.opname == "DELETE_FAST" and tos:
-                 del context.varnames[op.argval]
-                 tos = None
-             elif op.opname == "STORE_FAST" and tos:
-                 context.varnames[op.argval] = tos
-                 tos = None
-             elif op.opname == "LOAD_FAST" and op.argval in context.varnames:
-                 set_tos(context.varnames[op.argval])
-             else:
-                 # For all other instructions, hash the current TOS.
-                 if tos is not None:
-                     refs.append(tos)
-                     tos = None
-         except Exception as e:
-             raise UserHashError(e, code, lineno=lineno)
-
-     return refs
-
-
- class NoResult:
-     """Placeholder class for return values when None is meaningful."""
-
-     pass
-
-
- class UnhashableTypeError(StreamlitAPIException):
-     def __init__(self, orig_exc, failed_obj):
-         msg = self._get_message(orig_exc, failed_obj)
-         super().__init__(msg)
-         self.with_traceback(orig_exc.__traceback__)
-
-     def _get_message(self, orig_exc, failed_obj):
-         args = _get_error_message_args(orig_exc, failed_obj)
-
-         # This needs to have zero indentation otherwise %(hash_stack)s will
-         # render incorrectly in Markdown.
-         return (
-             """
- Cannot hash object of type `%(failed_obj_type_str)s`, found in %(object_part)s
- %(object_desc)s.
-
- While caching %(object_part)s %(object_desc)s, Streamlit encountered an
- object of type `%(failed_obj_type_str)s`, which it does not know how to hash.
-
- To address this, please try helping Streamlit understand how to hash that type
- by passing the `hash_funcs` argument into `@st.cache`. For example:
-
- ```
- @st.cache(hash_funcs={%(failed_obj_type_str)s: my_hash_func})
- def my_func(...):
-     ...
- ```
-
- If you don't know where the object of type `%(failed_obj_type_str)s` is coming
- from, try looking at the hash chain below for an object that you do recognize,
- then pass that to `hash_funcs` instead:
-
- ```
- %(hash_stack)s
- ```
-
- Please see the `hash_funcs` [documentation](https://docs.streamlit.io/library/advanced-features/caching#the-hash_funcs-parameter)
- for more details.
- """
-             % args
-         ).strip("\n")
-
-
- class UserHashError(StreamlitAPIException):
-     def __init__(self, orig_exc, cached_func_or_code, hash_func=None, lineno=None):
-         self.alternate_name = type(orig_exc).__name__
-
-         if hash_func:
-             msg = self._get_message_from_func(orig_exc, cached_func_or_code, hash_func)
-         else:
-             msg = self._get_message_from_code(orig_exc, cached_func_or_code, lineno)
-
-         super().__init__(msg)
-         self.with_traceback(orig_exc.__traceback__)
-
-     def _get_message_from_func(self, orig_exc, cached_func, hash_func):
-         args = _get_error_message_args(orig_exc, cached_func)
-
-         if hasattr(hash_func, "__name__"):
-             args["hash_func_name"] = "`%s()`" % hash_func.__name__
-         else:
-             args["hash_func_name"] = "a function"
-
-         return (
-             """
- %(orig_exception_desc)s
-
- This error is likely due to a bug in %(hash_func_name)s, which is a
- user-defined hash function that was passed into the `@st.cache` decorator of
- %(object_desc)s.
-
- %(hash_func_name)s failed when hashing an object of type
- `%(failed_obj_type_str)s`. If you don't know where that object is coming from,
- try looking at the hash chain below for an object that you do recognize, then
- pass that to `hash_funcs` instead:
-
- ```
- %(hash_stack)s
- ```
-
- If you think this is actually a Streamlit bug, please
- [file a bug report here](https://github.com/streamlit/streamlit/issues/new/choose).
- """
-             % args
-         ).strip("\n")
-
-     def _get_message_from_code(self, orig_exc: BaseException, cached_code, lineno: int):
-         args = _get_error_message_args(orig_exc, cached_code)
-
-         failing_lines = _get_failing_lines(cached_code, lineno)
-         failing_lines_str = "".join(failing_lines)
-         failing_lines_str = textwrap.dedent(failing_lines_str).strip("\n")
-
-         args["failing_lines_str"] = failing_lines_str
-         args["filename"] = cached_code.co_filename
-         args["lineno"] = lineno
-
-         # This needs to have zero indentation otherwise %(lines_str)s will
-         # render incorrectly in Markdown.
-         return (
-             """
- %(orig_exception_desc)s
-
- Streamlit encountered an error while caching %(object_part)s %(object_desc)s.
- This is likely due to a bug in `%(filename)s` near line `%(lineno)s`:
-
- ```
- %(failing_lines_str)s
- ```
-
- Please modify the code above to address this.
-
- If you think this is actually a Streamlit bug, you may [file a bug report
- here.] (https://github.com/streamlit/streamlit/issues/new/choose)
- """
-             % args
-         ).strip("\n")
-
-
- class InternalHashError(MarkdownFormattedException):
-     """Exception in Streamlit hashing code (i.e. not a user error)"""
-
-     def __init__(self, orig_exc: BaseException, failed_obj: Any):
-         msg = self._get_message(orig_exc, failed_obj)
-         super().__init__(msg)
-         self.with_traceback(orig_exc.__traceback__)
-
-     def _get_message(self, orig_exc: BaseException, failed_obj: Any) -> str:
-         args = _get_error_message_args(orig_exc, failed_obj)
-
-         # This needs to have zero indentation otherwise %(hash_stack)s will
-         # render incorrectly in Markdown.
-         return (
-             """
- %(orig_exception_desc)s
-
- While caching %(object_part)s %(object_desc)s, Streamlit encountered an
- object of type `%(failed_obj_type_str)s`, which it does not know how to hash.
-
- **In this specific case, it's very likely you found a Streamlit bug so please
- [file a bug report here.]
- (https://github.com/streamlit/streamlit/issues/new/choose)**
-
- In the meantime, you can try bypassing this error by registering a custom
- hash function via the `hash_funcs` keyword in @st.cache(). For example:
-
- ```
- @st.cache(hash_funcs={%(failed_obj_type_str)s: my_hash_func})
- def my_func(...):
-     ...
- ```
-
- If you don't know where the object of type `%(failed_obj_type_str)s` is coming
- from, try looking at the hash chain below for an object that you do recognize,
- then pass that to `hash_funcs` instead:
-
- ```
- %(hash_stack)s
- ```
-
- Please see the `hash_funcs` [documentation](https://docs.streamlit.io/library/advanced-features/caching#the-hash_funcs-parameter)
- for more details.
- """
-             % args
-         ).strip("\n")
-
-
- def _get_error_message_args(orig_exc: BaseException, failed_obj: Any) -> dict[str, Any]:
-     hash_reason = hash_stacks.current.hash_reason
-     hash_source = hash_stacks.current.hash_source
-
-     failed_obj_type_str = type_util.get_fqn_type(failed_obj)
-     object_part = ""
-
-     if hash_source is None or hash_reason is None:
-         object_desc = "something"
-
-     elif hash_reason is HashReason.CACHING_BLOCK:
-         object_desc = "a code block"
-
-     else:
-         if hasattr(hash_source, "__name__"):
-             object_desc = f"`{hash_source.__name__}()`"
-         else:
-             object_desc = "a function"
-
-     if hash_reason is HashReason.CACHING_FUNC_ARGS:
-         object_part = "the arguments of"
-     elif hash_reason is HashReason.CACHING_FUNC_BODY:
-         object_part = "the body of"
-     elif hash_reason is HashReason.CACHING_FUNC_OUTPUT:
-         object_part = "the return value of"
-
-     return {
-         "orig_exception_desc": str(orig_exc),
-         "failed_obj_type_str": failed_obj_type_str,
-         "hash_stack": hash_stacks.current.pretty_print(),
-         "object_desc": object_desc,
-         "object_part": object_part,
-     }
-
-
- def _get_failing_lines(code, lineno: int) -> list[str]:
-     """Get list of strings (lines of code) from lineno to lineno+3.
-
-     Ideally we'd return the exact line where the error took place, but there
-     are reasons why this is not possible without a lot of work, including
-     playing with the AST. So for now we're returning 3 lines near where
-     the error took place.
-     """
-     source_lines, source_lineno = inspect.getsourcelines(code)
-
-     start = lineno - source_lineno
-     end = min(start + 3, len(source_lines))
-     lines = source_lines[start:end]
-
-     return lines
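
The bulk of the deletion above is `_CodeHasher`, which hashes a function's bytecode together with everything the function references. Its helper `get_referenced_objects()` walks the bytecode because `co_names` alone cannot distinguish reading `foo.bar` from reading unrelated names `foo` and `bar`. A minimal standalone illustration of that walk; this is not Streamlit code, and opcode names vary by interpreter version (e.g. `LOAD_METHOD` was folded into `LOAD_ATTR` in CPython 3.12):

```python
import dis

def outer():
    import math
    return math.sqrt(2.0)

# The deleted get_referenced_objects() keyed on instructions like these,
# pairing an IMPORT_NAME/LOAD_GLOBAL with the LOAD_ATTR/LOAD_METHOD that
# follows it to recover dotted references such as math.sqrt.
for op in dis.get_instructions(outer):
    if op.opname in ("IMPORT_NAME", "LOAD_GLOBAL", "LOAD_ATTR", "LOAD_METHOD"):
        print(f"{op.opname:<12} {op.argval}")
```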
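
The removed `_HashStack` docstring also explains why cycle detection stores `id(obj)` rather than the objects themselves: membership tests fall back to `==`, and NumPy's elementwise `==` returns an array whose truth value is ambiguous. A quick standalone demonstration of the failure mode and the id-based workaround the class used:

```python
import numpy as np

arr = np.arange(3)
other = np.arange(3)  # equal values, distinct object

# Plain membership falls back to elementwise ==, which cannot be
# collapsed to a single bool, so "in" raises ValueError.
try:
    _ = other in [arr]
except ValueError as e:
    print("membership check failed:", e)

# Keying on id() sidesteps __eq__ entirely, as the deleted class did.
seen = {id(arr)}
print(id(arr) in seen)    # True
print(id(other) in seen)  # False: distinct object, so no false cycle
```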