spinta 0.2.dev27__py3-none-any.whl → 0.2.dev28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spinta/api/__init__.py CHANGED
@@ -50,7 +50,7 @@ from spinta.exceptions import (
50
50
  error_response,
51
51
  )
52
52
  from spinta.formats.html.helpers import get_templates
53
- from spinta.middlewares import ContextMiddleware, StrictTransportSecurityMiddleware
53
+ from spinta.middlewares import ContextMiddleware, PathNormalizationMiddleware, StrictTransportSecurityMiddleware
54
54
  from spinta.urlparams import Version, get_response_type
55
55
 
56
56
  log = logging.getLogger(__name__)
@@ -370,7 +370,7 @@ async def error(request, exc):
370
370
  elif isinstance(exc, BaseError):
371
371
  status_code = exc.status_code
372
372
  errors = [error_response(exc)]
373
- headers = exc.headers
373
+ headers = dict(exc.headers)
374
374
  else:
375
375
  if isinstance(exc, HTTPException):
376
376
  status_code = exc.status_code
@@ -400,6 +400,11 @@ async def error(request, exc):
400
400
 
401
401
  response = {"errors": errors}
402
402
 
403
+ # Error responses can reflect request input (e.g. the requested path) and
404
+ # must never be stored by shared caches, otherwise they can be used for
405
+ # web cache poisoning.
406
+ headers["Cache-Control"] = "no-store"
407
+
403
408
  fmt = get_response_type(request.state.context, request)
404
409
  if fmt == "json" or fmt is None:
405
410
  return JSONResponse(
@@ -467,6 +472,7 @@ def init(context: Context):
467
472
  StrictTransportSecurityMiddleware,
468
473
  value=config.http_strict_transport_security,
469
474
  ),
475
+ Middleware(PathNormalizationMiddleware),
470
476
  Middleware(ContextMiddleware, context=context),
471
477
  ]
472
478
 
@@ -653,9 +653,20 @@ class PropertyReader(TabularReader):
653
653
  enums: Set[str]
654
654
 
655
655
  def read(self, row: Dict[str, str]) -> None:
656
- complete_structure, parent_structure, prop_name = _extract_and_create_parent_data(self, row, row["property"])
656
+ # Parse the data type once, here at the boundary, and pass the parsed
657
+ # result inward. Downstream code must not re-parse `row["type"]` or
658
+ # compare against the raw string: modifiers (`required`, `unique`) and
659
+ # arguments would make a type such as `backref required` or
660
+ # `object required` unrecognisable. Parent and nesting resolution only
661
+ # needs the bare type, while `_handle_datatype` keeps the modifiers.
662
+ dtype = _resolve_dtype(self, row)
663
+ bare_row = {**row, "type": dtype["type"]}
664
+ complete_structure, parent_structure, prop_name = _extract_and_create_parent_data(
665
+ self, bare_row, bare_row["property"]
666
+ )
667
+
668
+ prop_data = _handle_datatype(self, row, dtype)
657
669
 
658
- prop_data = _handle_datatype(self, row)
659
670
  prop_name = _combine_parent_with_prop(prop_name, prop_data, parent_structure, complete_structure)
660
671
 
661
672
  # Edge case where there is no nesting, need to couple `prop_data` with `complete_structure`
@@ -748,8 +759,11 @@ def _initial_text_property_schema(given_name: str, dtype: dict, row: dict):
748
759
  return result
749
760
 
750
761
 
751
- def _datatype_handler(reader: PropertyReader, row: dict, initial_data_loader: Callable[[str, dict, dict], dict]):
752
- dtype: dict = _resolve_dtype(reader, row)
762
+ def _datatype_handler(
763
+ reader: PropertyReader, row: dict, initial_data_loader: Callable[[str, dict, dict], dict], dtype: dict = None
764
+ ):
765
+ if dtype is None:
766
+ dtype = _resolve_dtype(reader, row)
753
767
  given_name = row["property"]
754
768
  reader.name = _clean_up_prop_name(row["property"].split(".")[-1])
755
769
 
@@ -759,7 +773,7 @@ def _datatype_handler(reader: PropertyReader, row: dict, initial_data_loader: Ca
759
773
  f"Property {reader.name!r} must be defined in a model context. "
760
774
  f"Now it is defined in {context.name!r} {context.type} context."
761
775
  )
762
- _check_if_property_already_set(reader, row, given_name)
776
+ _check_if_property_already_set(reader, dtype["type"], given_name)
763
777
 
764
778
  if reader.state.base and not dtype["type"]:
765
779
  dtype["type"] = "inherit"
@@ -804,8 +818,9 @@ def _datatype_handler(reader: PropertyReader, row: dict, initial_data_loader: Ca
804
818
  return new_data
805
819
 
806
820
 
807
- def _string_datatype_handler(reader: PropertyReader, row: dict):
808
- dtype: dict = _resolve_dtype(reader, row)
821
+ def _string_datatype_handler(reader: PropertyReader, row: dict, dtype: dict = None):
822
+ if dtype is None:
823
+ dtype = _resolve_dtype(reader, row)
809
824
  given_name = row["property"]
810
825
  reader.name = _clean_up_prop_name(row["property"].split(".")[-1])
811
826
 
@@ -815,7 +830,7 @@ def _string_datatype_handler(reader: PropertyReader, row: dict):
815
830
  f"Property {reader.name!r} must be defined in a model context. "
816
831
  f"Now it is defined in {context.name!r} {context.type} context."
817
832
  )
818
- existing_data = _check_if_property_already_set(reader, row, given_name)
833
+ existing_data = _check_if_property_already_set(reader, dtype["type"], given_name)
819
834
  if dtype["type"] == DataTypeEnum.TEXT.value and existing_data:
820
835
  reader.error(
821
836
  f"Property {reader.name!r} with the same name is already "
@@ -835,8 +850,9 @@ def _string_datatype_handler(reader: PropertyReader, row: dict):
835
850
  return new_data
836
851
 
837
852
 
838
- def _text_datatype_handler(reader: PropertyReader, row: dict):
839
- dtype: dict = _resolve_dtype(reader, row)
853
+ def _text_datatype_handler(reader: PropertyReader, row: dict, dtype: dict = None):
854
+ if dtype is None:
855
+ dtype = _resolve_dtype(reader, row)
840
856
  given_name = row["property"]
841
857
  reader.name = _clean_up_prop_name(row["property"].split(".")[-1])
842
858
 
@@ -846,7 +862,7 @@ def _text_datatype_handler(reader: PropertyReader, row: dict):
846
862
  f"Property {reader.name!r} must be defined in a model context. "
847
863
  f"Now it is defined in {context.name!r} {context.type} context."
848
864
  )
849
- result = _check_if_property_already_set(reader, row, given_name)
865
+ result = _check_if_property_already_set(reader, dtype["type"], given_name)
850
866
  if not (result and result["explicitly_given"] is False and result["type"] == DataTypeEnum.TEXT.value or not result):
851
867
  reader.error(
852
868
  f"Property {reader.name!r} with the same name is already "
@@ -893,22 +909,23 @@ def _text_datatype_handler(reader: PropertyReader, row: dict):
893
909
  return new_data
894
910
 
895
911
 
896
- def _default_datatype_handler(reader: PropertyReader, row: dict):
897
- return _datatype_handler(reader, row, _initial_normal_property_schema)
912
+ def _default_datatype_handler(reader: PropertyReader, row: dict, dtype: dict = None):
913
+ return _datatype_handler(reader, row, _initial_normal_property_schema, dtype)
898
914
 
899
915
 
900
- def _array_datatype_handler(reader: PropertyReader, row: dict):
901
- return _datatype_handler(reader, row, _initial_array_property_schema)
916
+ def _array_datatype_handler(reader: PropertyReader, row: dict, dtype: dict = None):
917
+ return _datatype_handler(reader, row, _initial_array_property_schema, dtype)
902
918
 
903
919
 
904
- def _partial_datatype_handler(reader: PropertyReader, row: dict):
905
- return _datatype_handler(reader, row, _initial_partial_property_schema)
920
+ def _partial_datatype_handler(reader: PropertyReader, row: dict, dtype: dict = None):
921
+ return _datatype_handler(reader, row, _initial_partial_property_schema, dtype)
906
922
 
907
923
 
908
- def _handle_datatype(reader: PropertyReader, row: dict):
909
- dtype: dict = _resolve_dtype(reader, row)
924
+ def _handle_datatype(reader: PropertyReader, row: dict, dtype: dict = None):
925
+ if dtype is None:
926
+ dtype = _resolve_dtype(reader, row)
910
927
  handler = DATATYPE_HANDLERS.get(dtype["type"], DATATYPE_HANDLERS["_default"])
911
- return handler(reader, row)
928
+ return handler(reader, row, dtype)
912
929
 
913
930
 
914
931
  DATATYPE_HANDLERS = {
@@ -1204,7 +1221,7 @@ def _extract_children_from_nested(base: dict, children_name: str) -> dict:
1204
1221
  return base
1205
1222
 
1206
1223
 
1207
- def _check_if_property_already_set(reader: PropertyReader, given_row: dict, full_name: str):
1224
+ def _check_if_property_already_set(reader: PropertyReader, given_type: str, full_name: str):
1208
1225
  # Treat '@' as normal '.', since '_extract_children_from_nested' is able to extract based on type
1209
1226
  split = full_name.replace("@", ".").split(".")
1210
1227
  base = {}
@@ -1250,11 +1267,14 @@ def _check_if_property_already_set(reader: PropertyReader, given_row: dict, full
1250
1267
  f"Property {full_name!r} with the same name is already defined for this {reader.state.model.name!r} model."
1251
1268
  )
1252
1269
 
1270
+ # `given_type` is the bare data type (modifiers and arguments already
1271
+ # stripped by the caller), so a nesting type such as `object` is recognised
1272
+ # even when it was declared as `object required`.
1253
1273
  if base and (
1254
- (base["type"] in ALLOWED_PARTIAL_TYPES and given_row["type"] not in ALLOWED_PARTIAL_TYPES)
1255
- or (base["type"] in ALLOWED_ARRAY_TYPES and given_row["type"] not in ALLOWED_ARRAY_TYPES)
1274
+ (base["type"] in ALLOWED_PARTIAL_TYPES and given_type not in ALLOWED_PARTIAL_TYPES)
1275
+ or (base["type"] in ALLOWED_ARRAY_TYPES and given_type not in ALLOWED_ARRAY_TYPES)
1256
1276
  ):
1257
- raise DataTypeCannotBeUsedForNesting(dtype=given_row["type"])
1277
+ raise DataTypeCannotBeUsedForNesting(dtype=given_type)
1258
1278
  return base
1259
1279
 
1260
1280
 
spinta/middlewares.py CHANGED
@@ -1,8 +1,66 @@
1
+ import posixpath
2
+
3
+ from starlette.responses import JSONResponse
1
4
  from starlette.types import ASGIApp, Message, Receive, Scope, Send
2
5
 
3
6
  from spinta.components import Context
4
7
 
5
8
 
9
+ def _is_normalized_path(scope: Scope) -> bool:
10
+ raw_path = (scope.get("raw_path") or b"").lower()
11
+ # Percent-encoded slashes, backslashes and dots decode into extra path
12
+ # delimiters and dot segments that shared caches do not see when they
13
+ # build the cache key.
14
+ if b"%2f" in raw_path or b"%5c" in raw_path or b"%2e" in raw_path:
15
+ return False
16
+ path = scope["path"]
17
+ if "\\" in path:
18
+ return False
19
+ normalized = posixpath.normpath(path)
20
+ # posixpath.normpath preserves a leading "//" (POSIX leaves two leading
21
+ # slashes implementation-defined), but shared caches collapse it to "/",
22
+ # so treat such a path as non-normalized.
23
+ if normalized.startswith("//"):
24
+ normalized = normalized[1:]
25
+ if path.endswith("/") and normalized != "/":
26
+ normalized += "/"
27
+ return path == normalized
28
+
29
+
30
class PathNormalizationMiddleware:
    """ASGI middleware that refuses HTTP requests with a non-normalized path.

    Shared caches (proxies, CDNs) normalize URLs (resolve `.` and `..`
    segments, decode percent-encoded slashes, collapse duplicate slashes)
    before building the cache key, whereas the application sees the original
    path and may echo it in the response. That mismatch allows web cache
    poisoning: the response produced for a crafted non-normalized path ends
    up cached under the key of a legitimate URL. Such requests are therefore
    answered here with a 404 JSON error marked `Cache-Control: no-store` and
    never reach the wrapped application.

    Only scopes of type `http` are checked; every other scope type is passed
    through to the wrapped application untouched.
    """

    def __init__(self, app: ASGIApp) -> None:
        # The next ASGI application in the middleware stack.
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        # Pass-through: non-HTTP scopes and HTTP requests with a clean path.
        if scope["type"] != "http" or _is_normalized_path(scope):
            await self.app(scope, receive, send)
            return

        payload = {
            "errors": [
                {
                    "code": "InvalidRequestPath",
                    "message": "Request path is not normalized.",
                }
            ]
        }
        # `no-store` keeps shared caches from storing the rejection itself.
        rejection = JSONResponse(
            payload,
            status_code=404,
            headers={"Cache-Control": "no-store"},
        )
        await rejection(scope, receive, send)
62
+
63
+
6
64
  class StrictTransportSecurityMiddleware:
7
65
  """Adds `Strict-Transport-Security` (HSTS) header to every HTTP response.
8
66
 
spinta/utils/response.py CHANGED
@@ -387,6 +387,9 @@ def cache_control_response_headers(context: Context, model: Model, target_id: st
387
387
 
388
388
  cache_control = {
389
389
  "Cache-Control": config.cache_control,
390
+ # Response body depends on these request headers (content negotiation,
391
+ # auth scopes), so shared caches must include them in the cache key.
392
+ "Vary": "Accept, Accept-Language, Authorization",
390
393
  "Last-Modified": last_modified,
391
394
  "ETag": revision,
392
395
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spinta
3
- Version: 0.2.dev27
3
+ Version: 0.2.dev28
4
4
  Summary: A platform for describing, extracting, transforming, loading and serving open data.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -7,7 +7,7 @@ spinta/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  spinta/adapters/rc/__init__.py,sha256=FhuhKn-tFIafQVGqSocMpw4bL_TobtgyNWTBZibKjXg,75
8
8
  spinta/adapters/rc/signature_adapter.py,sha256=jOWaISD2jZQdYEb_zDuPufJFPvaVyK2bU936Comt93w,4371
9
9
  spinta/adapters/soap_plugins.py,sha256=wPtoX83qBC7rr3JyCyFBAAsRgQGnocYHQnqsEvURSNw,4463
10
- spinta/api/__init__.py,sha256=64I1ecMrBhRWmi3MBKjqJe7XPHz63gZubmgRc4Hploo,15978
10
+ spinta/api/__init__.py,sha256=vCCROIgtmqoI17GRMqCNZoOlMmGASxd33_xGEV0xOx4,16286
11
11
  spinta/api/inspect.py,sha256=zWry4uSiYGn7FHLaqrn9k_8OVKRHLs3EH2Ni7IwgSqY,6775
12
12
  spinta/api/schema.py,sha256=annABS0b7yFKxNoCyu8kbZ0eMMv6qwCoapMKnljj0Jc,7634
13
13
  spinta/api/validators.py,sha256=FpCFd2EA1-aPU5totb5-bNDioM-67OnQ_7gt6s3qH0U,666
@@ -590,7 +590,7 @@ spinta/manifests/tabular/components.py,sha256=_SUFrsFxEFetLcdXymDiWVKOCSLK7pylyv
590
590
  spinta/manifests/tabular/constants.py,sha256=fgFnIcPhozim_aQj5f8u4PGcloifDl497T589aJ8It4,720
591
591
  spinta/manifests/tabular/formats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
592
592
  spinta/manifests/tabular/formats/gsheets.py,sha256=iu-rJrzr6Inz_8-B75xzIryR8KWTO60xg0TNKizSijk,937
593
- spinta/manifests/tabular/helpers.py,sha256=qt2SNNEwoCCzzheCA2E1aRA5_3fIovgOmTVILoayUIs,94874
593
+ spinta/manifests/tabular/helpers.py,sha256=DrzxCP9mph6IlTcd734memdYqsE2BdI5Q-HnnPIWeQ4,95934
594
594
  spinta/manifests/xsd/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
595
595
  spinta/manifests/xsd/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
596
596
  spinta/manifests/xsd/commands/configure.py,sha256=FgVOwTy8SosDjHbgeIRWzYocX1mAIW_jAymbzwxDDro,539
@@ -609,7 +609,7 @@ spinta/manifests/yaml/commands/sync.py,sha256=hj9RLApoZvhK8I2ftx-KU4yGqi1uxqZPfw
609
609
  spinta/manifests/yaml/components.py,sha256=HPK_rJya_F47p3E6tYsZWUROvfycfPLFnrgGeMS0nMA,317
610
610
  spinta/manifests/yaml/helpers.py,sha256=xjOY3ascGPhaLgdSjJJmacBM0If5pFjSuo82WnpbQ34,4236
611
611
  spinta/methods.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
612
- spinta/middlewares.py,sha256=XjNRKtaPfxOfZwvVgU34CHkxSHzQ3VaclA7UiWdIwtY,2143
612
+ spinta/middlewares.py,sha256=DyOG8tBB9674f4fzHQQbmWNNnrDx4ZUNbymMa3IJfZI,4421
613
613
  spinta/naming/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
614
614
  spinta/naming/components.py,sha256=BPlv9F91v_KyS7-4rJi1UfG9b21qKgiD6vouXRRgHt4,181
615
615
  spinta/naming/helpers.py,sha256=H22uq33wgGh-c0Cdud7oz5MN33LVU50N_PVY5zQails,4462
@@ -742,7 +742,7 @@ spinta/utils/nin.py,sha256=eNkYDtph6dFF-4d1SbYdLzz9FZhdpSJcFAJX9OsEw14,2295
742
742
  spinta/utils/passwords.py,sha256=v6UPoJADbc4bRmd1CRWDvxtUWcT9ePxURbvnB-BLeR4,1424
743
743
  spinta/utils/path.py,sha256=fbEBizEzHPpDRKodL_9cQXuIkoLsSuRBv44MFuRQxIk,1289
744
744
  spinta/utils/refs.py,sha256=E2cF2KKJka7jmCCQxYhSckKTCIkkorrtWb6wRs8UY4Q,530
745
- spinta/utils/response.py,sha256=fECmQD_--g6XHE3iz0vqGSBMptaH22SWYwnb6YB2lk4,12921
745
+ spinta/utils/response.py,sha256=LcSldFOGQvUfJO_Q8WFkZVoz3R_y7bEZVvuCcNgmcXw,13135
746
746
  spinta/utils/schema.py,sha256=K0eCDeiHlh1WchuXfJoS5M6xm1CK7Co8h13-QPNzSro,2737
747
747
  spinta/utils/scopes.py,sha256=ffxg15O63tifEVWmFY-aWcdM5-xUXkRxtbu8bTmVNks,796
748
748
  spinta/utils/sqlalchemy.py,sha256=jJGteC63szzQIJv6zXKMdig6lUjubb7z8mGTVsG9dnM,1623
@@ -753,8 +753,8 @@ spinta/utils/tree.py,sha256=14USJGfLvfErh15zihJsLunorMql2KXbRuB_Eqsys_w,590
753
753
  spinta/utils/types.py,sha256=lfYSxKGPuPeUsO14d2OYodtbRY3zsa-o-z8HveVH3t0,801
754
754
  spinta/utils/units.py,sha256=CFFLv1NHYsoSSzwiar3zXYmt4m3sccW5niUgkZQgo3k,747
755
755
  spinta/utils/url.py,sha256=SLnwEEpXfJZzjvHP8ZPPlb0QXzkeAzY2e72qD1edy-A,3470
756
- spinta-0.2.dev27.dist-info/METADATA,sha256=892ytUFXKAxzCFQDO1a15fBPCyECUpoH-Ka-pmEy7PI,10295
757
- spinta-0.2.dev27.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
758
- spinta-0.2.dev27.dist-info/entry_points.txt,sha256=-jdsOQZcMu3rUOwgIJNS3gZS4rwWPACuXXy128F676w,46
759
- spinta-0.2.dev27.dist-info/licenses/LICENSE,sha256=JKmjfBLapeFWNI_qdVr5bXGlsuMPa6nRarKPK5davKw,1071
760
- spinta-0.2.dev27.dist-info/RECORD,,
756
+ spinta-0.2.dev28.dist-info/METADATA,sha256=FjmBsVZsxTdtmlgYTmI0sjVZohRGGk-6GZ0sECkeitc,10295
757
+ spinta-0.2.dev28.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
758
+ spinta-0.2.dev28.dist-info/entry_points.txt,sha256=-jdsOQZcMu3rUOwgIJNS3gZS4rwWPACuXXy128F676w,46
759
+ spinta-0.2.dev28.dist-info/licenses/LICENSE,sha256=JKmjfBLapeFWNI_qdVr5bXGlsuMPa6nRarKPK5davKw,1071
760
+ spinta-0.2.dev28.dist-info/RECORD,,