valediction 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. valediction/__init__.py +8 -0
  2. valediction/convenience.py +50 -0
  3. valediction/data_types/__init__.py +0 -0
  4. valediction/data_types/data_type_helpers.py +75 -0
  5. valediction/data_types/data_types.py +58 -0
  6. valediction/data_types/type_inference.py +541 -0
  7. valediction/datasets/__init__.py +0 -0
  8. valediction/datasets/datasets.py +870 -0
  9. valediction/datasets/datasets_helpers.py +46 -0
  10. valediction/demo/DEMO - Data Dictionary.xlsx +0 -0
  11. valediction/demo/DEMOGRAPHICS.csv +101 -0
  12. valediction/demo/DIAGNOSES.csv +650 -0
  13. valediction/demo/LAB_TESTS.csv +1001 -0
  14. valediction/demo/VITALS.csv +1001 -0
  15. valediction/demo/__init__.py +6 -0
  16. valediction/demo/demo_dictionary.py +129 -0
  17. valediction/dictionary/__init__.py +0 -0
  18. valediction/dictionary/exporting.py +501 -0
  19. valediction/dictionary/exporting_helpers.py +371 -0
  20. valediction/dictionary/generation.py +357 -0
  21. valediction/dictionary/helpers.py +174 -0
  22. valediction/dictionary/importing.py +494 -0
  23. valediction/dictionary/integrity.py +37 -0
  24. valediction/dictionary/model.py +582 -0
  25. valediction/dictionary/template/PROJECT - Data Dictionary.xltx +0 -0
  26. valediction/exceptions.py +22 -0
  27. valediction/integrity.py +97 -0
  28. valediction/io/__init__.py +0 -0
  29. valediction/io/csv_readers.py +307 -0
  30. valediction/progress.py +206 -0
  31. valediction/support.py +72 -0
  32. valediction/validation/__init__.py +0 -0
  33. valediction/validation/helpers.py +315 -0
  34. valediction/validation/issues.py +280 -0
  35. valediction/validation/validation.py +598 -0
  36. valediction-1.0.0.dist-info/METADATA +15 -0
  37. valediction-1.0.0.dist-info/RECORD +38 -0
  38. valediction-1.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,541 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import warnings
5
+
6
+ import pandas as pd
7
+
8
+ from valediction.data_types.data_type_helpers import infer_datetime_format
9
+ from valediction.data_types.data_types import DataType
10
+ from valediction.integrity import get_config
11
+ from valediction.progress import Progress
12
+
13
# ---------- compiled patterns ----------
# Optionally signed run of digits, e.g. "42", "-7".
_INT_RE = re.compile(r"^[+-]?\d+$")
# FLOAT: allow decimals OR integers, plus optional scientific notation
_FLOAT_RE = re.compile(r"^[+-]?(?:\d+\.\d*|\.\d+|\d+)(?:[eE][+-]?\d+)?$")
# integers written as 123, 123.0, 123.
_INT_EQ_RE = re.compile(r"^[+-]?\d+(?:\.0*)?$")
# Zero-padded integer tokens such as "007" — these must stay TEXT (codes, IDs).
_LEAD0_RE = re.compile(r"^[+-]?0\d+$")
_DATE_HINT_RE = re.compile(r"[-/T]")  # cheap prefilter
# Progress steps reported per column by TypeInferer.update_with_chunk.
COLUMN_STEPS = 8
22
+
23
+
24
class ColumnState:
    """Per-column accumulator used by TypeInferer across chunks."""

    def __init__(self, name: str) -> None:
        self.name = name
        self.data_type: DataType = DataType.TEXT
        self.nullable: bool = False
        self.max_length: int = 0

        # Locks / disqualifiers
        self.lock_text_due_to_leading_zero: bool = False
        self.lock_text_permanent: bool = False
        self.disqualify_numeric: bool = False
        self.disqualify_datetime: bool = False

        # Datetime speed hint
        self.cached_datetime_format: str | None = None
        self.prefer_date_first: bool = False

    def final_data_type_and_length(self) -> tuple[DataType, int | None]:
        """Resolve the final (type, length) pair; TEXT carries a length >= 1."""
        text_length = max(1, self.max_length or 0)
        locked_to_text = (
            self.lock_text_due_to_leading_zero or self.lock_text_permanent
        )
        typed = self.data_type in (
            DataType.INTEGER,
            DataType.FLOAT,
            DataType.DATE,
            DataType.DATETIME,
        )
        if typed and not locked_to_text:
            # Non-text types carry no length constraint.
            return self.data_type, None
        return DataType.TEXT, text_length
60
+
61
+
62
+ class TypeInferer:
63
+ """
64
+ Chunk-friendly type inference with:
65
+ - compiled regex reuse
66
+ - cached datetime formats
67
+ - sticky TEXT on contradictions
68
+ - unified debug logging via __say()
69
+ """
70
+
71
    def __init__(
        self,
        *,
        dayfirst: bool,
        debug: bool = False,
        progress: Progress | None = None,
    ) -> None:
        """Create a chunk-friendly type inferer.

        Args:
            dayfirst: interpret ambiguous dates day-first when falling back
                to pandas' generic parser.
            debug: when True, emit diagnostic lines via __say().
            progress: optional Progress sink for per-column step reporting.
        """
        config = get_config()
        self.dayfirst = dayfirst
        # Explicit datetime formats to try, in configured priority order.
        self.datetime_formats = config.date_formats
        # Null tokens normalised for case-insensitive comparison.
        self.null_tokens = {v.strip().lower() for v in config.null_values}
        self.states: dict[str, ColumnState] = {}
        self.debug = debug
        # NOTE(review): a None progress breaks __begin_step/__complete_step,
        # which call methods on it unconditionally — confirm callers always
        # provide one.
        self.progress: Progress | None = progress
        self.__current_column: str | None = None
86
+
87
    # Inference
    def update_with_chunk(self, df: pd.DataFrame) -> None:
        """Fold one DataFrame chunk into the per-column inference states.

        Every column consumes the same number of progress steps regardless of
        which early-exit path is taken (skipped steps are completed in bulk),
        keeping the Progress counter aligned with COLUMN_STEPS.
        """
        if df.empty:
            return

        for col in df.columns:
            self.__current_column = col
            self.__begin_step(step="Preparing column")
            series = self._ensure_string_series(df[col])
            state = self.states.setdefault(col, ColumnState(name=col))
            self.__complete_step()  # 1 step

            trimmed, nulls, nonnull_mask, max_len = self._preprocess_column(
                series
            )  # 4 steps
            state.nullable |= bool(nulls.any())
            if max_len is not None and max_len > state.max_length:
                state.max_length = max_len

            if not bool(nonnull_mask.any()):
                # All-null chunk: complete the 3 remaining steps in one go.
                self.__complete_step(n=3, save_as="Skipped")
                continue  # nothing to learn in this chunk

            non_nulls = trimmed[nonnull_mask]

            # Hard TEXT locks
            if self._apply_hard_text_locks(state, non_nulls):  # 1 step
                self.__complete_step(n=2, save_as="Skipped")
                continue

            # Datetime fast path
            if self._apply_datetime_fast_path(state, non_nulls):  # 1 step
                self.__complete_step(n=1, save_as="Skipped")
                continue

            # State-specific handling: dispatch on the current inferred type.
            _handling_function: callable = {
                DataType.TEXT: self._handle_state_text,
                DataType.DATE: self._handle_state_date,
                DataType.DATETIME: self._handle_state_datetime,
                DataType.INTEGER: self._handle_state_integer,
                DataType.FLOAT: self._handle_state_float,
            }.get(state.data_type, self._handle_state_text)

            _handling_function(state, non_nulls)  # 1 of 5 steps
132
+
133
+ # Inference Helpers
134
+ @staticmethod
135
+ def _ensure_string_series(s: pd.Series) -> pd.Series:
136
+ if not pd.api.types.is_string_dtype(s.dtype):
137
+ return s.astype("string")
138
+ return s
139
+
140
    def _preprocess_column(
        self, s: pd.Series
    ) -> tuple[pd.Series, pd.Series, pd.Series, int | None]:
        """Trim, null-detect and measure one column (4 progress steps).

        Returns (trimmed, nulls, nonnull_mask, max_len); max_len is the
        longest raw value, or None when every value is NA.
        """
        self.__begin_step(step="Trimming whitespace")
        trimmed = s.str.strip()
        self.__complete_step()

        self.__begin_step(step="Checking nulls")
        # Null when pandas says so, or when it matches a configured token.
        nulls = trimmed.isna() | trimmed.str.lower().isin(self.null_tokens)
        self.__complete_step()

        self.__begin_step(step="Checking max length")
        # NOTE(review): lengths come from the raw series, so surrounding
        # whitespace counts toward max_length — confirm this is intended.
        lengths = s.str.len()
        max_len = int(lengths.max(skipna=True)) if lengths.notna().any() else None
        self.__complete_step()

        self.__begin_step(step="Setting non-null mask")
        nonnull_mask = (~nulls) & s.notna()
        self.__complete_step()

        return trimmed, nulls, nonnull_mask, max_len
161
+
162
+ # Early Locks
163
+ @staticmethod
164
+ def _looks_dateish(nn: pd.Series) -> bool:
165
+ return bool(nn.str.contains(_DATE_HINT_RE).any())
166
+
167
+ @staticmethod
168
+ def _has_leading_zero(nn: pd.Series) -> bool:
169
+ return bool(nn.str.match(_LEAD0_RE, na=False).any())
170
+
171
    def _apply_hard_text_locks(self, st: ColumnState, nn: pd.Series) -> bool:
        """Apply permanent TEXT locks; return True when the column is locked.

        Completes exactly one progress step on every path.
        NOTE(review): no matching __begin_step here — Progress.complete_step
        appears to advance the counter without one; confirm.
        """
        if st.lock_text_due_to_leading_zero or st.lock_text_permanent:
            self._transition(st, DataType.TEXT, "locked to TEXT")
            self.__complete_step()
            return True

        if self._has_leading_zero(nn):
            # Zero-padded tokens ("007") would lose information as numbers.
            self._debug_leading_zero_examples(st, nn)
            st.lock_text_due_to_leading_zero = True
            self._transition(st, DataType.TEXT, "leading-zero integer tokens")
            self.__complete_step()
            return True

        self.__complete_step()
        return False
186
+
187
    def _apply_datetime_fast_path(self, st: ColumnState, nn: pd.Series) -> bool:
        """Re-validate a previously cached datetime format (1 progress step).

        Returns True when the chunk parses fully and the state was
        transitioned to DATE/DATETIME accordingly.
        """
        self.__begin_step(step="Applying datetime locks")

        # Cached single format
        if st.cached_datetime_format is not None:
            ok, has_time = self._parse_with_cached_format(nn, st.cached_datetime_format)
            if ok.all():
                self._transition(
                    st,
                    DataType.DATETIME if has_time.any() else DataType.DATE,
                    f"cached datetime format={st.cached_datetime_format!r}",
                )
                self.__complete_step()
                return True

            # Cache failed on this chunk: drop it and the date-first hint.
            st.cached_datetime_format = None
            st.prefer_date_first = False

        # Date-first hint (explicit formats)
        # NOTE(review): prefer_date_first is only ever set True together with
        # a cached format, and it is cleared just above when the cache fails,
        # so this branch looks unreachable — confirm intent.
        if st.prefer_date_first and not st.disqualify_datetime:
            for fmt in self.datetime_formats:
                ok, has_time = self._parse_with_cached_format(nn, fmt)
                if ok.all():
                    st.cached_datetime_format = fmt
                    self._transition(
                        st,
                        DataType.DATETIME if has_time.any() else DataType.DATE,
                        f"explicit datetime format={fmt!r}",
                    )
                    self.__complete_step()
                    return True

        self.__complete_step()
        return False
221
+
222
+ # State Handlers
223
+ def _handle_state_text(self, st: ColumnState, nn: pd.Series) -> None:
224
+ self.__begin_step(step="Handling text")
225
+ # DATETIME attempt
226
+ if not st.disqualify_datetime and self._looks_dateish(nn):
227
+ if self._try_parse_datetime_then_cache(st, nn):
228
+ self.__complete_step()
229
+ return
230
+
231
+ # NUMERIC attempt
232
+ if not st.disqualify_numeric:
233
+ int_equiv = nn.str.fullmatch(_INT_EQ_RE, na=False)
234
+ float_like = nn.str.fullmatch(_FLOAT_RE, na=False)
235
+
236
+ if int_equiv.all():
237
+ self._transition(st, DataType.INTEGER, "all integer-equivalent")
238
+ self.__complete_step()
239
+ return
240
+
241
+ if (int_equiv | float_like).all():
242
+ self._debug_float_promotion(st, nn, int_equiv, float_like)
243
+ self._transition(st, DataType.FLOAT, "mixed numeric (int/float)")
244
+ self.__complete_step()
245
+ return
246
+
247
+ # Otherwise: non-numeric → TEXT (sticky)
248
+ self._debug_offenders_numeric(st, nn, int_equiv, float_like)
249
+ st.disqualify_numeric = True
250
+ st.lock_text_permanent = True
251
+ self._transition(st, DataType.TEXT, "non-numeric tokens present")
252
+ self.__complete_step()
253
+ return
254
+
255
+ # If both numeric and datetime are disqualified, permanently TEXT
256
+ if st.disqualify_numeric and st.disqualify_datetime:
257
+ st.lock_text_permanent = True
258
+ self._transition(
259
+ st, DataType.TEXT, "both numeric and datetime disqualified"
260
+ )
261
+ self.__complete_step()
262
+
263
+ def _handle_state_date(self, st: ColumnState, nn: pd.Series) -> None:
264
+ self.__begin_step(step="Handling dates")
265
+ if not self._looks_dateish(nn):
266
+ st.disqualify_datetime = True
267
+ st.lock_text_permanent = True
268
+ self._transition(st, DataType.TEXT, "lost date-ish pattern")
269
+ self.__complete_step()
270
+ return
271
+
272
+ ok, has_time = self._datetime_parse_ok(nn)
273
+ if not ok.all():
274
+ self._debug_offenders_datetime(st, nn, ok)
275
+ st.disqualify_datetime = True
276
+ st.lock_text_permanent = True
277
+ self._transition(st, DataType.TEXT, "datetime parse failures")
278
+ elif has_time.any():
279
+ self._transition(st, DataType.DATETIME, "time component detected")
280
+
281
+ self.__complete_step()
282
+
283
+ def _handle_state_datetime(self, st: ColumnState, nn: pd.Series) -> None:
284
+ self.__begin_step(step="Handling datetimes")
285
+ if not self._looks_dateish(nn):
286
+ st.disqualify_datetime = True
287
+ st.lock_text_permanent = True
288
+ self._transition(st, DataType.TEXT, "lost date-ish pattern")
289
+ self.__complete_step()
290
+ return
291
+
292
+ ok, _ = self._datetime_parse_ok(nn)
293
+ if not ok.all():
294
+ self._debug_offenders_datetime(st, nn, ok)
295
+ st.disqualify_datetime = True
296
+ st.lock_text_permanent = True
297
+ self._transition(st, DataType.TEXT, "datetime parse failures")
298
+
299
+ self.__complete_step()
300
+
301
+ def _handle_state_integer(self, st: ColumnState, nn: pd.Series) -> None:
302
+ self.__begin_step(step="Handling integers")
303
+ int_equiv = nn.str.fullmatch(_INT_EQ_RE, na=False)
304
+ float_like = nn.str.fullmatch(_FLOAT_RE, na=False)
305
+
306
+ if not (int_equiv | float_like).all():
307
+ self._debug_offenders_numeric(st, nn, int_equiv, float_like)
308
+ st.disqualify_numeric = True
309
+ st.lock_text_permanent = True
310
+ self._transition(st, DataType.TEXT, "non-numeric tokens introduced")
311
+ elif float_like.any() and not int_equiv.all():
312
+ self._debug_float_promotion(st, nn, int_equiv, float_like)
313
+ self._transition(st, DataType.FLOAT, "decimals/scientific detected")
314
+
315
+ self.__complete_step()
316
+ # else remain INTEGER
317
+
318
+ def _handle_state_float(self, st: ColumnState, nn: pd.Series) -> None:
319
+ self.__begin_step(step="Handling floats")
320
+ int_like = nn.str.fullmatch(_INT_RE, na=False)
321
+ fl_like = nn.str.fullmatch(_FLOAT_RE, na=False)
322
+ if not (int_like | fl_like).all():
323
+ self._debug_offenders_numeric(st, nn, int_like, fl_like)
324
+ st.disqualify_numeric = True
325
+ st.lock_text_permanent = True
326
+ self._transition(st, DataType.TEXT, "non-numeric tokens introduced")
327
+ self.__complete_step()
328
+
329
    # Datetime Parsing
    def _try_parse_datetime_then_cache(self, st: ColumnState, nn: pd.Series) -> bool:
        """Try to establish (and cache) an explicit datetime format for *st*.

        Returns True when the whole live slice parses as DATE/DATETIME; on
        failure the column's datetime candidacy may be disqualified.
        """
        # 1) If we've already cached a format, try it fast
        if st.cached_datetime_format is not None:
            ok, has_time = self._parse_with_cached_format(nn, st.cached_datetime_format)
            if ok.all():
                self._transition(
                    st,
                    DataType.DATETIME if has_time.any() else DataType.DATE,
                    f"cached datetime format={st.cached_datetime_format!r}",
                )
                return True
            # cache failed on this chunk; clear and fall through to re-infer once
            st.cached_datetime_format = None
            st.prefer_date_first = False

        # 2) Infer with the helper (efficient: unique, batched, intersects across slices)
        # Work on uniques only for speed and stability.
        uniq = (
            nn.astype("string", copy=False)
            .str.strip()
            .replace("", pd.NA)
            .dropna()
            .unique()
        )
        if len(uniq) == 0:
            return False

        try:
            fmt_or_false = infer_datetime_format(pd.Series(uniq, dtype="string"))
        except ValueError as e:
            # ambiguous after scanning — treat as "can't determine" and disqualify
            self.__say(f"[{st.name}] datetime ambiguous: {e}")
            st.disqualify_datetime = True
            return False

        if fmt_or_false is False:
            # helper couldn't find any valid explicit format
            st.disqualify_datetime = True
            self._transition(
                st, DataType.TEXT, "datetime helper found no matching format"
            )
            return False

        # 3) Cache and confirm on current (non-unique) values
        st.cached_datetime_format = fmt_or_false
        st.prefer_date_first = True
        ok, has_time = self._parse_with_cached_format(nn, st.cached_datetime_format)
        if ok.all():
            self._transition(
                st,
                DataType.DATETIME if has_time.any() else DataType.DATE,
                f"explicit datetime format={st.cached_datetime_format!r}",
            )
            return True

        # Freshly inferred format still fails on the full slice: give up.
        self.__say(
            f"[{st.name}] cached format failed on live slice; disqualifying datetime."
        )
        st.cached_datetime_format = None
        st.disqualify_datetime = True
        return False
391
+
392
+ def _parse_with_cached_format(
393
+ self, s: pd.Series, fmt: str
394
+ ) -> tuple[pd.Series, pd.Series]:
395
+ with warnings.catch_warnings():
396
+ warnings.simplefilter("ignore", UserWarning)
397
+ parsed = pd.to_datetime(s, format=fmt, errors="coerce", utc=False)
398
+
399
+ ok = parsed.notna()
400
+ has_time = ok & (
401
+ (parsed.dt.hour != 0)
402
+ | (parsed.dt.minute != 0)
403
+ | (parsed.dt.second != 0)
404
+ | (parsed.dt.microsecond != 0)
405
+ )
406
+ return ok, has_time
407
+
408
+ def _datetime_parse_ok(self, s: pd.Series) -> tuple[pd.Series, pd.Series]:
409
+ with warnings.catch_warnings():
410
+ warnings.simplefilter("ignore", UserWarning)
411
+ parsed = pd.to_datetime(
412
+ s, errors="coerce", dayfirst=self.dayfirst, utc=False
413
+ )
414
+
415
+ ok = parsed.notna()
416
+ has_time = ok & (
417
+ (parsed.dt.hour != 0)
418
+ | (parsed.dt.minute != 0)
419
+ | (parsed.dt.second != 0)
420
+ | (parsed.dt.microsecond != 0)
421
+ )
422
+ return ok, has_time
423
+
424
+ # Debug/Log
425
+ def __say(self, *values: object, sep: str = " ", end: str = "\n") -> None:
426
+ if self.debug:
427
+ print("TypeInferer:", *values, sep=sep, end=end)
428
+
429
+ def _transition(self, st: ColumnState, to_type: DataType, reason: str) -> None:
430
+ """Set st.data_type and emit a standardised debug line if changed."""
431
+ from_type = st.data_type
432
+ st.data_type = to_type
433
+ if self.debug:
434
+ if from_type != to_type:
435
+ self.__say(f"[{st.name}] {from_type.name} → {to_type.name} ({reason})")
436
+ else:
437
+ self.__say(f"[{st.name}] stays {to_type.name} ({reason})")
438
+
439
+ def _fmt_examples(
440
+ self,
441
+ vc: pd.Series,
442
+ *,
443
+ max_examples: int = 5,
444
+ max_value_len: int = 80,
445
+ ) -> str:
446
+ shown = vc.head(max_examples)
447
+ parts: list[str] = []
448
+ for val in shown.index:
449
+ s = repr(val)
450
+ if len(s) > max_value_len:
451
+ s = s[: max_value_len - 1] + "…"
452
+ parts.append(s)
453
+ extra = vc.shape[0] - shown.shape[0]
454
+ suffix = f"; …+{extra}" if extra > 0 else ""
455
+ return "[" + "; ".join(parts) + suffix + "]"
456
+
457
+ def _debug_offenders_numeric(
458
+ self,
459
+ st: ColumnState,
460
+ nn: pd.Series,
461
+ int_like: pd.Series,
462
+ float_like: pd.Series,
463
+ *,
464
+ max_examples: int = 5,
465
+ note: str = "non-numeric present",
466
+ ) -> None:
467
+ if not self.debug:
468
+ return
469
+ bad = ~(int_like | float_like)
470
+ if not bool(bad.any()):
471
+ return
472
+ vc = nn[bad].value_counts(dropna=False)
473
+ examples = self._fmt_examples(vc, max_examples=max_examples)
474
+ self.__say(f"[{st.name}] numeric disqualified: {note}. Examples {examples}")
475
+
476
+ def _debug_offenders_datetime(
477
+ self,
478
+ st: ColumnState,
479
+ nn: pd.Series,
480
+ ok_mask: pd.Series,
481
+ *,
482
+ max_examples: int = 5,
483
+ ) -> None:
484
+ if not self.debug:
485
+ return
486
+ bad = ~ok_mask
487
+ if not bool(bad.any()):
488
+ return
489
+ vc = nn[bad].value_counts(dropna=False)
490
+ examples = self._fmt_examples(vc, max_examples=max_examples)
491
+ self.__say(f"[{st.name}] datetime disqualified. Examples {examples}")
492
+
493
+ def _debug_leading_zero_examples(
494
+ self,
495
+ st: ColumnState,
496
+ nn: pd.Series,
497
+ *,
498
+ max_examples: int = 5,
499
+ ) -> None:
500
+ if not self.debug:
501
+ return
502
+ m = nn.str.match(_LEAD0_RE, na=False)
503
+ if not bool(m.any()):
504
+ return
505
+ vc = nn[m].value_counts(dropna=False)
506
+ examples = self._fmt_examples(vc, max_examples=max_examples)
507
+ self.__say(f"[{st.name}] leading-zero lock. Examples {examples}")
508
+
509
+ def _debug_float_promotion(
510
+ self,
511
+ st: ColumnState,
512
+ nn: pd.Series,
513
+ int_equiv: pd.Series,
514
+ float_like: pd.Series,
515
+ *,
516
+ max_examples: int = 5,
517
+ ) -> None:
518
+ if not self.debug:
519
+ return
520
+ non_integer_numeric = float_like & ~int_equiv
521
+ if not bool(non_integer_numeric.any()):
522
+ self.__say(f"[{st.name}] promoted to FLOAT.")
523
+ return
524
+ sample = nn[non_integer_numeric]
525
+ reasons = []
526
+ if bool(sample.str.contains(r"\.", na=False).any()):
527
+ reasons.append("decimals present")
528
+ if bool(sample.str.contains(r"[eE][+-]?\d+", na=False).any()):
529
+ reasons.append("scientific notation present")
530
+ reason_msg = (": " + ", ".join(reasons)) if reasons else ""
531
+ vc = sample.value_counts(dropna=False)
532
+ examples = self._fmt_examples(vc, max_examples=max_examples)
533
+ self.__say(f"[{st.name}] promoted to FLOAT{reason_msg}. Examples {examples}")
534
+
535
+ def __begin_step(self, step: str):
536
+ self.progress.begin_step(
537
+ step=step, alt_postfix=f"{self.__current_column}: {step}"
538
+ )
539
+
540
+ def __complete_step(self, n: int = 1, save_as: str = None):
541
+ self.progress.complete_step(n=n, save_as=save_as)
File without changes