typemonkey 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
typemonkey/__init__.py ADDED
@@ -0,0 +1,49 @@
1
+ """typemonkey — column type inference and type-aware cleaning.
2
+
3
+ Public API (import ``from typemonkey import X``):
4
+
5
+ * :func:`infer_type` — profile a column, returning a :class:`ColumnProfile`.
6
+ * :func:`clean_column` — clean a column to an inferred or given type.
7
+ * :func:`clean_numeric` / :func:`clean_boolean` — type-specific cleaners; :func:`parse_number` / :func:`parse_boolean` — their single-value parsers.
8
+ * :func:`normalize_nulls` / :func:`is_null` — null handling.
9
+ * :func:`detect_number_locale` — US vs European number format detection.
10
+ * :func:`looks_like_preserve_string` — zip/phone/zero-padded-ID detection.
11
+ * :class:`ColumnProfile`, :class:`CleanResult`, :class:`TypeName` — result types.
12
+ * :data:`DEFAULT_NULLS`, :data:`TRUE_VALUES`, :data:`FALSE_VALUES` — vocabularies.
13
+
14
+ See LIMITATIONS.md for deliberate design tradeoffs and SKILL.md for an
15
+ LLM-oriented quick reference.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from .booleans import FALSE_VALUES, TRUE_VALUES, clean_boolean, parse_boolean
21
+ from .clean import clean_column
22
+ from .infer import infer_type
23
+ from .locale import detect_number_locale
24
+ from .models import CleanResult, ColumnProfile, TypeName
25
+ from .nulls import DEFAULT_NULLS, is_null, normalize_nulls
26
+ from .numbers import clean_numeric, parse_number
27
+ from .preserve import looks_like_preserve_string
28
+
29
# Package version, exposed as ``typemonkey.__version__``.
__version__ = "1.0.0"

# Names exported by ``from typemonkey import *``.
__all__ = [
    # Inference and cleaning entry points.
    "infer_type",
    "clean_column",
    "clean_numeric",
    "clean_boolean",
    # Single-value parsers.
    "parse_number",
    "parse_boolean",
    # Null handling.
    "normalize_nulls",
    "is_null",
    # Detection helpers.
    "detect_number_locale",
    "looks_like_preserve_string",
    # Result types.
    "ColumnProfile",
    "CleanResult",
    "TypeName",
    # Vocabularies.
    "DEFAULT_NULLS",
    "TRUE_VALUES",
    "FALSE_VALUES",
    "__version__",
]
typemonkey/booleans.py ADDED
@@ -0,0 +1,124 @@
1
+ """Boolean vocabulary and normalisation.
2
+
3
+ This module exists because "true" arrives as ``yes``, ``Y``, ``1``, ``on``,
4
+ ``T``, ``true``, ``TRUE`` and a dozen other spellings, and inference needs one
5
+ authoritative mapping from token to ``bool``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+
12
+ from .models import CleanResult, TypeName
13
+ from .nulls import is_null
14
+
15
# Boolean vocabularies. Lookups trim and lower-case the token first, so each
# spelling is listed exactly once, in lower case.
TRUE_VALUES: frozenset[str] = frozenset(
    ("true", "t", "yes", "y", "1", "on", "enabled", "active")
)
FALSE_VALUES: frozenset[str] = frozenset(
    ("false", "f", "no", "n", "0", "off", "disabled", "inactive")
)
# "0" and "1" are boolean spellings that are *also* valid integer data.
# Inference consults this set so a 0/1 integer column is not mislabelled as
# boolean.
NUMERIC_BOOLEANS: frozenset[str] = frozenset(("0", "1"))
26
+
27
+
28
@dataclass
class ParsedBoolean:
    """Result of parsing a single boolean token.

    Attributes:
        value: The ``bool`` the token maps to.
        numeric: Whether the source token was the digit ``"0"`` or ``"1"``,
            i.e. the ambiguous spelling that is just as valid as integer data.
    """

    value: bool
    numeric: bool
40
+
41
+
42
def parse_boolean(
    token: object,
    *,
    true_values=None,
    false_values=None,
) -> ParsedBoolean:
    """Parse one boolean token, raising :class:`ValueError` if unrecognised.

    The token and any override vocabulary are normalised the same way
    (stringified, whitespace-trimmed, lower-cased), so an override spelled
    ``" Yes "`` or given as the integer ``1`` matches the tokens ``"yes"`` and
    ``"1"`` respectively.

    Args:
        token: The value to parse. Non-strings are stringified; the result is
            trimmed and lower-cased before lookup.
        true_values: Override truthy vocabulary (case-insensitive). A bare
            string is treated as a single spelling. Defaults to
            :data:`TRUE_VALUES`.
        false_values: Override falsy vocabulary, normalised like
            ``true_values``. Defaults to :data:`FALSE_VALUES`.

    Returns:
        A :class:`ParsedBoolean`.

    Raises:
        ValueError: If the token is in neither vocabulary.
    """
    trues = TRUE_VALUES if true_values is None else _normalise_vocabulary(true_values)
    falses = FALSE_VALUES if false_values is None else _normalise_vocabulary(false_values)
    key = str(token).strip().lower()
    # The truthy vocabulary is consulted first, so a spelling present in both
    # vocabularies parses as True.
    if key in trues:
        return ParsedBoolean(value=True, numeric=key in NUMERIC_BOOLEANS)
    if key in falses:
        return ParsedBoolean(value=False, numeric=key in NUMERIC_BOOLEANS)
    raise ValueError(f"{token!r} is not a recognised boolean")


def _normalise_vocabulary(spellings) -> frozenset[str]:
    """Normalise an override vocabulary exactly like a token is normalised."""
    if isinstance(spellings, str):
        # A bare string is one spelling, not an iterable of characters.
        spellings = (spellings,)
    return frozenset(str(s).strip().lower() for s in spellings)
72
+
73
+
74
def is_boolean(token: object, **kwargs) -> bool:
    """Report whether ``token`` is a recognised boolean spelling.

    Keyword arguments are forwarded unchanged to :func:`parse_boolean`; a
    :class:`ValueError` raised by it is taken to mean "not a boolean".
    """
    try:
        parse_boolean(token, **kwargs)
    except ValueError:
        return False
    else:
        return True
81
+
82
+
83
def clean_boolean(
    values,
    *,
    true_values=None,
    false_values=None,
    null_values=None,
) -> CleanResult:
    """Convert a column of messy boolean spellings into ``bool`` values.

    Args:
        values: The column to clean.
        true_values: Override truthy vocabulary (see :func:`parse_boolean`).
        false_values: Override falsy vocabulary.
        null_values: Null spellings (see :func:`typemonkey.nulls.is_null`).
            Values recognised as null become ``None`` and are counted in
            ``null_count`` rather than reported as failures.

    Returns:
        A :class:`CleanResult` with one output value per input value. Tokens
        that are neither null nor a known boolean spelling become ``None`` and
        are recorded in ``failures`` as ``(index, original)``.
    """
    cleaned: list[object] = []
    rejected: list[tuple[int, str]] = []
    nulls_seen = 0
    for position, original in enumerate(values):
        if is_null(original, null_values):
            nulls_seen += 1
            cleaned.append(None)
            continue
        try:
            outcome = parse_boolean(
                original, true_values=true_values, false_values=false_values
            ).value
        except ValueError:
            # Keep the row so output stays aligned with the input.
            cleaned.append(None)
            rejected.append((position, str(original)))
        else:
            cleaned.append(outcome)
    return CleanResult(
        values=cleaned,
        target_type=TypeName.BOOLEAN,
        null_count=nulls_seen,
        failures=rejected,
    )
typemonkey/clean.py ADDED
@@ -0,0 +1,162 @@
1
+ """``clean_column`` — infer (or accept) a target type, then clean to it.
2
+
3
+ This module exists as the one-call convenience entry point: hand it a column
4
+ and it figures out the type and returns cleaned values, or pass ``target_type``
5
+ to force the conversion. It dispatches to the type-specific cleaners in
6
+ :mod:`typemonkey.numbers` and :mod:`typemonkey.booleans`, and to datemonkey for
7
+ dates.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import cleanmonkey
13
+ from datemonkey import parse_dates
14
+
15
+ from .booleans import clean_boolean
16
+ from .infer import infer_type
17
+ from .locale import detect_number_locale
18
+ from .models import CleanResult, TypeName
19
+ from .nulls import is_null
20
+ from .numbers import clean_numeric
21
+
22
# Target types that ``clean_column`` hands to ``clean_numeric``.
_NUMERIC = {TypeName.INTEGER, TypeName.FLOAT, TypeName.CURRENCY, TypeName.PERCENTAGE}
23
+
24
+
25
def clean_column(
    values,
    *,
    target_type: TypeName | str | None = None,
    null_values=None,
    locale: str | None = None,
    locale_preference: str | None = None,
    true_values=None,
    false_values=None,
    percent_as_fraction: bool = True,
    integers: bool = True,
) -> CleanResult:
    """Clean a column to ``target_type``, inferring the type when not given.

    The input is copied into a list before anything else happens, so one-shot
    iterables such as generators are safe: inference and cleaning both work
    on the same materialised data.

    Args:
        values: The column to clean. Iterated exactly once.
        target_type: A :class:`TypeName` (or its string value) to force. When
            ``None``, :func:`typemonkey.infer.infer_type` decides the type and
            its detected ``locale`` is reused for numeric cleaning.
        null_values: Null spellings (see :func:`typemonkey.nulls.is_null`).
        locale: Number locale for numeric targets. When falsy, the locale from
            inference is used; failing that, it is detected from the data via
            :func:`typemonkey.locale.detect_number_locale`.
        locale_preference: ``"us"``/``"eu"`` hint for date parsing.
        true_values: Override truthy boolean vocabulary.
        false_values: Override falsy boolean vocabulary.
        percent_as_fraction: Forwarded to
            :func:`typemonkey.numbers.clean_numeric`.
        integers: Forwarded to :func:`typemonkey.numbers.clean_numeric`.

    Returns:
        A :class:`CleanResult` aligned 1:1 with the input. Numeric, boolean
        and date targets are delegated to their dedicated cleaners. A ``NULL``
        target yields all ``None`` with ``null_count`` equal to the input
        length. Every other target is cleaned as a string column.
    """
    column = list(values)  # single pass over the input; generators are fine

    if target_type is not None:
        resolved = TypeName(target_type)
        detected_locale: str | None = None
    else:
        profile = infer_type(
            column,
            null_values=null_values,
            locale=locale,
            locale_preference=locale_preference,
            true_values=true_values,
            false_values=false_values,
        )
        resolved = profile.type
        detected_locale = profile.locale

    if resolved in _NUMERIC:
        # Precedence: explicit locale, then the inferred one, then direct
        # detection (the forced-numeric case, where no inference ran).
        number_locale = locale or detected_locale
        if not number_locale:
            number_locale = detect_number_locale(column)
        return clean_numeric(
            column,
            locale=number_locale,
            null_values=null_values,
            percent_as_fraction=percent_as_fraction,
            integers=integers,
            target_type=resolved,
        )

    if resolved is TypeName.BOOLEAN:
        return clean_boolean(
            column,
            true_values=true_values,
            false_values=false_values,
            null_values=null_values,
        )

    if resolved is TypeName.DATE:
        return _clean_dates(column, null_values, locale_preference)

    if resolved is TypeName.NULL:
        row_count = len(column)
        return CleanResult(
            values=[None] * row_count,
            target_type=TypeName.NULL,
            null_count=row_count,
        )

    return _clean_strings(column, null_values)
113
+
114
+
115
def _clean_dates(values, null_values, locale_preference) -> CleanResult:
    """Parse a date column with datemonkey, keeping nulls and failures apart.

    Nulls (per :func:`is_null`) become ``None`` and are counted in
    ``null_count``. Non-null rows with no parsed date also become ``None`` but
    are recorded in ``failures`` as ``(index, original)``.
    """
    rows = list(values)
    is_missing = [is_null(row, null_values) for row in rows]
    # Our nulls are passed to datemonkey as None (which it treats as a
    # null/failure) so that its row indices line up with ours.
    to_parse = [None if missing else row for row, missing in zip(rows, is_missing)]
    batch = parse_dates(to_parse, locale_preference=locale_preference)

    # ``results`` is empty when nothing in the batch parsed; otherwise each
    # entry is tagged with ``row_index``. Looking entries up by that index and
    # iterating over our own rows keeps the output 1:1 with the input in both
    # situations.
    parsed_by_row = {}
    for entry in batch.results:
        parsed_by_row[entry.row_index] = entry

    cleaned: list[object] = []
    unparsed: list[tuple[int, str]] = []
    for idx, (row, missing) in enumerate(zip(rows, is_missing)):
        if missing:
            cleaned.append(None)
            continue
        entry = parsed_by_row.get(idx)
        if entry is None or entry.parsed is None:
            cleaned.append(None)
            unparsed.append((idx, str(row)))
        else:
            cleaned.append(entry.parsed)

    return CleanResult(
        values=cleaned,
        target_type=TypeName.DATE,
        null_count=sum(is_missing),
        failures=unparsed,
    )
146
+
147
+
148
def _clean_strings(values, null_values) -> CleanResult:
    """Run every non-null value through ``cleanmonkey.clean``.

    Values recognised as null become ``None`` and are tallied in
    ``null_count``; everything else is stringified and normalised.
    """
    cleaned: list[object] = []
    missing = 0
    for item in values:
        if not is_null(item, null_values):
            cleaned.append(cleanmonkey.clean(str(item)))
            continue
        missing += 1
        cleaned.append(None)
    return CleanResult(
        values=cleaned,
        target_type=TypeName.STRING,
        null_count=missing,
    )
typemonkey/cli.py ADDED
@@ -0,0 +1,100 @@
1
+ """Thin CLI wrapper around the typemonkey library.
2
+
3
+ This module exists only to parse arguments, read input columns, and format
4
+ output; all logic lives in the library. Commands:
5
+
6
+ * ``typemonkey profile`` — infer a column's type and print a JSON report.
7
+ * ``typemonkey clean`` — clean a column to an inferred or given type.
8
+
9
+ Input is read one value per line from a file argument or stdin.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import argparse
15
+ import json
16
+ import sys
17
+ from typing import Sequence
18
+
19
+ from .clean import clean_column
20
+ from .infer import infer_type
21
+ from .models import TypeName, _jsonable
22
+
23
+
24
+ def _read_values(path: str | None) -> list[str]:
25
+ """Read one value per line from ``path`` (or stdin when ``None``/``"-"``)."""
26
+ if path in (None, "-"):
27
+ text = sys.stdin.read()
28
+ else:
29
+ with open(path, "r", encoding="utf-8") as fh:
30
+ text = fh.read()
31
+ # Strip only the trailing newline of the file, then split on line breaks so
32
+ # genuinely empty lines survive as empty-string values.
33
+ text = text.rstrip("\n")
34
+ return text.split("\n") if text != "" else []
35
+
36
+
37
def _build_parser() -> argparse.ArgumentParser:
    """Construct the ``typemonkey`` parser with its ``profile`` and ``clean`` subcommands."""
    root = argparse.ArgumentParser(
        prog="typemonkey",
        description="Infer column types and clean values to those types.",
    )
    commands = root.add_subparsers(dest="command", required=True)

    # Argument specs that both subcommands declare identically.
    file_spec = {"nargs": "?", "default": "-", "help": "Input file, or - for stdin."}
    locale_spec = {"choices": ["us", "eu"], "default": None, "help": "Force number locale."}

    profile_cmd = commands.add_parser("profile", help="Infer a column's type (JSON report).")
    profile_cmd.add_argument("file", **file_spec)
    profile_cmd.add_argument(
        "--threshold",
        type=float,
        default=None,
        help="Conformance threshold (0-1).",
    )
    profile_cmd.add_argument("--locale", **locale_spec)

    clean_cmd = commands.add_parser("clean", help="Clean a column to a type (one value per line).")
    clean_cmd.add_argument("file", **file_spec)
    clean_cmd.add_argument(
        "--type",
        choices=[member.value for member in TypeName],
        default=None,
        help="Force target type instead of inferring.",
    )
    clean_cmd.add_argument("--locale", **locale_spec)
    clean_cmd.add_argument(
        "--keep-percent",
        action="store_true",
        help="Keep percents as whole numbers (12%% -> 12) instead of fractions.",
    )
    return root
64
+
65
+
66
def main(argv: Sequence[str] | None = None) -> int:
    """Run the CLI and return a process exit code.

    ``profile`` prints the inferred profile as indented, key-sorted JSON and
    returns 0. ``clean`` prints one cleaned value per line (an empty line for
    ``None``) and returns 1 when any value failed to clean, otherwise 0.
    """
    args = _build_parser().parse_args(argv)
    values = _read_values(args.file)

    if args.command == "profile":
        # Forward only the options the user actually supplied so the library
        # defaults apply to the rest.
        overrides = {
            name: getattr(args, name)
            for name in ("threshold", "locale")
            if getattr(args, name) is not None
        }
        report = infer_type(values, **overrides).to_dict()
        json.dump(report, sys.stdout, indent=2, sort_keys=True)
        sys.stdout.write("\n")
        return 0

    if args.command == "clean":
        result = clean_column(
            values,
            target_type=args.type,
            locale=args.locale,
            percent_as_fraction=not args.keep_percent,
        )
        for value in result.values:
            rendered = _jsonable(value)
            if rendered is None:
                sys.stdout.write("")
            else:
                sys.stdout.write(str(rendered))
            sys.stdout.write("\n")
        if result.failures:
            return 1
        return 0

    return 2  # pragma: no cover - argparse enforces a valid command
97
+
98
+
99
# Script entry point: exit with the code returned by ``main``.
if __name__ == "__main__":  # pragma: no cover
    sys.exit(main())
@@ -0,0 +1,16 @@
1
+ """Pluggable per-column type detectors.
2
+
3
+ Each detector is a single exported function taking a list of non-null values
4
+ and returning a :class:`Detection`. They are intentionally independent so a
5
+ caller (or :mod:`typemonkey.infer`) can run a subset, and so new strategies
6
+ can be added without touching the orchestrator.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from .base import Detection
12
+ from .boolean import detect_boolean
13
+ from .date import detect_date
14
+ from .numeric import detect_numeric
15
+
16
+ __all__ = ["Detection", "detect_boolean", "detect_date", "detect_numeric"]
@@ -0,0 +1,28 @@
1
+ """Shared result type for detectors."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+
7
+
8
@dataclass
class Detection:
    """What a single detector found in a column's non-null values.

    Attributes:
        match_count: Number of non-null values the detector recognised.
        sample_size: Number of non-null values examined.
        detail: Detector-specific extras (e.g. fine kind counts for numeric,
            detected date format for date). Defaults to a fresh empty dict.

    The derived :attr:`confidence` property is ``match_count / sample_size``,
    or ``0.0`` for an empty sample.
    """

    match_count: int
    sample_size: int
    detail: dict = field(default_factory=dict)

    @property
    def confidence(self) -> float:
        """Matched fraction of the sample; ``0.0`` when the sample is empty."""
        if not self.sample_size:
            # Avoid dividing by zero for an empty sample.
            return 0.0
        return self.match_count / self.sample_size
@@ -0,0 +1,39 @@
1
+ """Boolean detector."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from ..booleans import NUMERIC_BOOLEANS, is_boolean
6
+ from .base import Detection
7
+
8
+
9
def detect_boolean(values, *, true_values=None, false_values=None) -> Detection:
    """Measure how much of a column of non-null values is boolean.

    Args:
        values: Non-null values to test.
        true_values: Override truthy vocabulary (see
            :func:`typemonkey.booleans.parse_boolean`).
        false_values: Override falsy vocabulary.

    Returns:
        A :class:`Detection` whose ``match_count`` is the number of values
        accepted by :func:`is_boolean`. ``detail["numeric_only"]`` is ``True``
        when every matched value normalised to ``"0"`` or ``"1"`` (and also
        when nothing matched at all) — the ambiguous case that is equally
        valid integer data. ``detail["distinct"]`` is the number of distinct
        normalised tokens matched.
    """
    # Normalised (trimmed, lower-cased) form of every value that matched.
    recognised = [
        str(candidate).strip().lower()
        for candidate in values
        if is_boolean(candidate, true_values=true_values, false_values=false_values)
    ]
    return Detection(
        match_count=len(recognised),
        sample_size=len(values),
        detail={
            "numeric_only": all(token in NUMERIC_BOOLEANS for token in recognised),
            "distinct": len(set(recognised)),
        },
    )
@@ -0,0 +1,53 @@
1
+ """Date detector — delegates to datemonkey.
2
+
3
+ typemonkey does not reimplement date parsing. This detector is a thin adapter
4
+ around :func:`datemonkey.detect_format` that reports its batch confidence in
5
+ typemonkey's :class:`Detection` shape.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from datemonkey import Confidence, detect_format
11
+
12
+ from .base import Detection
13
+
14
# Map datemonkey's categorical confidence onto a fraction of the sample we are
# willing to treat as matched. FAILED means no date format fit at all.
# ``detect_date`` reports the weight of the detected level under
# ``detail["weight"]`` and falls back to 0.0 for a level missing from this map.
_CONFIDENCE_WEIGHT = {
    Confidence.HIGH: 1.0,
    Confidence.MEDIUM: 0.75,
    Confidence.LOW: 0.4,
    Confidence.FAILED: 0.0,
}
22
+
23
+
24
def detect_date(values, *, locale_preference: str | None = None) -> Detection:
    """Measure how date-like a column of non-null values is, via datemonkey.

    Args:
        values: Non-null values to test.
        locale_preference: ``"us"`` or ``"eu"`` hint forwarded to datemonkey to
            resolve DD/MM vs MM/DD ambiguity.

    Returns:
        A :class:`Detection` carrying datemonkey's ``match_count`` and
        ``sample_size``. Its ``detail`` holds the detected ``format`` (pattern
        or ``None``), the categorical ``confidence`` value, the numeric
        ``weight`` mapped from that confidence, and any ``ambiguities``
        datemonkey reported. An empty input short-circuits to a zero-sized
        detection marked ``"failed"`` without calling datemonkey.
    """
    if values:
        outcome = detect_format(values, locale_preference=locale_preference)
        pattern = None if outcome.format is None else outcome.format.pattern
        extras = {
            "format": pattern,
            "confidence": outcome.confidence.value,
            "weight": _CONFIDENCE_WEIGHT.get(outcome.confidence, 0.0),
            "ambiguities": [item.value for item in outcome.ambiguities],
        }
        return Detection(
            match_count=outcome.match_count,
            sample_size=outcome.sample_size,
            detail=extras,
        )
    # Nothing to examine: report an empty, failed detection.
    return Detection(
        0, 0, {"format": None, "confidence": "failed", "weight": 0.0, "ambiguities": []}
    )
@@ -0,0 +1,46 @@
1
+ """Numeric detector — classifies each value into a fine numeric kind."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections import Counter
6
+
7
+ from ..numbers import parse_number
8
+ from .base import Detection
9
+
10
+
11
def detect_numeric(values, *, locale: str = "us") -> Detection:
    """Count the numeric values in a column and classify each by fine kind.

    Each value is passed to :func:`typemonkey.numbers.parse_number`; values it
    rejects with :class:`ValueError` are skipped. Every parsed value is
    counted under exactly one label. A currency marker wins, then a percent
    marker, and only then is the value split into ``integer`` or ``float``.

    Args:
        values: Non-null values to test.
        locale: Number locale for parsing (``"us"`` or ``"eu"``).

    Returns:
        A :class:`Detection` whose ``match_count`` is the number of values
        that parsed, with ``detail["kinds"]`` a :class:`collections.Counter`
        over the labels and ``detail["locale"]`` echoing ``locale``.
    """
    kinds: Counter[str] = Counter()
    for raw in values:
        try:
            parsed = parse_number(raw, locale)
        except ValueError:
            continue
        if parsed.had_currency:
            label = "currency"
        elif parsed.had_percent:
            label = "percentage"
        else:
            label = "integer" if parsed.is_integer else "float"
        kinds[label] += 1
    return Detection(
        match_count=sum(kinds.values()),
        sample_size=len(values),
        detail={"kinds": kinds, "locale": locale},
    )