toonify 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
toon/encoder.py ADDED
@@ -0,0 +1,317 @@
1
+ """TOON encoder - convert Python objects to TOON format."""
2
+ from typing import Any, Dict, List, Optional
3
+ from .constants import (
4
+ COMMA, TAB, PIPE, COLON, NEWLINE,
5
+ DEFAULT_DELIMITER, DEFAULT_INDENT,
6
+ KEY_FOLDING_OFF, KEY_FOLDING_SAFE,
7
+ DELIMITER_TAB, DELIMITER_PIPE, DELIMITER_COMMA,
8
+ LEFT_BRACKET, RIGHT_BRACKET, LEFT_BRACE, RIGHT_BRACE
9
+ )
10
+ from .utils import (
11
+ needs_quoting, quote_string, is_primitive,
12
+ is_uniform_array_of_objects, get_indent
13
+ )
14
+
15
+
16
class EncoderOptions:
    """Container for the settings that control TOON encoding.

    Attributes:
        delimiter: Separator placed between array values, after alias
            normalization.
        indent: Number of spaces per indentation level.
        key_folding: Key folding mode.
        flatten_depth: Depth limit handed to key folding (None = unlimited).
    """

    def __init__(
        self,
        delimiter: str = DEFAULT_DELIMITER,
        indent: int = DEFAULT_INDENT,
        key_folding: str = KEY_FOLDING_OFF,
        flatten_depth: Optional[int] = None
    ):
        """
        Store the encoder settings, normalizing the delimiter first.

        Args:
            delimiter: Array value delimiter (',' | '\t' | '|'), or one of
                the DELIMITER_* names, which is replaced by its character.
            indent: Number of spaces per indentation level
            key_folding: Key folding mode ('off' | 'safe')
            flatten_depth: Maximum depth for key folding (None = unlimited)
        """
        # Named delimiter -> delimiter character. The first matching alias
        # wins; any other value is stored exactly as given.
        aliases = (
            (DELIMITER_TAB, TAB),
            (DELIMITER_PIPE, PIPE),
            (DELIMITER_COMMA, COMMA),
        )
        for alias, symbol in aliases:
            if delimiter == alias:
                delimiter = symbol
                break

        self.delimiter = delimiter
        self.indent = indent
        self.key_folding = key_folding
        self.flatten_depth = flatten_depth
47
+
48
+
49
def encode(data: Any, options: Optional[Dict[str, Any]] = None) -> str:
    """
    Convert a Python data structure into a TOON formatted string.

    For instance, ``{'users': [{'id': 1, 'name': 'Alice'}]}`` is rendered as
    a ``users[1]{id,name}:`` header line followed by one indented row
    ``1,Alice``.

    Args:
        data: Python object to encode (dict or list)
        options: Optional mapping of encoding options
            - delimiter: ',' (default), '\t', or '|'
            - indent: int (default 2)
            - key_folding: 'off' (default) or 'safe'
            - flatten_depth: int or None

    Returns:
        TOON formatted string
    """
    # Only None means "no options"; any other object is used as given.
    settings = {} if options is None else options

    delimiter = settings.get('delimiter', DEFAULT_DELIMITER)
    indent = settings.get('indent', DEFAULT_INDENT)
    key_folding = settings.get('key_folding', KEY_FOLDING_OFF)
    flatten_depth = settings.get('flatten_depth')

    opts = EncoderOptions(
        delimiter=delimiter,
        indent=indent,
        key_folding=key_folding,
        flatten_depth=flatten_depth,
    )
    # The root value is encoded at indentation level 0.
    return _encode_value(data, 0, opts)
81
+
82
+
83
+ def _encode_value(value: Any, level: int, opts: EncoderOptions) -> str:
84
+ """Encode a value at a given indentation level."""
85
+ if value is None:
86
+ return 'null'
87
+ elif isinstance(value, bool):
88
+ return 'true' if value else 'false'
89
+ elif isinstance(value, (int, float)):
90
+ # Handle special float values
91
+ if isinstance(value, float):
92
+ if value != value: # NaN
93
+ return 'null'
94
+ elif value == float('inf') or value == float('-inf'):
95
+ return 'null'
96
+ return str(value)
97
+ elif isinstance(value, str):
98
+ if needs_quoting(value):
99
+ return quote_string(value)
100
+ return value
101
+ elif isinstance(value, list):
102
+ return _encode_array(value, level, opts)
103
+ elif isinstance(value, dict):
104
+ return _encode_object(value, level, opts)
105
+ else:
106
+ # Handle other types (dates, etc.) as null
107
+ return 'null'
108
+
109
+
110
+ def _encode_object(obj: dict, level: int, opts: EncoderOptions) -> str:
111
+ """Encode a dictionary object."""
112
+ if not obj:
113
+ return '{}'
114
+
115
+ # Apply key folding if enabled
116
+ if opts.key_folding == KEY_FOLDING_SAFE:
117
+ obj = _apply_key_folding(obj, opts.flatten_depth)
118
+
119
+ lines = []
120
+ indent = get_indent(level, opts.indent)
121
+
122
+ for key, value in obj.items():
123
+ # Special handling for arrays to include key in header
124
+ if isinstance(value, list):
125
+ encoded_value = _encode_array_with_key(key, value, level, opts)
126
+ if NEWLINE in encoded_value:
127
+ lines.append(encoded_value)
128
+ else:
129
+ lines.append(f'{indent}{key}{COLON} {encoded_value}')
130
+ elif isinstance(value, dict):
131
+ # Nested object handling
132
+ if not value:
133
+ # Empty object - inline
134
+ lines.append(f'{indent}{key}{COLON} {{}}')
135
+ else:
136
+ # Non-empty object - multiline
137
+ encoded_value = _encode_value(value, level + 1, opts)
138
+ lines.append(f'{indent}{key}{COLON}')
139
+ lines.append(encoded_value)
140
+ else:
141
+ # Primitive value
142
+ encoded_value = _encode_value(value, level + 1, opts)
143
+ lines.append(f'{indent}{key}{COLON} {encoded_value}')
144
+
145
+ return NEWLINE.join(lines)
146
+
147
+
148
+ def _encode_array(arr: list, level: int, opts: EncoderOptions) -> str:
149
+ """Encode an array."""
150
+ if not arr:
151
+ return '[]'
152
+
153
+ # Check if it's a uniform array of objects (tabular format)
154
+ fields = is_uniform_array_of_objects(arr)
155
+ if fields:
156
+ return _encode_tabular_array(arr, fields, level, opts, key=None)
157
+
158
+ # Check if all elements are primitives (inline format)
159
+ if all(is_primitive(item) for item in arr):
160
+ return _encode_primitive_array(arr, opts)
161
+
162
+ # Mixed array (list format)
163
+ return _encode_list_array(arr, level, opts, key=None)
164
+
165
+
166
+ def _encode_array_with_key(key: str, arr: list, level: int, opts: EncoderOptions) -> str:
167
+ """Encode an array with its key prefix for object context."""
168
+ if not arr:
169
+ return '[]'
170
+
171
+ indent = get_indent(level, opts.indent)
172
+
173
+ # Check if it's a uniform array of objects (tabular format)
174
+ fields = is_uniform_array_of_objects(arr)
175
+ if fields:
176
+ return _encode_tabular_array(arr, fields, level, opts, key=key)
177
+
178
+ # Check if all elements are primitives (inline format)
179
+ if all(is_primitive(item) for item in arr):
180
+ return _encode_primitive_array(arr, opts)
181
+
182
+ # Mixed array (list format)
183
+ return _encode_list_array(arr, level, opts, key=key)
184
+
185
+
186
+
187
+ def _encode_primitive_array(arr: list, opts: EncoderOptions) -> str:
188
+ """Encode an array of primitives as inline values."""
189
+ encoded_values = []
190
+ for item in arr:
191
+ if item is None:
192
+ encoded_values.append('null')
193
+ elif isinstance(item, bool):
194
+ encoded_values.append('true' if item else 'false')
195
+ elif isinstance(item, (int, float)):
196
+ if isinstance(item, float) and (item != item or item == float('inf') or item == float('-inf')):
197
+ encoded_values.append('null')
198
+ else:
199
+ encoded_values.append(str(item))
200
+ elif isinstance(item, str):
201
+ if needs_quoting(item):
202
+ encoded_values.append(quote_string(item))
203
+ else:
204
+ encoded_values.append(item)
205
+
206
+ return f'[{opts.delimiter.join(encoded_values)}]'
207
+
208
+
209
def _encode_tabular_array(arr: list, fields: list, level: int, opts: EncoderOptions, key: Optional[str] = None) -> str:
    """
    Encode a uniform array of objects in tabular format.

    The header is ``[N]{f1,f2,...}:`` or, when ``key`` is given,
    ``key[N]{f1,f2,...}:`` indented for ``level``. One row per object
    follows, holding the values of ``fields`` joined by ``opts.delimiter``.

    Rows are indented one full level below the header using
    ``opts.indent``, the same rule nested object keys follow, so the
    configured indent width is honored instead of a fixed literal.

    Args:
        arr: List of dictionaries to encode.
        fields: Column names, in output order.
        level: Indentation level of the header line.
        opts: Encoder options (indent width and value delimiter).
        key: Owning key to embed in the header, if any.

    Returns:
        Header and rows joined with NEWLINE.
    """
    indent = get_indent(level, opts.indent)
    row_indent = get_indent(level + 1, opts.indent)

    # Field names in the header are always comma-separated, whatever the
    # row delimiter is.
    shape = f'[{len(arr)}]{LEFT_BRACE}{COMMA.join(fields)}{RIGHT_BRACE}{COLON}'
    # A keyless header carries no indentation of its own.
    header = f'{indent}{key}{shape}' if key else shape

    lines = [header]
    for obj in arr:
        # A missing field yields None, which is encoded as 'null'.
        cells = [_encode_primitive_value(obj.get(field)) for field in fields]
        lines.append(f'{row_indent}{opts.delimiter.join(cells)}')

    return NEWLINE.join(lines)
233
+
234
+
235
+ def _encode_primitive_value(value: Any) -> str:
236
+ """Encode a primitive value for use in arrays."""
237
+ if value is None:
238
+ return 'null'
239
+ elif isinstance(value, bool):
240
+ return 'true' if value else 'false'
241
+ elif isinstance(value, (int, float)):
242
+ if isinstance(value, float) and (value != value or value == float('inf') or value == float('-inf')):
243
+ return 'null'
244
+ return str(value)
245
+ elif isinstance(value, str):
246
+ if needs_quoting(value):
247
+ return quote_string(value)
248
+ return value
249
+ else:
250
+ return 'null'
251
+
252
+
253
def _encode_list_array(arr: list, level: int, opts: EncoderOptions, key: Optional[str] = None) -> str:
    """
    Encode a non-uniform array in list format.

    The header is ``[N]:`` or, when ``key`` is given, ``key[N]:`` indented
    for ``level``. Each item follows on its own line(s), encoded one level
    deeper.

    Args:
        arr: List to encode.
        level: Indentation level of the header line.
        opts: Encoder options (indent width).
        key: Owning key to embed in the header, if any.

    Returns:
        Header and items joined with NEWLINE.
    """
    indent = get_indent(level, opts.indent)
    # Items sit one full level below the header, derived from opts.indent so
    # they line up with the keys of dict items.
    item_indent = get_indent(level + 1, opts.indent)

    counter = f'[{len(arr)}]{COLON}'
    # A keyless header carries no indentation of its own.
    header = f'{indent}{key}{counter}' if key else counter

    lines = [header]
    for item in arr:
        encoded = _encode_value(item, level + 1, opts)
        # Multi-line encodings and non-empty dicts are already laid out for
        # level + 1 by their encoder. Indenting a one-key dict again would
        # push it deeper than a multi-key dict item.
        already_laid_out = NEWLINE in encoded or (isinstance(item, dict) and bool(item))
        lines.append(encoded if already_laid_out else f'{item_indent}{encoded}')

    return NEWLINE.join(lines)
274
+
275
+
276
+ def _apply_key_folding(obj: dict, max_depth: Optional[int] = None) -> dict:
277
+ """
278
+ Apply key folding to collapse single-key chains into dotted paths.
279
+
280
+ Args:
281
+ obj: Object to fold
282
+ max_depth: Maximum depth for folding (None = unlimited)
283
+
284
+ Returns:
285
+ Folded object
286
+ """
287
+ result = {}
288
+
289
+ for key, value in obj.items():
290
+ if isinstance(value, dict) and len(value) == 1:
291
+ # Single-key object - check if we can fold
292
+ nested_key = list(value.keys())[0]
293
+ nested_value = value[nested_key]
294
+
295
+ # Calculate current depth
296
+ depth = 1
297
+ current = nested_value
298
+ while isinstance(current, dict) and len(current) == 1 and (max_depth is None or depth < max_depth):
299
+ depth += 1
300
+ current = list(current.values())[0]
301
+
302
+ # Fold if within depth limit
303
+ if max_depth is None or depth <= max_depth:
304
+ folded_key = f'{key}.{nested_key}'
305
+ # Recursively fold
306
+ if isinstance(nested_value, dict) and len(nested_value) == 1:
307
+ folded = _apply_key_folding({nested_key: nested_value}, max_depth)
308
+ for fk, fv in folded.items():
309
+ result[f'{key}.{fk}'] = fv
310
+ else:
311
+ result[folded_key] = nested_value
312
+ else:
313
+ result[key] = value
314
+ else:
315
+ result[key] = value
316
+
317
+ return result
toon/utils.py ADDED
@@ -0,0 +1,243 @@
1
+ """Utility functions for the TOON library."""
2
+ from typing import Any, Optional
3
+ from .constants import (
4
+ QUOTE, BACKSLASH, NEWLINE, COMMA, TAB, PIPE,
5
+ TRUE_LITERAL, FALSE_LITERAL, NULL_LITERAL,
6
+ SPACE, COLON
7
+ )
8
+
9
+
10
def needs_quoting(value: str) -> bool:
    """
    Check if a string value needs to be quoted.

    Quoting is needed when:
    - Value is empty
    - Value has leading or trailing whitespace
    - Value looks like a boolean or null literal (case-insensitive)
    - Value contains a special character (delimiters, colon, newline,
      carriage return, quote, backslash, brackets, braces)
    - Value looks like a number, because an unquoted "123" would be read
      back as a number rather than as the original string

    Args:
        value: String to check

    Returns:
        True if quoting is needed, False otherwise
    """
    if not value:
        return True

    # Leading/trailing whitespace would not survive unquoted.
    if value != value.strip():
        return True

    # Bare true/false/null would be read back as literals.
    if value.lower() in (TRUE_LITERAL, FALSE_LITERAL, NULL_LITERAL):
        return True

    # Structural characters. '\r' is included because escape_string()
    # escapes it, so it must not reach the output unquoted.
    special_chars = (
        COMMA, COLON, NEWLINE, QUOTE, TAB, PIPE, BACKSLASH,
        '\r', '[', ']', '{', '}',
    )
    if any(char in value for char in special_chars):
        return True

    # Numeric-looking text must be quoted to keep its string type. float()
    # accepts a superset of what parse_number() converts, so this check
    # errs on the side of quoting.
    try:
        float(value)
    except ValueError:
        return False
    return True
52
+
53
+
54
def escape_string(value: str) -> str:
    """
    Escape special characters in a string for TOON encoding.

    Backslash, quote, newline, tab and carriage return are replaced by
    their backslash escapes.

    Args:
        value: String to escape

    Returns:
        Escaped string
    """
    # Order matters: backslashes go first so the backslashes introduced by
    # the later replacements are not escaped again.
    replacements = (
        (BACKSLASH, BACKSLASH + BACKSLASH),
        (QUOTE, BACKSLASH + QUOTE),
        (NEWLINE, BACKSLASH + 'n'),
        ('\t', BACKSLASH + 't'),
        ('\r', BACKSLASH + 'r'),
    )
    for raw, escaped in replacements:
        value = value.replace(raw, escaped)
    return value
75
+
76
+
77
def unescape_string(value: str) -> str:
    """
    Turn backslash escapes in a TOON string back into characters.

    Recognized escapes are ``\\n``, ``\\t``, ``\\r``, an escaped quote and
    an escaped backslash. A backslash followed by any other character, or a
    backslash at the very end, is kept as it is.

    Args:
        value: Escaped string

    Returns:
        Unescaped string
    """
    # Character after the backslash -> character it stands for.
    escapes = {
        'n': NEWLINE,
        't': '\t',
        'r': '\r',
        QUOTE: QUOTE,
        BACKSLASH: BACKSLASH,
    }

    pieces = []
    pos = 0
    end = len(value)
    while pos < end:
        char = value[pos]
        if char == BACKSLASH and pos + 1 < end and value[pos + 1] in escapes:
            # Known escape: emit its replacement and skip both characters.
            pieces.append(escapes[value[pos + 1]])
            pos += 2
        else:
            # Ordinary character, unknown escape, or trailing backslash.
            pieces.append(char)
            pos += 1
    return ''.join(pieces)
114
+
115
+
116
def quote_string(value: str) -> str:
    """
    Escape a string and wrap it in quote characters for TOON encoding.

    Args:
        value: String to quote

    Returns:
        Quoted and escaped string
    """
    return ''.join((QUOTE, escape_string(value), QUOTE))
128
+
129
+
130
def is_primitive(value: Any) -> bool:
    """
    Tell whether a value is a primitive (str, int, float, bool or None).

    Args:
        value: Value to check

    Returns:
        True if primitive, False otherwise
    """
    if value is None:
        return True
    return isinstance(value, (str, int, float, bool))
141
+
142
+
143
def is_array_of_objects(value: Any) -> bool:
    """
    Tell whether a value is a non-empty list made up only of dicts.

    Args:
        value: Value to check

    Returns:
        True if array of objects, False otherwise
    """
    if isinstance(value, list) and value:
        for item in value:
            if not isinstance(item, dict):
                return False
        return True
    # Not a list, or an empty one.
    return False
156
+
157
+
158
def is_uniform_array_of_objects(value: list) -> Optional[list]:
    """
    Check if an array can be written as a table of primitive fields.

    The array qualifies only when every element is a dict, every dict has
    the same set of keys, and every value is a primitive. An object holding
    a nested list or dict disqualifies the whole array, because reporting
    only its primitive fields would leave the nested data out of the table.

    Args:
        value: Array to check

    Returns:
        List of field names (in the first object's key order) if uniform,
        None otherwise
    """
    if not value or not all(isinstance(item, dict) for item in value):
        return None

    first_obj = value[0]
    # An empty first object has no columns, so there is nothing to tabulate.
    if not first_obj or not all(is_primitive(val) for val in first_obj.values()):
        return None

    fields = list(first_obj)
    expected = set(fields)

    # Key order may differ between objects; only the key set must match.
    for obj in value[1:]:
        if set(obj) != expected:
            return None
        if not all(is_primitive(val) for val in obj.values()):
            return None

    return fields
189
+
190
+
191
def get_indent(level: int, indent_size: int = 2) -> str:
    """
    Build the indentation string for a given level.

    Args:
        level: Indentation level
        indent_size: Number of spaces per level

    Returns:
        SPACE repeated ``level * indent_size`` times
    """
    width = level * indent_size
    return SPACE * width
203
+
204
+
205
def parse_number(value: str) -> Any:
    """
    Convert a string to an int or float when it represents one.

    Text without a '.' or an 'e'/'E' is tried as an int; everything else is
    tried as a float.

    Args:
        value: String to parse

    Returns:
        Parsed number or original string if not a number
    """
    looks_integral = '.' not in value and 'e' not in value.lower()
    convert = int if looks_integral else float
    try:
        return convert(value)
    except ValueError:
        # Not numeric: hand the text back untouched.
        return value
223
+
224
+
225
def parse_literal(value: str) -> Any:
    """
    Interpret a string as a boolean, null or number literal.

    The boolean and null literals are matched case-insensitively; anything
    else is handed to ``parse_number``.

    Args:
        value: String to parse

    Returns:
        Parsed value or original string if not a literal
    """
    folded = value.lower()
    # Checked in order; the first matching literal decides the result.
    literals = (
        (TRUE_LITERAL, True),
        (FALSE_LITERAL, False),
        (NULL_LITERAL, None),
    )
    for literal, parsed in literals:
        if folded == literal:
            return parsed
    return parse_number(value)