symbolicai 0.21.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. symai/__init__.py +269 -173
  2. symai/backend/base.py +123 -110
  3. symai/backend/engines/drawing/engine_bfl.py +45 -44
  4. symai/backend/engines/drawing/engine_gpt_image.py +112 -97
  5. symai/backend/engines/embedding/engine_llama_cpp.py +63 -52
  6. symai/backend/engines/embedding/engine_openai.py +25 -21
  7. symai/backend/engines/execute/engine_python.py +19 -18
  8. symai/backend/engines/files/engine_io.py +104 -95
  9. symai/backend/engines/imagecaptioning/engine_blip2.py +28 -24
  10. symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +102 -79
  11. symai/backend/engines/index/engine_pinecone.py +124 -97
  12. symai/backend/engines/index/engine_qdrant.py +1011 -0
  13. symai/backend/engines/index/engine_vectordb.py +84 -56
  14. symai/backend/engines/lean/engine_lean4.py +96 -52
  15. symai/backend/engines/neurosymbolic/__init__.py +41 -13
  16. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +330 -248
  17. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +329 -264
  18. symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
  19. symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +118 -88
  20. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +344 -299
  21. symai/backend/engines/neurosymbolic/engine_groq.py +173 -115
  22. symai/backend/engines/neurosymbolic/engine_huggingface.py +114 -84
  23. symai/backend/engines/neurosymbolic/engine_llama_cpp.py +144 -118
  24. symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +415 -307
  25. symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +394 -231
  26. symai/backend/engines/ocr/engine_apilayer.py +23 -27
  27. symai/backend/engines/output/engine_stdout.py +10 -13
  28. symai/backend/engines/{webscraping → scrape}/engine_requests.py +101 -54
  29. symai/backend/engines/search/engine_openai.py +100 -88
  30. symai/backend/engines/search/engine_parallel.py +665 -0
  31. symai/backend/engines/search/engine_perplexity.py +44 -45
  32. symai/backend/engines/search/engine_serpapi.py +37 -34
  33. symai/backend/engines/speech_to_text/engine_local_whisper.py +54 -51
  34. symai/backend/engines/symbolic/engine_wolframalpha.py +15 -9
  35. symai/backend/engines/text_to_speech/engine_openai.py +20 -26
  36. symai/backend/engines/text_vision/engine_clip.py +39 -37
  37. symai/backend/engines/userinput/engine_console.py +5 -6
  38. symai/backend/mixin/__init__.py +13 -0
  39. symai/backend/mixin/anthropic.py +48 -38
  40. symai/backend/mixin/deepseek.py +6 -5
  41. symai/backend/mixin/google.py +7 -4
  42. symai/backend/mixin/groq.py +2 -4
  43. symai/backend/mixin/openai.py +140 -110
  44. symai/backend/settings.py +87 -20
  45. symai/chat.py +216 -123
  46. symai/collect/__init__.py +7 -1
  47. symai/collect/dynamic.py +80 -70
  48. symai/collect/pipeline.py +67 -51
  49. symai/collect/stats.py +161 -109
  50. symai/components.py +707 -360
  51. symai/constraints.py +24 -12
  52. symai/core.py +1857 -1233
  53. symai/core_ext.py +83 -80
  54. symai/endpoints/api.py +166 -104
  55. symai/extended/.DS_Store +0 -0
  56. symai/extended/__init__.py +46 -12
  57. symai/extended/api_builder.py +29 -21
  58. symai/extended/arxiv_pdf_parser.py +23 -14
  59. symai/extended/bibtex_parser.py +9 -6
  60. symai/extended/conversation.py +156 -126
  61. symai/extended/document.py +50 -30
  62. symai/extended/file_merger.py +57 -14
  63. symai/extended/graph.py +51 -32
  64. symai/extended/html_style_template.py +18 -14
  65. symai/extended/interfaces/blip_2.py +2 -3
  66. symai/extended/interfaces/clip.py +4 -3
  67. symai/extended/interfaces/console.py +9 -1
  68. symai/extended/interfaces/dall_e.py +4 -2
  69. symai/extended/interfaces/file.py +2 -0
  70. symai/extended/interfaces/flux.py +4 -2
  71. symai/extended/interfaces/gpt_image.py +16 -7
  72. symai/extended/interfaces/input.py +2 -1
  73. symai/extended/interfaces/llava.py +1 -2
  74. symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +4 -3
  75. symai/extended/interfaces/naive_vectordb.py +9 -10
  76. symai/extended/interfaces/ocr.py +5 -3
  77. symai/extended/interfaces/openai_search.py +2 -0
  78. symai/extended/interfaces/parallel.py +30 -0
  79. symai/extended/interfaces/perplexity.py +2 -0
  80. symai/extended/interfaces/pinecone.py +12 -9
  81. symai/extended/interfaces/python.py +2 -0
  82. symai/extended/interfaces/serpapi.py +3 -1
  83. symai/extended/interfaces/terminal.py +2 -4
  84. symai/extended/interfaces/tts.py +3 -2
  85. symai/extended/interfaces/whisper.py +3 -2
  86. symai/extended/interfaces/wolframalpha.py +2 -1
  87. symai/extended/metrics/__init__.py +11 -1
  88. symai/extended/metrics/similarity.py +14 -13
  89. symai/extended/os_command.py +39 -29
  90. symai/extended/packages/__init__.py +29 -3
  91. symai/extended/packages/symdev.py +51 -43
  92. symai/extended/packages/sympkg.py +41 -35
  93. symai/extended/packages/symrun.py +63 -50
  94. symai/extended/repo_cloner.py +14 -12
  95. symai/extended/seo_query_optimizer.py +15 -13
  96. symai/extended/solver.py +116 -91
  97. symai/extended/summarizer.py +12 -10
  98. symai/extended/taypan_interpreter.py +17 -18
  99. symai/extended/vectordb.py +122 -92
  100. symai/formatter/__init__.py +9 -1
  101. symai/formatter/formatter.py +51 -47
  102. symai/formatter/regex.py +70 -69
  103. symai/functional.py +325 -176
  104. symai/imports.py +190 -147
  105. symai/interfaces.py +57 -28
  106. symai/memory.py +45 -35
  107. symai/menu/screen.py +28 -19
  108. symai/misc/console.py +66 -56
  109. symai/misc/loader.py +8 -5
  110. symai/models/__init__.py +17 -1
  111. symai/models/base.py +395 -236
  112. symai/models/errors.py +1 -2
  113. symai/ops/__init__.py +32 -22
  114. symai/ops/measures.py +24 -25
  115. symai/ops/primitives.py +1149 -731
  116. symai/post_processors.py +58 -50
  117. symai/pre_processors.py +86 -82
  118. symai/processor.py +21 -13
  119. symai/prompts.py +764 -685
  120. symai/server/huggingface_server.py +135 -49
  121. symai/server/llama_cpp_server.py +21 -11
  122. symai/server/qdrant_server.py +206 -0
  123. symai/shell.py +100 -42
  124. symai/shellsv.py +700 -492
  125. symai/strategy.py +630 -346
  126. symai/symbol.py +368 -322
  127. symai/utils.py +100 -78
  128. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +22 -10
  129. symbolicai-1.1.0.dist-info/RECORD +168 -0
  130. symbolicai-0.21.0.dist-info/RECORD +0 -162
  131. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
  132. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
  133. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
  134. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
symai/formatter/regex.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import re
2
- import os
2
+ from pathlib import Path
3
3
 
4
+ from ..utils import UserMessage
4
5
 
5
6
  # Define variables for magic numbers
6
7
  MAX_HEADING_LENGTH = 7
@@ -31,19 +32,20 @@ MAX_HTML_TAG_ATTRIBUTES_LENGTH = 100
31
32
  MAX_HTML_TAG_CONTENT_LENGTH = 1000
32
33
  LOOKAHEAD_RANGE = 100 # Number of characters to look ahead for a sentence boundary
33
34
 
35
+
34
36
  # Define emoji ranges
35
37
  def generate_emoji_pattern(file_name):
36
- current_dir = os.path.dirname(os.path.abspath(__file__))
37
- file_path = os.path.join(current_dir, file_name)
38
+ current_dir = Path(__file__).resolve().parent
39
+ file_path = current_dir / file_name
38
40
 
39
41
  emoji_codes = set()
40
42
 
41
43
  try:
42
- with open(file_path, 'r', encoding='utf-8') as file:
44
+ with file_path.open(encoding="utf-8") as file:
43
45
  for line in file:
44
46
  # Skip comments and empty lines
45
- if line.strip() and not line.startswith('#'):
46
- fields = line.strip().split(';')
47
+ if line.strip() and not line.startswith("#"):
48
+ fields = line.strip().split(";")
47
49
  unicode_codes = fields[0].strip().split()
48
50
 
49
51
  if len(unicode_codes) == 1:
@@ -51,7 +53,7 @@ def generate_emoji_pattern(file_name):
51
53
  emoji_codes.add(chr(int(unicode_codes[0], 16)))
52
54
  elif len(unicode_codes) > 1:
53
55
  # Sequence of Unicode characters
54
- emoji_sequence = ''.join(chr(int(code, 16)) for code in unicode_codes)
56
+ emoji_sequence = "".join(chr(int(code, 16)) for code in unicode_codes)
55
57
  emoji_codes.add(emoji_sequence)
56
58
 
57
59
  # We could also process vendor-specific codes here if needed
@@ -61,10 +63,10 @@ def generate_emoji_pattern(file_name):
61
63
  # pass
62
64
 
63
65
  except FileNotFoundError:
64
- print(f"Error: File '{file_name}' not found in {current_dir}")
66
+ UserMessage(f"Error: File '{file_name}' not found in {current_dir}")
65
67
  return None
66
68
  except Exception as e:
67
- print(f"Error reading file: {e}")
69
+ UserMessage(f"Error reading file: {e}")
68
70
  return None
69
71
 
70
72
  # Sort the emoji codes
@@ -74,88 +76,87 @@ def generate_emoji_pattern(file_name):
74
76
  pattern_parts = []
75
77
  for code in sorted_codes:
76
78
  if len(code) == 1:
77
- pattern_parts.append(f'\\U{ord(code):08x}')
79
+ pattern_parts.append(f"\\U{ord(code):08x}")
78
80
  else:
79
- pattern_parts.append(''.join(f'\\U{ord(c):08x}' for c in code))
81
+ pattern_parts.append("".join(f"\\U{ord(c):08x}" for c in code))
80
82
 
81
- emoji_pattern = '(?:' + '|'.join(pattern_parts) + ')'
83
+ return "(?:" + "|".join(pattern_parts) + ")"
82
84
 
83
- return emoji_pattern
84
85
 
85
86
  # Usage
86
- file_name = 'emoji.pytxt'
87
+ file_name = "emoji.pytxt"
87
88
  EMOJI_PATTERN = generate_emoji_pattern(file_name)
88
89
 
89
90
  # Define the regex pattern
90
91
  CHUNK_REGEX = re.compile(
91
- r"(" +
92
+ r"("
92
93
  # 1. Headings (Setext-style, Markdown, and HTML-style, with length constraints)
93
- fr"(?:^(?:[#*=-]{{1,{MAX_HEADING_LENGTH}}}|\w[^\r\n]{{0,{MAX_HEADING_CONTENT_LENGTH}}}\r?\n[-=]{{2,{MAX_HEADING_UNDERLINE_LENGTH}}}|<h[1-6][^>]{{0,{MAX_HTML_HEADING_ATTRIBUTES_LENGTH}}}>)[^\r\n]{{1,{MAX_HEADING_CONTENT_LENGTH}}}(?:</h[1-6]>)?(?:\r?\n|$))" +
94
- "|" +
94
+ rf"(?:^(?:[#*=-]{{1,{MAX_HEADING_LENGTH}}}|\w[^\r\n]{{0,{MAX_HEADING_CONTENT_LENGTH}}}\r?\n[-=]{{2,{MAX_HEADING_UNDERLINE_LENGTH}}}|<h[1-6][^>]{{0,{MAX_HTML_HEADING_ATTRIBUTES_LENGTH}}}>)[^\r\n]{{1,{MAX_HEADING_CONTENT_LENGTH}}}(?:</h[1-6]>)?(?:\r?\n|$))"
95
+ "|"
95
96
  # New pattern for citations
96
- fr"(?:\[[0-9]+\][^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}})" +
97
- "|" +
97
+ rf"(?:\[[0-9]+\][^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}})"
98
+ "|"
98
99
  # 2. List items (bulleted, numbered, lettered, or task lists, including nested, up to three levels, with length constraints)
99
- fr"(?:(?:^|\r?\n)[ \t]{{0,3}}(?:[-*+•]|\d{{1,3}}\.|\[[ xX]\])[ \t]+" +
100
- fr"(?:(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|" +
101
- fr"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[\r\n]|$))|" +
102
- fr"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})" +
103
- fr"(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))" +
104
- fr"(?:(?:\r?\n[ \t]{{2,5}}(?:[-*+•]|\d{{1,3}}\.|\[[ xX]\])[ \t]+" +
105
- fr"(?:(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|" +
106
- fr"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[\r\n]|$))|" +
107
- fr"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})" +
108
- fr"(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))" +
109
- fr"){{0,{MAX_NESTED_LIST_ITEMS}}}" +
110
- fr"(?:\r?\n[ \t]{{4,{MAX_LIST_INDENT_SPACES}}}(?:[-*+•]|\d{{1,3}}\.|\[[ xX]\])[ \t]+" +
111
- fr"(?:(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|" +
112
- fr"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[\r\n]|$))|" +
113
- fr"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})" +
114
- fr"(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))" +
115
- fr"){{0,{MAX_NESTED_LIST_ITEMS}}})" +
116
- ")" +
100
+ r"(?:(?:^|\r?\n)[ \t]{0,3}(?:[-*+•]|\d{1,3}\.|\[[ xX]\])[ \t]+"
101
+ rf"(?:(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|"
102
+ rf"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[\r\n]|$))|"
103
+ rf"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})"
104
+ rf"(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
105
+ r"(?:(?:\r?\n[ \t]{2,5}(?:[-*+•]|\d{1,3}\.|\[[ xX]\])[ \t]+"
106
+ rf"(?:(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|"
107
+ rf"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[\r\n]|$))|"
108
+ rf"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})"
109
+ rf"(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
110
+ rf"){{0,{MAX_NESTED_LIST_ITEMS}}}"
111
+ rf"(?:\r?\n[ \t]{{4,{MAX_LIST_INDENT_SPACES}}}(?:[-*+•]|\d{{1,3}}\.|\[[ xX]\])[ \t]+"
112
+ rf"(?:(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|"
113
+ rf"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[\r\n]|$))|"
114
+ rf"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})"
115
+ rf"(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
116
+ rf"){{0,{MAX_NESTED_LIST_ITEMS}}})"
117
+ ")"
117
118
  # 3. Block quotes (including nested quotes and citations, up to three levels, with length constraints)
118
- fr"(?:(?:^>(?:>|\s{{2,}}){{0,2}}(?:(?:\b[^\r\n]{{0,{MAX_BLOCKQUOTE_LINE_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:\b[^\r\n]{{0,{MAX_BLOCKQUOTE_LINE_LENGTH}}}\b(?=[\r\n]|$))|(?:\b[^\r\n]{{0,{MAX_BLOCKQUOTE_LINE_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))\\r?\\n?){{1,{MAX_BLOCKQUOTE_LINES}}})" +
119
- "|" +
119
+ rf"(?:(?:^>(?:>|\s{{2,}}){{0,2}}(?:(?:\b[^\r\n]{{0,{MAX_BLOCKQUOTE_LINE_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:\b[^\r\n]{{0,{MAX_BLOCKQUOTE_LINE_LENGTH}}}\b(?=[\r\n]|$))|(?:\b[^\r\n]{{0,{MAX_BLOCKQUOTE_LINE_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))\\r?\\n?){{1,{MAX_BLOCKQUOTE_LINES}}})"
120
+ "|"
120
121
  # 4. Code blocks (fenced, indented, or HTML pre/code tags, with length constraints)
121
- fr"(?:(?:^|\r?\n)(?:```|~~~)(?:\w{{0,{MAX_CODE_LANGUAGE_LENGTH}}})?\r?\n[\s\S]{{0,{MAX_CODE_BLOCK_LENGTH}}}?(?:```|~~~)\r?\n?" +
122
- fr"|(?:(?:^|\r?\n)(?: {{4}}|\t)[^\r\n]{{0,{MAX_LIST_ITEM_LENGTH}}}(?:\r?\n(?: {{4}}|\t)[^\r\n]{{0,{MAX_LIST_ITEM_LENGTH}}}){{0,{MAX_INDENTED_CODE_LINES}}}\r?\n?)" +
123
- fr"|(?:<pre>(?:<code>)?[\s\S]{{0,{MAX_CODE_BLOCK_LENGTH}}}?(?:</code>)?</pre>))" +
124
- "|" +
122
+ rf"(?:(?:^|\r?\n)(?:```|~~~)(?:\w{{0,{MAX_CODE_LANGUAGE_LENGTH}}})?\r?\n[\s\S]{{0,{MAX_CODE_BLOCK_LENGTH}}}?(?:```|~~~)\r?\n?"
123
+ rf"|(?:(?:^|\r?\n)(?: {{4}}|\t)[^\r\n]{{0,{MAX_LIST_ITEM_LENGTH}}}(?:\r?\n(?: {{4}}|\t)[^\r\n]{{0,{MAX_LIST_ITEM_LENGTH}}}){{0,{MAX_INDENTED_CODE_LINES}}}\r?\n?)"
124
+ rf"|(?:<pre>(?:<code>)?[\s\S]{{0,{MAX_CODE_BLOCK_LENGTH}}}?(?:</code>)?</pre>))"
125
+ "|"
125
126
  # 5. Tables (Markdown, grid tables, and HTML tables, with length constraints)
126
- fr"(?:(?:^|\r?\n)(?:\|[^\r\n]{{0,{MAX_TABLE_CELL_LENGTH}}}\|(?:\r?\n\|[-:]{{1,{MAX_TABLE_CELL_LENGTH}}}\|){{0,1}}(?:\r?\n\|[^\r\n]{{0,{MAX_TABLE_CELL_LENGTH}}}\|){{0,{MAX_TABLE_ROWS}}}" +
127
- fr"|<table>[\s\S]{{0,{MAX_HTML_TABLE_LENGTH}}}?</table>))" +
128
- "|" +
127
+ rf"(?:(?:^|\r?\n)(?:\|[^\r\n]{{0,{MAX_TABLE_CELL_LENGTH}}}\|(?:\r?\n\|[-:]{{1,{MAX_TABLE_CELL_LENGTH}}}\|){{0,1}}(?:\r?\n\|[^\r\n]{{0,{MAX_TABLE_CELL_LENGTH}}}\|){{0,{MAX_TABLE_ROWS}}}"
128
+ rf"|<table>[\s\S]{{0,{MAX_HTML_TABLE_LENGTH}}}?</table>))"
129
+ "|"
129
130
  # 6. Horizontal rules (Markdown and HTML hr tag)
130
- fr"(?:^(?:[-*_]){{{MIN_HORIZONTAL_RULE_LENGTH},}}\s*$|<hr\s*/?>\r?\n)" +
131
- "|" +
131
+ rf"(?:^(?:[-*_]){{{MIN_HORIZONTAL_RULE_LENGTH},}}\s*$|<hr\s*/?>\r?\n)"
132
+ "|"
132
133
  # 7. Standalone lines or phrases (including single-line blocks and HTML elements, with length constraints)
133
- fr"(?:^(?:<[a-zA-Z][^>]{{0,{MAX_HTML_TAG_ATTRIBUTES_LENGTH}}}>)?(?:(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))(?:</[a-zA-Z]+>)?(?:\r?\n|$))" +
134
- "|" +
134
+ rf"(?:^(?:<[a-zA-Z][^>]{{0,{MAX_HTML_TAG_ATTRIBUTES_LENGTH}}}>)?(?:(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))(?:</[a-zA-Z]+>)?(?:\r?\n|$))"
135
+ "|"
135
136
  # 8. Sentences or phrases ending with punctuation (including ellipsis and Unicode punctuation)
136
- fr"(?:(?:[^\r\n]{{1,{MAX_SENTENCE_LENGTH}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_SENTENCE_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_SENTENCE_LENGTH}}}(?=[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))" +
137
- "|" +
137
+ rf"(?:(?:[^\r\n]{{1,{MAX_SENTENCE_LENGTH}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_SENTENCE_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_SENTENCE_LENGTH}}}(?=[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
138
+ "|"
138
139
  # 9. Quoted text, parenthetical phrases, or bracketed content (with length constraints)
139
- r"(?:" +
140
- fr'(?<!\w)\"\"\"[^\""]{{0,{MAX_QUOTED_TEXT_LENGTH}}}\"\"\"(?!\w)' +
141
- fr"|(?<!\w)['\"`''""][^\r\n]{{0,{MAX_QUOTED_TEXT_LENGTH}}}['\"`''""](?!\w)" +
142
- fr"|\([^\r\n(){{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}(?:\([^\r\n(){{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}\)[^\r\n(){{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}){{0,{MAX_NESTED_PARENTHESES}}}\)" +
143
- fr"|\[[^\r\n\[\]{{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}(?:\[[^\r\n\[\]{{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}\][^\r\n\[\]{{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}){{0,{MAX_NESTED_PARENTHESES}}}\]" +
144
- fr"|\$[^\r\n$]{{0,{MAX_MATH_INLINE_LENGTH}}}\$" +
145
- fr"|`[^`\r\n]{{0,{MAX_MATH_INLINE_LENGTH}}}`" +
146
- r")" +
147
- "|" +
140
+ r"(?:"
141
+ rf'(?<!\w)\"\"\"[^\""]{{0,{MAX_QUOTED_TEXT_LENGTH}}}\"\"\"(?!\w)'
142
+ r"|(?<!\w)['\"`][^\r\n]{{0,{MAX_QUOTED_TEXT_LENGTH}}}['\"`](?!\w)"
143
+ rf"|\([^\r\n(){{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}(?:\([^\r\n(){{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}\)[^\r\n(){{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}){{0,{MAX_NESTED_PARENTHESES}}}\)"
144
+ rf"|\[[^\r\n\[\]{{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}(?:\[[^\r\n\[\]{{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}\][^\r\n\[\]{{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}){{0,{MAX_NESTED_PARENTHESES}}}\]"
145
+ rf"|\$[^\r\n$]{{0,{MAX_MATH_INLINE_LENGTH}}}\$"
146
+ rf"|`[^`\r\n]{{0,{MAX_MATH_INLINE_LENGTH}}}`"
147
+ r")"
148
+ "|"
148
149
  # 10. Paragraphs (with length constraints)
149
- fr"(?:(?:^|\r?\n\r?\n)(?:<p>)?(?:(?:[^\r\n]{{1,{MAX_PARAGRAPH_LENGTH}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_PARAGRAPH_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_PARAGRAPH_LENGTH}}}(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))(?:</p>)?(?=\r?\n\r?\n|$))" +
150
- "|" +
150
+ rf"(?:(?:^|\r?\n\r?\n)(?:<p>)?(?:(?:[^\r\n]{{1,{MAX_PARAGRAPH_LENGTH}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_PARAGRAPH_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_PARAGRAPH_LENGTH}}}(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))(?:</p>)?(?=\r?\n\r?\n|$))"
151
+ "|"
151
152
  # 11. HTML-like tags and their content (including self-closing tags and attributes, with length constraints)
152
- fr"(?:<[a-zA-Z][^>]{{0,{MAX_HTML_TAG_ATTRIBUTES_LENGTH}}}(?:>[\s\S]{{0,{MAX_HTML_TAG_CONTENT_LENGTH}}}?</[a-zA-Z]+>|\s*/>))" +
153
- "|" +
153
+ rf"(?:<[a-zA-Z][^>]{{0,{MAX_HTML_TAG_ATTRIBUTES_LENGTH}}}(?:>[\s\S]{{0,{MAX_HTML_TAG_CONTENT_LENGTH}}}?</[a-zA-Z]+>|\s*/>))"
154
+ "|"
154
155
  # 12. LaTeX-style math expressions (inline and block, with length constraints)
155
- fr"(?:(?:\$\$[\s\S]{{0,{MAX_MATH_BLOCK_LENGTH}}}?\$\$)|(?:\$[^\$\r\n]{{0,{MAX_MATH_INLINE_LENGTH}}}\$))" +
156
- "|" +
156
+ rf"(?:(?:\$\$[\s\S]{{0,{MAX_MATH_BLOCK_LENGTH}}}?\$\$)|(?:\$[^\$\r\n]{{0,{MAX_MATH_INLINE_LENGTH}}}\$))"
157
+ "|"
157
158
  # 13. Fallback for any remaining content (with length constraints)
158
- fr"(?:(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))" +
159
+ rf"(?:(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
159
160
  ")",
160
- re.MULTILINE | re.UNICODE
161
+ re.MULTILINE | re.UNICODE,
161
162
  )