symbolicai 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. symai/__init__.py +198 -134
  2. symai/backend/base.py +51 -51
  3. symai/backend/engines/drawing/engine_bfl.py +33 -33
  4. symai/backend/engines/drawing/engine_gpt_image.py +4 -10
  5. symai/backend/engines/embedding/engine_llama_cpp.py +50 -35
  6. symai/backend/engines/embedding/engine_openai.py +22 -16
  7. symai/backend/engines/execute/engine_python.py +16 -16
  8. symai/backend/engines/files/engine_io.py +51 -49
  9. symai/backend/engines/imagecaptioning/engine_blip2.py +27 -23
  10. symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +53 -46
  11. symai/backend/engines/index/engine_pinecone.py +116 -88
  12. symai/backend/engines/index/engine_qdrant.py +1011 -0
  13. symai/backend/engines/index/engine_vectordb.py +78 -52
  14. symai/backend/engines/lean/engine_lean4.py +65 -25
  15. symai/backend/engines/neurosymbolic/__init__.py +28 -28
  16. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +137 -135
  17. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +145 -152
  18. symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
  19. symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +75 -49
  20. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +199 -155
  21. symai/backend/engines/neurosymbolic/engine_groq.py +106 -72
  22. symai/backend/engines/neurosymbolic/engine_huggingface.py +100 -67
  23. symai/backend/engines/neurosymbolic/engine_llama_cpp.py +121 -93
  24. symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +213 -132
  25. symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +180 -137
  26. symai/backend/engines/ocr/engine_apilayer.py +18 -20
  27. symai/backend/engines/output/engine_stdout.py +9 -9
  28. symai/backend/engines/{webscraping → scrape}/engine_requests.py +25 -11
  29. symai/backend/engines/search/engine_openai.py +95 -83
  30. symai/backend/engines/search/engine_parallel.py +665 -0
  31. symai/backend/engines/search/engine_perplexity.py +40 -41
  32. symai/backend/engines/search/engine_serpapi.py +33 -28
  33. symai/backend/engines/speech_to_text/engine_local_whisper.py +37 -27
  34. symai/backend/engines/symbolic/engine_wolframalpha.py +14 -8
  35. symai/backend/engines/text_to_speech/engine_openai.py +15 -19
  36. symai/backend/engines/text_vision/engine_clip.py +34 -28
  37. symai/backend/engines/userinput/engine_console.py +3 -4
  38. symai/backend/mixin/anthropic.py +48 -40
  39. symai/backend/mixin/deepseek.py +4 -5
  40. symai/backend/mixin/google.py +5 -4
  41. symai/backend/mixin/groq.py +2 -4
  42. symai/backend/mixin/openai.py +132 -110
  43. symai/backend/settings.py +14 -14
  44. symai/chat.py +164 -94
  45. symai/collect/dynamic.py +13 -11
  46. symai/collect/pipeline.py +39 -31
  47. symai/collect/stats.py +109 -69
  48. symai/components.py +556 -238
  49. symai/constraints.py +14 -5
  50. symai/core.py +1495 -1210
  51. symai/core_ext.py +55 -50
  52. symai/endpoints/api.py +113 -58
  53. symai/extended/api_builder.py +22 -17
  54. symai/extended/arxiv_pdf_parser.py +13 -5
  55. symai/extended/bibtex_parser.py +8 -4
  56. symai/extended/conversation.py +88 -69
  57. symai/extended/document.py +40 -27
  58. symai/extended/file_merger.py +45 -7
  59. symai/extended/graph.py +38 -24
  60. symai/extended/html_style_template.py +17 -11
  61. symai/extended/interfaces/blip_2.py +1 -1
  62. symai/extended/interfaces/clip.py +4 -2
  63. symai/extended/interfaces/console.py +5 -3
  64. symai/extended/interfaces/dall_e.py +3 -1
  65. symai/extended/interfaces/file.py +2 -0
  66. symai/extended/interfaces/flux.py +3 -1
  67. symai/extended/interfaces/gpt_image.py +15 -6
  68. symai/extended/interfaces/input.py +2 -1
  69. symai/extended/interfaces/llava.py +1 -1
  70. symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +3 -2
  71. symai/extended/interfaces/naive_vectordb.py +2 -2
  72. symai/extended/interfaces/ocr.py +4 -2
  73. symai/extended/interfaces/openai_search.py +2 -0
  74. symai/extended/interfaces/parallel.py +30 -0
  75. symai/extended/interfaces/perplexity.py +2 -0
  76. symai/extended/interfaces/pinecone.py +6 -4
  77. symai/extended/interfaces/python.py +2 -0
  78. symai/extended/interfaces/serpapi.py +2 -0
  79. symai/extended/interfaces/terminal.py +0 -1
  80. symai/extended/interfaces/tts.py +2 -1
  81. symai/extended/interfaces/whisper.py +2 -1
  82. symai/extended/interfaces/wolframalpha.py +1 -0
  83. symai/extended/metrics/__init__.py +1 -1
  84. symai/extended/metrics/similarity.py +5 -2
  85. symai/extended/os_command.py +31 -22
  86. symai/extended/packages/symdev.py +39 -34
  87. symai/extended/packages/sympkg.py +30 -27
  88. symai/extended/packages/symrun.py +46 -35
  89. symai/extended/repo_cloner.py +10 -9
  90. symai/extended/seo_query_optimizer.py +15 -12
  91. symai/extended/solver.py +104 -76
  92. symai/extended/summarizer.py +8 -7
  93. symai/extended/taypan_interpreter.py +10 -9
  94. symai/extended/vectordb.py +28 -15
  95. symai/formatter/formatter.py +39 -31
  96. symai/formatter/regex.py +46 -44
  97. symai/functional.py +184 -86
  98. symai/imports.py +85 -51
  99. symai/interfaces.py +1 -1
  100. symai/memory.py +33 -24
  101. symai/menu/screen.py +28 -19
  102. symai/misc/console.py +27 -27
  103. symai/misc/loader.py +4 -3
  104. symai/models/base.py +147 -76
  105. symai/models/errors.py +1 -1
  106. symai/ops/__init__.py +1 -1
  107. symai/ops/measures.py +17 -14
  108. symai/ops/primitives.py +933 -635
  109. symai/post_processors.py +28 -24
  110. symai/pre_processors.py +58 -52
  111. symai/processor.py +15 -9
  112. symai/prompts.py +714 -649
  113. symai/server/huggingface_server.py +115 -32
  114. symai/server/llama_cpp_server.py +14 -6
  115. symai/server/qdrant_server.py +206 -0
  116. symai/shell.py +98 -39
  117. symai/shellsv.py +307 -223
  118. symai/strategy.py +135 -81
  119. symai/symbol.py +276 -225
  120. symai/utils.py +62 -46
  121. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +19 -9
  122. symbolicai-1.1.0.dist-info/RECORD +168 -0
  123. symbolicai-1.0.0.dist-info/RECORD +0 -163
  124. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
  125. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
  126. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
  127. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
symai/formatter/regex.py CHANGED
@@ -32,6 +32,7 @@ MAX_HTML_TAG_ATTRIBUTES_LENGTH = 100
32
32
  MAX_HTML_TAG_CONTENT_LENGTH = 1000
33
33
  LOOKAHEAD_RANGE = 100 # Number of characters to look ahead for a sentence boundary
34
34
 
35
+
35
36
  # Define emoji ranges
36
37
  def generate_emoji_pattern(file_name):
37
38
  current_dir = Path(__file__).resolve().parent
@@ -40,11 +41,11 @@ def generate_emoji_pattern(file_name):
40
41
  emoji_codes = set()
41
42
 
42
43
  try:
43
- with file_path.open(encoding='utf-8') as file:
44
+ with file_path.open(encoding="utf-8") as file:
44
45
  for line in file:
45
46
  # Skip comments and empty lines
46
- if line.strip() and not line.startswith('#'):
47
- fields = line.strip().split(';')
47
+ if line.strip() and not line.startswith("#"):
48
+ fields = line.strip().split(";")
48
49
  unicode_codes = fields[0].strip().split()
49
50
 
50
51
  if len(unicode_codes) == 1:
@@ -52,7 +53,7 @@ def generate_emoji_pattern(file_name):
52
53
  emoji_codes.add(chr(int(unicode_codes[0], 16)))
53
54
  elif len(unicode_codes) > 1:
54
55
  # Sequence of Unicode characters
55
- emoji_sequence = ''.join(chr(int(code, 16)) for code in unicode_codes)
56
+ emoji_sequence = "".join(chr(int(code, 16)) for code in unicode_codes)
56
57
  emoji_codes.add(emoji_sequence)
57
58
 
58
59
  # We could also process vendor-specific codes here if needed
@@ -75,86 +76,87 @@ def generate_emoji_pattern(file_name):
75
76
  pattern_parts = []
76
77
  for code in sorted_codes:
77
78
  if len(code) == 1:
78
- pattern_parts.append(f'\\U{ord(code):08x}')
79
+ pattern_parts.append(f"\\U{ord(code):08x}")
79
80
  else:
80
- pattern_parts.append(''.join(f'\\U{ord(c):08x}' for c in code))
81
+ pattern_parts.append("".join(f"\\U{ord(c):08x}" for c in code))
82
+
83
+ return "(?:" + "|".join(pattern_parts) + ")"
81
84
 
82
- return '(?:' + '|'.join(pattern_parts) + ')'
83
85
 
84
86
  # Usage
85
- file_name = 'emoji.pytxt'
87
+ file_name = "emoji.pytxt"
86
88
  EMOJI_PATTERN = generate_emoji_pattern(file_name)
87
89
 
88
90
  # Define the regex pattern
89
91
  CHUNK_REGEX = re.compile(
90
92
  r"("
91
93
  # 1. Headings (Setext-style, Markdown, and HTML-style, with length constraints)
92
- fr"(?:^(?:[#*=-]{{1,{MAX_HEADING_LENGTH}}}|\w[^\r\n]{{0,{MAX_HEADING_CONTENT_LENGTH}}}\r?\n[-=]{{2,{MAX_HEADING_UNDERLINE_LENGTH}}}|<h[1-6][^>]{{0,{MAX_HTML_HEADING_ATTRIBUTES_LENGTH}}}>)[^\r\n]{{1,{MAX_HEADING_CONTENT_LENGTH}}}(?:</h[1-6]>)?(?:\r?\n|$))"
94
+ rf"(?:^(?:[#*=-]{{1,{MAX_HEADING_LENGTH}}}|\w[^\r\n]{{0,{MAX_HEADING_CONTENT_LENGTH}}}\r?\n[-=]{{2,{MAX_HEADING_UNDERLINE_LENGTH}}}|<h[1-6][^>]{{0,{MAX_HTML_HEADING_ATTRIBUTES_LENGTH}}}>)[^\r\n]{{1,{MAX_HEADING_CONTENT_LENGTH}}}(?:</h[1-6]>)?(?:\r?\n|$))"
93
95
  "|"
94
96
  # New pattern for citations
95
- fr"(?:\[[0-9]+\][^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}})"
97
+ rf"(?:\[[0-9]+\][^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}})"
96
98
  "|"
97
99
  # 2. List items (bulleted, numbered, lettered, or task lists, including nested, up to three levels, with length constraints)
98
100
  r"(?:(?:^|\r?\n)[ \t]{0,3}(?:[-*+•]|\d{1,3}\.|\[[ xX]\])[ \t]+"
99
- fr"(?:(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|"
100
- fr"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[\r\n]|$))|"
101
- fr"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})"
102
- fr"(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
101
+ rf"(?:(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|"
102
+ rf"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[\r\n]|$))|"
103
+ rf"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})"
104
+ rf"(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
103
105
  r"(?:(?:\r?\n[ \t]{2,5}(?:[-*+•]|\d{1,3}\.|\[[ xX]\])[ \t]+"
104
- fr"(?:(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|"
105
- fr"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[\r\n]|$))|"
106
- fr"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})"
107
- fr"(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
108
- fr"){{0,{MAX_NESTED_LIST_ITEMS}}}"
109
- fr"(?:\r?\n[ \t]{{4,{MAX_LIST_INDENT_SPACES}}}(?:[-*+•]|\d{{1,3}}\.|\[[ xX]\])[ \t]+"
110
- fr"(?:(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|"
111
- fr"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[\r\n]|$))|"
112
- fr"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})"
113
- fr"(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
114
- fr"){{0,{MAX_NESTED_LIST_ITEMS}}})"
106
+ rf"(?:(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|"
107
+ rf"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[\r\n]|$))|"
108
+ rf"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})"
109
+ rf"(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
110
+ rf"){{0,{MAX_NESTED_LIST_ITEMS}}}"
111
+ rf"(?:\r?\n[ \t]{{4,{MAX_LIST_INDENT_SPACES}}}(?:[-*+•]|\d{{1,3}}\.|\[[ xX]\])[ \t]+"
112
+ rf"(?:(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|"
113
+ rf"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[\r\n]|$))|"
114
+ rf"(?:\b[^\r\n]{{1,{MAX_LIST_ITEM_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})"
115
+ rf"(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
116
+ rf"){{0,{MAX_NESTED_LIST_ITEMS}}})"
115
117
  ")"
116
118
  # 3. Block quotes (including nested quotes and citations, up to three levels, with length constraints)
117
- fr"(?:(?:^>(?:>|\s{{2,}}){{0,2}}(?:(?:\b[^\r\n]{{0,{MAX_BLOCKQUOTE_LINE_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:\b[^\r\n]{{0,{MAX_BLOCKQUOTE_LINE_LENGTH}}}\b(?=[\r\n]|$))|(?:\b[^\r\n]{{0,{MAX_BLOCKQUOTE_LINE_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))\\r?\\n?){{1,{MAX_BLOCKQUOTE_LINES}}})"
119
+ rf"(?:(?:^>(?:>|\s{{2,}}){{0,2}}(?:(?:\b[^\r\n]{{0,{MAX_BLOCKQUOTE_LINE_LENGTH}}}\b(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:\b[^\r\n]{{0,{MAX_BLOCKQUOTE_LINE_LENGTH}}}\b(?=[\r\n]|$))|(?:\b[^\r\n]{{0,{MAX_BLOCKQUOTE_LINE_LENGTH}}}\b(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))\\r?\\n?){{1,{MAX_BLOCKQUOTE_LINES}}})"
118
120
  "|"
119
121
  # 4. Code blocks (fenced, indented, or HTML pre/code tags, with length constraints)
120
- fr"(?:(?:^|\r?\n)(?:```|~~~)(?:\w{{0,{MAX_CODE_LANGUAGE_LENGTH}}})?\r?\n[\s\S]{{0,{MAX_CODE_BLOCK_LENGTH}}}?(?:```|~~~)\r?\n?"
121
- fr"|(?:(?:^|\r?\n)(?: {{4}}|\t)[^\r\n]{{0,{MAX_LIST_ITEM_LENGTH}}}(?:\r?\n(?: {{4}}|\t)[^\r\n]{{0,{MAX_LIST_ITEM_LENGTH}}}){{0,{MAX_INDENTED_CODE_LINES}}}\r?\n?)"
122
- fr"|(?:<pre>(?:<code>)?[\s\S]{{0,{MAX_CODE_BLOCK_LENGTH}}}?(?:</code>)?</pre>))"
122
+ rf"(?:(?:^|\r?\n)(?:```|~~~)(?:\w{{0,{MAX_CODE_LANGUAGE_LENGTH}}})?\r?\n[\s\S]{{0,{MAX_CODE_BLOCK_LENGTH}}}?(?:```|~~~)\r?\n?"
123
+ rf"|(?:(?:^|\r?\n)(?: {{4}}|\t)[^\r\n]{{0,{MAX_LIST_ITEM_LENGTH}}}(?:\r?\n(?: {{4}}|\t)[^\r\n]{{0,{MAX_LIST_ITEM_LENGTH}}}){{0,{MAX_INDENTED_CODE_LINES}}}\r?\n?)"
124
+ rf"|(?:<pre>(?:<code>)?[\s\S]{{0,{MAX_CODE_BLOCK_LENGTH}}}?(?:</code>)?</pre>))"
123
125
  "|"
124
126
  # 5. Tables (Markdown, grid tables, and HTML tables, with length constraints)
125
- fr"(?:(?:^|\r?\n)(?:\|[^\r\n]{{0,{MAX_TABLE_CELL_LENGTH}}}\|(?:\r?\n\|[-:]{{1,{MAX_TABLE_CELL_LENGTH}}}\|){{0,1}}(?:\r?\n\|[^\r\n]{{0,{MAX_TABLE_CELL_LENGTH}}}\|){{0,{MAX_TABLE_ROWS}}}"
126
- fr"|<table>[\s\S]{{0,{MAX_HTML_TABLE_LENGTH}}}?</table>))"
127
+ rf"(?:(?:^|\r?\n)(?:\|[^\r\n]{{0,{MAX_TABLE_CELL_LENGTH}}}\|(?:\r?\n\|[-:]{{1,{MAX_TABLE_CELL_LENGTH}}}\|){{0,1}}(?:\r?\n\|[^\r\n]{{0,{MAX_TABLE_CELL_LENGTH}}}\|){{0,{MAX_TABLE_ROWS}}}"
128
+ rf"|<table>[\s\S]{{0,{MAX_HTML_TABLE_LENGTH}}}?</table>))"
127
129
  "|"
128
130
  # 6. Horizontal rules (Markdown and HTML hr tag)
129
- fr"(?:^(?:[-*_]){{{MIN_HORIZONTAL_RULE_LENGTH},}}\s*$|<hr\s*/?>\r?\n)"
131
+ rf"(?:^(?:[-*_]){{{MIN_HORIZONTAL_RULE_LENGTH},}}\s*$|<hr\s*/?>\r?\n)"
130
132
  "|"
131
133
  # 7. Standalone lines or phrases (including single-line blocks and HTML elements, with length constraints)
132
- fr"(?:^(?:<[a-zA-Z][^>]{{0,{MAX_HTML_TAG_ATTRIBUTES_LENGTH}}}>)?(?:(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))(?:</[a-zA-Z]+>)?(?:\r?\n|$))"
134
+ rf"(?:^(?:<[a-zA-Z][^>]{{0,{MAX_HTML_TAG_ATTRIBUTES_LENGTH}}}>)?(?:(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))(?:</[a-zA-Z]+>)?(?:\r?\n|$))"
133
135
  "|"
134
136
  # 8. Sentences or phrases ending with punctuation (including ellipsis and Unicode punctuation)
135
- fr"(?:(?:[^\r\n]{{1,{MAX_SENTENCE_LENGTH}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_SENTENCE_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_SENTENCE_LENGTH}}}(?=[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
137
+ rf"(?:(?:[^\r\n]{{1,{MAX_SENTENCE_LENGTH}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_SENTENCE_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_SENTENCE_LENGTH}}}(?=[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.\.\.|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
136
138
  "|"
137
139
  # 9. Quoted text, parenthetical phrases, or bracketed content (with length constraints)
138
140
  r"(?:"
139
- fr'(?<!\w)\"\"\"[^\""]{{0,{MAX_QUOTED_TEXT_LENGTH}}}\"\"\"(?!\w)'
141
+ rf'(?<!\w)\"\"\"[^\""]{{0,{MAX_QUOTED_TEXT_LENGTH}}}\"\"\"(?!\w)'
140
142
  r"|(?<!\w)['\"`][^\r\n]{{0,{MAX_QUOTED_TEXT_LENGTH}}}['\"`](?!\w)"
141
- fr"|\([^\r\n(){{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}(?:\([^\r\n(){{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}\)[^\r\n(){{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}){{0,{MAX_NESTED_PARENTHESES}}}\)"
142
- fr"|\[[^\r\n\[\]{{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}(?:\[[^\r\n\[\]{{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}\][^\r\n\[\]{{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}){{0,{MAX_NESTED_PARENTHESES}}}\]"
143
- fr"|\$[^\r\n$]{{0,{MAX_MATH_INLINE_LENGTH}}}\$"
144
- fr"|`[^`\r\n]{{0,{MAX_MATH_INLINE_LENGTH}}}`"
143
+ rf"|\([^\r\n(){{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}(?:\([^\r\n(){{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}\)[^\r\n(){{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}){{0,{MAX_NESTED_PARENTHESES}}}\)"
144
+ rf"|\[[^\r\n\[\]{{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}(?:\[[^\r\n\[\]{{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}\][^\r\n\[\]{{0,{MAX_PARENTHETICAL_CONTENT_LENGTH}}}){{0,{MAX_NESTED_PARENTHESES}}}\]"
145
+ rf"|\$[^\r\n$]{{0,{MAX_MATH_INLINE_LENGTH}}}\$"
146
+ rf"|`[^`\r\n]{{0,{MAX_MATH_INLINE_LENGTH}}}`"
145
147
  r")"
146
148
  "|"
147
149
  # 10. Paragraphs (with length constraints)
148
- fr"(?:(?:^|\r?\n\r?\n)(?:<p>)?(?:(?:[^\r\n]{{1,{MAX_PARAGRAPH_LENGTH}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_PARAGRAPH_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_PARAGRAPH_LENGTH}}}(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))(?:</p>)?(?=\r?\n\r?\n|$))"
150
+ rf"(?:(?:^|\r?\n\r?\n)(?:<p>)?(?:(?:[^\r\n]{{1,{MAX_PARAGRAPH_LENGTH}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_PARAGRAPH_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_PARAGRAPH_LENGTH}}}(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))(?:</p>)?(?=\r?\n\r?\n|$))"
149
151
  "|"
150
152
  # 11. HTML-like tags and their content (including self-closing tags and attributes, with length constraints)
151
- fr"(?:<[a-zA-Z][^>]{{0,{MAX_HTML_TAG_ATTRIBUTES_LENGTH}}}(?:>[\s\S]{{0,{MAX_HTML_TAG_CONTENT_LENGTH}}}?</[a-zA-Z]+>|\s*/>))"
153
+ rf"(?:<[a-zA-Z][^>]{{0,{MAX_HTML_TAG_ATTRIBUTES_LENGTH}}}(?:>[\s\S]{{0,{MAX_HTML_TAG_CONTENT_LENGTH}}}?</[a-zA-Z]+>|\s*/>))"
152
154
  "|"
153
155
  # 12. LaTeX-style math expressions (inline and block, with length constraints)
154
- fr"(?:(?:\$\$[\s\S]{{0,{MAX_MATH_BLOCK_LENGTH}}}?\$\$)|(?:\$[^\$\r\n]{{0,{MAX_MATH_INLINE_LENGTH}}}\$))"
156
+ rf"(?:(?:\$\$[\s\S]{{0,{MAX_MATH_BLOCK_LENGTH}}}?\$\$)|(?:\$[^\$\r\n]{{0,{MAX_MATH_INLINE_LENGTH}}}\$))"
155
157
  "|"
156
158
  # 13. Fallback for any remaining content (with length constraints)
157
- fr"(?:(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
159
+ rf"(?:(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[\r\n]|$))|(?:[^\r\n]{{1,{MAX_STANDALONE_LINE_LENGTH}}}(?=[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?:.{{1,{LOOKAHEAD_RANGE}}}(?:[.!?…]|\.{{3}}|\u2026\u2047-\u2049|{EMOJI_PATTERN})(?=\s|$))?))"
158
160
  ")",
159
- re.MULTILINE | re.UNICODE
161
+ re.MULTILINE | re.UNICODE,
160
162
  )