streamdown 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
streamdown/sd.py CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env -S uv run --script
1
+ #!/bin/bash
2
2
  # /// script
3
3
  # requires-python = ">=3.8"
4
4
  # dependencies = [
@@ -9,6 +9,13 @@
9
9
  # "toml"
10
10
  # ]
11
11
  # ///
12
+ '''':
13
+ if command -v uv &> /dev/null; then
14
+ exec uv run --script "$0" "$@"
15
+ else
16
+ exec python3 "$0" "$@"
17
+ fi
18
+ '''
12
19
  import appdirs, toml
13
20
  import logging, tempfile
14
21
  import os, sys
@@ -29,7 +36,7 @@ from functools import reduce
29
36
  from argparse import ArgumentParser
30
37
  from pygments import highlight
31
38
  from pygments.lexers import get_lexer_by_name
32
- from pygments.formatters import Terminal256Formatter
39
+ from pygments.formatters import TerminalTrueColorFormatter
33
40
  from pygments.styles import get_style_by_name
34
41
 
35
42
  if __package__ is None:
@@ -39,21 +46,23 @@ else:
39
46
 
40
47
  default_toml = """
41
48
  [features]
42
- CodeSpaces = true
49
+ CodeSpaces = false
43
50
  Clipboard = true
44
51
  Logging = false
45
- Timeout = 0.5
52
+ Timeout = 0.1
53
+ Savebrace = true
46
54
 
47
55
  [style]
48
- Margin = 2
49
- ListIndent = 2
50
- PrettyPad = false
51
- Width = 0
56
+ Margin = 2
57
+ ListIndent = 2
58
+ PrettyPad = false
59
+ PrettyBroken = true
60
+ Width = 0
52
61
  HSV = [0.8, 0.5, 0.5]
53
62
  Dark = { H = 1.00, S = 1.50, V = 0.25 }
54
63
  Mid = { H = 1.00, S = 1.00, V = 0.50 }
55
64
  Symbol = { H = 1.00, S = 1.00, V = 1.50 }
56
- Head = { H = 1.00, S = 2.00, V = 1.50 }
65
+ Head = { H = 1.00, S = 1.00, V = 1.75 }
57
66
  Grey = { H = 1.00, S = 0.25, V = 1.37 }
58
67
  Bright = { H = 1.00, S = 2.00, V = 2.00 }
59
68
  Syntax = "monokai"
@@ -90,7 +99,8 @@ ANSIESCAPE = r'\033(?:\[[0-9;?]*[a-zA-Z]|][0-9]*;;.*?\\|\\)'
90
99
  KEYCODE_RE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
91
100
 
92
101
  visible = lambda x: re.sub(ANSIESCAPE, "", x)
93
- visible_length = lambda x: len(visible(x))
102
+ # cjk characters are double width
103
+ visible_length = lambda x: len(visible(x)) + cjk_count(x)
94
104
  extract_ansi_codes = lambda text: re.findall(ESCAPE, text)
95
105
  remove_ansi = lambda line, codeList: reduce(lambda line, code: line.replace(code, ''), codeList, line)
96
106
 
@@ -102,6 +112,13 @@ def debug_write(text):
102
112
  state.Logging = tempfile.NamedTemporaryFile(dir=tmp_dir, prefix="dbg", delete=False, mode="wb")
103
113
  state.Logging.write(text)
104
114
 
115
def savebrace():
    """Append the current raw code block to the shared ``savebrace`` file.

    Does nothing unless the ``Savebrace`` feature flag is set on the global
    parse state and ``state.code_buffer_raw`` is non-empty. The text is
    appended to ``<system temp dir>/sd/savebrace``.
    """
    if not (state.Savebrace and state.code_buffer_raw):
        return

    directory = os.path.join(tempfile.gettempdir(), "sd")
    # open(..., "a") creates the file but not a missing parent directory, and
    # nothing visible at this call site guarantees the directory exists yet.
    os.makedirs(directory, exist_ok=True)

    with open(os.path.join(directory, 'savebrace'), "a") as f:
        f.write(state.code_buffer_raw)
120
+
121
+
105
122
  class Goto(Exception):
106
123
  pass
107
124
 
@@ -134,6 +151,7 @@ class ParseState:
134
151
  self.Clipboard = _features.get("Clipboard")
135
152
  self.Logging = _features.get("Logging")
136
153
  self.Timeout = _features.get("Timeout")
154
+ self.Savebrace = _features.get("Savebrace")
137
155
 
138
156
  self.WidthArg = None
139
157
  self.WidthFull = None
@@ -149,6 +167,7 @@ class ParseState:
149
167
  # streaming code blocks while preserving
150
168
  # multiline parsing.
151
169
  self.code_buffer = ""
170
+ self.code_buffer_raw = ""
152
171
  self.code_gen = 0
153
172
  self.code_language = None
154
173
  self.code_first_line = False
@@ -157,6 +176,7 @@ class ParseState:
157
176
 
158
177
  self.ordered_list_numbers = []
159
178
  self.list_item_stack = [] # stack of (indent, type)
179
+ self.list_indent_text = 0
160
180
 
161
181
  self.in_list = False
162
182
  self.in_code = False # (Code.[Backtick|Spaces] | False)
@@ -177,15 +197,22 @@ class ParseState:
177
197
  self.where_from = None
178
198
 
179
199
  def current(self):
180
- state = { 'inline': self.inline_code, 'code': self.in_code, 'bold': self.in_bold, 'italic': self.in_italic, 'underline': self.in_underline }
200
+ state = { 'inline': self.inline_code, 'code': self.in_code, 'bold': self.in_bold, 'italic': self.in_italic, 'underline': self.in_underline, 'strikeout': self.in_strikeout }
181
201
  state['none'] = all(item is False for item in state.values())
182
202
  return state
183
203
 
184
204
  def reset_inline(self):
185
- self.inline_code = self.in_bold = self.in_italic = self.in_underline = False
205
+ self.inline_code = self.in_bold = self.in_italic = self.in_underline = self.in_strikeout = False
206
+
207
def full_width(self, offset = 0):
    """Return the width available to full-width elements, plus ``offset``.

    When ``Style.PrettyBroken`` is truthy the base width is
    ``current_width(listwidth=True)``; otherwise it is ``self.WidthFull``.
    ``offset`` is added to whichever base applies (callers pass negative
    values to reserve columns).
    """
    # Read from self rather than the module-level `state` singleton so the
    # method is correct for whichever ParseState instance it is called on.
    if Style.PrettyBroken:
        base = self.current_width(listwidth = True)
    else:
        base = self.WidthFull
    return offset + base
209
+
210
def current_width(self, listwidth = False):
    """Return the content width left after the current left-hand prefix.

    The prefix is whatever ``space_left(listwidth)`` yields, measured with
    ANSI escapes stripped by ``visible``. That length and ``Style.Margin``
    are both subtracted from ``self.Width``.
    """
    prefix_columns = len(visible(self.space_left(listwidth)))
    reserved = prefix_columns + Style.Margin
    return self.Width - reserved
186
212
 
187
- def space_left(self):
188
- return Style.MarginSpaces + (Style.Blockquote * self.block_depth) if len(self.current_line) == 0 else ""
213
+ def space_left(self, listwidth = False):
214
+ pre = ' ' * (len(state.list_item_stack)) * Style.ListIndent if listwidth else ''
215
+ return pre + Style.MarginSpaces + (Style.Blockquote * self.block_depth) if len(self.current_line) == 0 else ""
189
216
 
190
217
  state = ParseState()
191
218
 
@@ -196,7 +223,7 @@ def format_table(rowList):
196
223
 
197
224
  # Calculate max width per column (integer division)
198
225
  # Subtract num_cols + 1 for the vertical borders '│'
199
- available_width = state.Width - (num_cols + 1)
226
+ available_width = state.current_width() - (num_cols + 1)
200
227
  col_width = max(1, available_width // num_cols)
201
228
  bg_color = Style.Mid if state.in_table == Style.Head else Style.Dark
202
229
  state.bg = f"{BG}{bg_color}"
@@ -205,7 +232,7 @@ def format_table(rowList):
205
232
  # Note this is where every cell is formatted so if
206
233
  # you are styling, do it before here!
207
234
  for row in rowList:
208
- wrapped_cell = text_wrap(row, width=col_width)
235
+ wrapped_cell = text_wrap(row, width=col_width, force_truncate=True)
209
236
 
210
237
  # Ensure at least one line, even for empty cells
211
238
  if not wrapped_cell:
@@ -234,17 +261,17 @@ def format_table(rowList):
234
261
  # Correct indentation: This should be outside the c_idx loop
235
262
  joined_line = f"{BG}{bg_color}{extra}{FG}{Style.Symbol}│{RESET}".join(line_segments)
236
263
  # Correct indentation and add missing characters
237
- yield f"{Style.MarginSpaces}{joined_line}{RESET}"
264
+ yield f"{state.space_left()}{FGRESET}{joined_line}{RESET}"
238
265
 
239
266
  state.bg = BGRESET
240
267
 
241
268
  def emit_h(level, text):
242
269
  text = line_format(text)
243
- spaces_to_center = ((state.Width - visible_length(text)) / 2)
270
+ spaces_to_center = (state.current_width() - visible_length(text)) / 2
244
271
  if level == 1: #
245
- return f"\n{state.space_left()}{BOLD[0]}{' ' * math.floor(spaces_to_center)}{text}{' ' * math.ceil(spaces_to_center)}{BOLD[1]}\n"
272
+ return f"{state.space_left()}\n{state.space_left()}{BOLD[0]}{' ' * math.floor(spaces_to_center)}{text}{BOLD[1]}"
246
273
  elif level == 2: ##
247
- return f"\n{state.space_left()}{BOLD[0]}{FG}{Style.Bright}{' ' * math.floor(spaces_to_center)}{text}{' ' * math.ceil(spaces_to_center)}{RESET}\n\n"
274
+ return f"{state.space_left()}\n{state.space_left()}{BOLD[0]}{FG}{Style.Bright}{' ' * math.floor(spaces_to_center)}{text}{' ' * math.ceil(spaces_to_center)}{BOLD[1]}{FGRESET}"
248
275
  elif level == 3: ###
249
276
  return f"{state.space_left()}{FG}{Style.Head}{BOLD[0]}{text}{RESET}"
250
277
  elif level == 4: ####
@@ -253,13 +280,13 @@ def emit_h(level, text):
253
280
  return f"{state.space_left()}{text}{RESET}"
254
281
 
255
282
  def code_wrap(text_in):
256
- if state.WidthWrap and len(text_in) > state.WidthFull:
283
+ if not Style.PrettyBroken and state.WidthWrap and len(text_in) > state.full_width():
257
284
  return (0, [text_in])
258
285
 
259
286
  # get the indentation of the first line
260
287
  indent = len(text_in) - len(text_in.lstrip())
261
288
  text = text_in.lstrip()
262
- mywidth = state.WidthFull - indent
289
+ mywidth = state.full_width(-4 if Style.PrettyBroken else 0) - indent
263
290
 
264
291
  # We take special care to preserve empty lines
265
292
  if len(text) == 0:
@@ -277,11 +304,8 @@ def code_wrap(text_in):
277
304
  def ansi_collapse(codelist, inp):
278
305
  # We break SGR strings into various classes concerning their application or removal
279
306
  nums = {
280
- 'fg': r'3\d',
281
- 'bg': r'4\d',
282
- 'b': r'2?1',
283
- 'i': r'2?3',
284
- 'u': r'2?2',
307
+ 'fg': r'3\d', 'bg': r'4\d',
308
+ 'b': r'2?[12]', 'i': r'2?3', 'u': r'3?2',
285
309
  'reset': '0'
286
310
  }
287
311
 
@@ -311,12 +335,19 @@ def ansi_collapse(codelist, inp):
311
335
 
312
336
  return codelist + inp
313
337
 
314
- def text_wrap(text, width = -1, indent = 0, first_line_prefix="", subsequent_line_prefix=""):
338
+
339
def split_text(text):
    """Split ``text`` into wrap-able words.

    Splits on runs of whitespace and additionally on both sides of every
    CJK ideograph, so each ideograph becomes its own word and can wrap
    individually.

    Returns a list of non-empty strings. The zero-width boundary matches
    make ``re.split`` emit empty pieces (at the string edges and wherever a
    boundary touches whitespace). They are dropped here because the caller
    treats an empty word as "flush the current line", which would otherwise
    force spurious line breaks in the middle of the text.
    """
    cjk = r'[\u4E00-\u9FFF\u3400-\u4DBF\uF900-\uFAFF]'
    pieces = re.split(
        r'(?<=' + cjk + r')|(?=' + cjk + r')|\s+',
        text
    )
    return [piece for piece in pieces if piece]
344
+
345
+ def text_wrap(text, width = -1, indent = 0, first_line_prefix="", subsequent_line_prefix="", force_truncate=False):
315
346
  if width == -1:
316
347
  width = state.Width
317
348
 
318
349
  # The empty word clears the buffer at the end.
319
- words = line_format(text).split() + [""]
350
+ words = split_text(line_format(text)) + [""]
320
351
  lines = []
321
352
  current_line = ""
322
353
  current_style = []
@@ -329,13 +360,21 @@ def text_wrap(text, width = -1, indent = 0, first_line_prefix="", subsequent_lin
329
360
  current_style.append(codes.pop(0))
330
361
 
331
362
  if len(word) and visible_length(current_line) + visible_length(word) + 1 <= width: # +1 for space
332
- current_line += (" " if current_line else "") + word
363
+
364
+ current_line += (" " if len(visible(word)) > 0 and current_line and not cjk_count(word) else "") + word
333
365
  else:
334
366
  # Word doesn't fit, finalize the previous line
335
367
  prefix = first_line_prefix if not lines else subsequent_line_prefix
336
368
  line_content = prefix + current_line
369
+ # This is expensive, fix.
370
+ while force_truncate and visible_length(line_content) >= width:
371
+ line_content = line_content[:len(line_content) - 2] + "…"
372
+
337
373
  margin = max(0, width - visible_length(line_content))
338
- lines.append(line_content + state.bg + ' ' * margin)
374
+
375
+ if line_content.strip() != "":
376
+ lines.append(line_content + state.bg + ' ' * margin)
377
+
339
378
  current_line = (" " * indent) + "".join(current_style) + word
340
379
 
341
380
  if len(codes):
@@ -349,8 +388,20 @@ def text_wrap(text, width = -1, indent = 0, first_line_prefix="", subsequent_lin
349
388
 
350
389
  return lines
351
390
 
391
# Compiled once at import time; cjk_count runs for every width measurement.
# The ranges are listed back to back inside ONE character class. A '|' inside
# [...] is a literal pipe, not alternation, so it must not be used as a
# separator here or every '|' in the text would be counted as double width.
_CJK_RE = re.compile(
    r'['
    r'\u4E00-\u9FFF'            # CJK Unified Ideographs
    r'\u3400-\u4DBF'            # CJK Unified Ideographs Extension A
    r'\uF900-\uFAFF'            # CJK Compatibility Ideographs
    r'\uFF00-\uFFEF'            # Halfwidth and Fullwidth Forms
    r'\u3000-\u303F'            # CJK Symbols and Punctuation
    r'\U0002F800-\U0002FA1F'    # CJK Compatibility Ideographs Supplement
    r']'
)

def cjk_count(s):
    """Return how many characters of ``s`` fall in the CJK ranges above.

    ANSI escape sequences are stripped with ``visible`` before counting.
    Callers add this count to the plain character length to account for
    double-width glyphs.
    """
    return len(_CJK_RE.findall(visible(s)))
402
+
352
403
  def line_format(line):
353
- not_text = lambda token: not token or len(token.rstrip()) != len(token)
404
+ not_text = lambda token: not (token.isalnum() or token == '\\') or cjk_count(token)
354
405
  footnotes = lambda match: ''.join([chr(SUPER[int(i)]) for i in match.group(1)])
355
406
 
356
407
  def process_images(match):
@@ -379,7 +430,7 @@ def line_format(line):
379
430
  result = ""
380
431
 
381
432
  for match in tokenList:
382
- token = match.group(1)
433
+ token = re.sub(r'\s+',' ', match.group(1))
383
434
  next_token = line[match.end()] if match.end() < len(line) else ""
384
435
  prev_token = line[match.start()-1] if match.start() > 0 else ""
385
436
 
@@ -435,6 +486,7 @@ def parse(stream):
435
486
  last_line_empty_cache = None
436
487
  byte = None
437
488
  TimeoutIx = 0
489
+ lexer = None
438
490
  while True:
439
491
  if state.is_pty or state.is_exec:
440
492
  byte = None
@@ -449,7 +501,7 @@ def parse(stream):
449
501
  state.exec_kb += 1
450
502
  os.write(state.exec_master, byte)
451
503
 
452
- if byte == b'\n':
504
+ if byte in [b'\n', b'\r']:
453
505
  state.buffer = b''
454
506
  print("")
455
507
  state.exec_kb = 0
@@ -484,7 +536,7 @@ def parse(stream):
484
536
 
485
537
  if not (byte == b'\n' or byte is None): continue
486
538
 
487
- line = state.buffer.decode('utf-8')
539
+ line = state.buffer.decode('utf-8').replace('\t',' ')
488
540
  state.has_newline = line.endswith('\n')
489
541
  # I hate this. There should be better ways.
490
542
  state.maybe_prompt = not state.has_newline and state.current()['none'] and re.match(r'^.*>\s+$', visible(line))
@@ -525,7 +577,7 @@ def parse(stream):
525
577
  line = line[len(block_match.group(0)):]
526
578
  else:
527
579
  if state.block_depth > 0:
528
- line = FGRESET + line
580
+ yield FGRESET
529
581
  state.block_depth = 0
530
582
 
531
583
  # --- Collapse Multiple Empty Lines if not in code blocks ---
@@ -546,15 +598,16 @@ def parse(stream):
546
598
  # \n buffer
547
599
  if not state.in_list and len(state.ordered_list_numbers) > 0:
548
600
  state.ordered_list_numbers[0] = 0
549
- else:
601
+ elif (not line.startswith(' ' * state.list_indent_text)) and line.strip() != "":
550
602
  state.in_list = False
603
+ state.list_indent_text = 0
551
604
 
552
605
  if state.first_indent is None:
553
606
  state.first_indent = len(line) - len(line.lstrip())
554
607
  if len(line) - len(line.lstrip()) >= state.first_indent:
555
608
  line = line[state.first_indent:]
556
609
  else:
557
- logging.warning("Indentation decreased from first line.")
610
+ logging.debug("Indentation decreased from first line.")
558
611
 
559
612
 
560
613
  # Indent guaranteed
@@ -564,9 +617,7 @@ def parse(stream):
564
617
  if state.in_table and not state.in_code and not re.match(r"^\s*\|.+\|\s*$", line):
565
618
  state.in_table = False
566
619
 
567
- #
568
620
  # <code><pre>
569
- #
570
621
  if not state.in_code:
571
622
  code_match = re.match(r"^\s*```\s*([^\s]+|$)\s*$", line)
572
623
  if code_match:
@@ -581,7 +632,8 @@ def parse(stream):
581
632
  state.code_language = 'Bash'
582
633
 
583
634
  if state.in_code:
584
- state.code_buffer = ""
635
+ savebrace()
636
+ state.code_buffer = state.code_buffer_raw = ""
585
637
  state.code_gen = 0
586
638
  state.code_first_line = True
587
639
  state.bg = f"{BG}{Style.Dark}"
@@ -609,7 +661,7 @@ def parse(stream):
609
661
  logging.warning(f"Can't find canonical extension for {state.code_language}")
610
662
  pass
611
663
 
612
- open(os.path.join(state.scrape, f"file_{state.scrape_ix}.{ext}"), 'w').write(state.code_buffer)
664
+ open(os.path.join(state.scrape, f"file_{state.scrape_ix}.{ext}"), 'w').write(state.code_buffer_raw)
613
665
  state.scrape_ix += 1
614
666
 
615
667
  state.code_language = None
@@ -624,7 +676,9 @@ def parse(stream):
624
676
 
625
677
  logging.debug(f"code: {state.in_code}")
626
678
  state.emit_flush = True
627
- yield RESET
679
+ # We suppress the newline - it's not an explicit style
680
+ #state.has_newline = False
681
+ #yield RESET
628
682
 
629
683
  if code_type == Code.Backtick:
630
684
  continue
@@ -633,7 +687,7 @@ def parse(stream):
633
687
  # nor do we want to be here.
634
688
  raise Goto()
635
689
 
636
- if state.code_first_line:
690
+ if state.code_first_line or lexer is None:
637
691
  state.code_first_line = False
638
692
  try:
639
693
  lexer = get_lexer_by_name(state.code_language)
@@ -642,14 +696,16 @@ def parse(stream):
642
696
  lexer = get_lexer_by_name("Bash")
643
697
  custom_style = get_style_by_name("default")
644
698
 
645
- formatter = Terminal256Formatter(style=custom_style)
646
- line = line[state.code_indent :]
699
+ formatter = TerminalTrueColorFormatter(style=custom_style)
700
+ if line.startswith(' ' * state.code_indent):
701
+ line = line[state.code_indent :]
647
702
 
648
703
  elif line.startswith(" " * state.code_indent):
649
704
  line = line[state.code_indent :]
650
705
 
651
706
  # By now we have the properly stripped code line
652
707
  # in the line variable. Add it to the buffer.
708
+ state.code_buffer_raw += line
653
709
  state.code_line += line
654
710
  if state.code_line.endswith('\n'):
655
711
  line = state.code_line
@@ -660,6 +716,8 @@ def parse(stream):
660
716
  indent, line_wrap = code_wrap(line)
661
717
 
662
718
  state.where_from = "in code"
719
+ pre = [state.space_left(listwidth = True), ' '] if Style.PrettyBroken else ['', '']
720
+
663
721
  for tline in line_wrap:
664
722
  # wrap-around is a bunch of tricks. We essentially format longer and longer portions of code. The problem is
665
723
  # the length can change based on look-ahead context so we need to use our expected place (state.code_gen) and
@@ -669,7 +727,7 @@ def parse(stream):
669
727
 
670
728
  # Sometimes the highlighter will do things like a full reset or a background reset.
671
729
  # This is not what we want
672
- highlighted_code = re.sub(r"\033\[39(;00|)m", '', highlighted_code)
730
+ highlighted_code = re.sub(r"\033\[49(;00|)m", '', highlighted_code)
673
731
 
674
732
  # Since we are streaming we ignore the resets and newlines at the end
675
733
  if highlighted_code.endswith(FGRESET + "\n"):
@@ -694,8 +752,8 @@ def parse(stream):
694
752
 
695
753
  code_line = ' ' * indent + this_batch.strip()
696
754
 
697
- margin = state.WidthFull - visible_length(code_line) % state.WidthFull
698
- yield f"{Style.Codebg}{code_line}{' ' * max(0, margin)}{BGRESET}"
755
+ margin = state.full_width( -len(pre[1]) ) - visible_length(code_line) % state.WidthFull
756
+ yield f"{pre[0]}{Style.Codebg}{pre[1]}{code_line}{' ' * max(0, margin)}{BGRESET}"
699
757
  continue
700
758
  except Goto:
701
759
  pass
@@ -705,9 +763,7 @@ def parse(stream):
705
763
  traceback.print_exc()
706
764
  pass
707
765
 
708
- #
709
766
  # <table>
710
- #
711
767
  if re.match(r"^\s*\|.+\|\s*$", line) and not state.in_code:
712
768
  cells = [c.strip() for c in line.strip().strip("|").split("|")]
713
769
 
@@ -729,14 +785,28 @@ def parse(stream):
729
785
  yield from format_table(cells)
730
786
  continue
731
787
 
732
- #
733
788
  # <li> <ul> <ol>
734
789
  # llama-4 maverick uses + and +- for lists ... for some reason
735
- list_item_match = re.match(r"^(\s*)([\+*\-]|\+\-+|\d+\.)\s+(.*)", line)
790
+ content = line
791
+ bullet = ' '
792
+ list_item_match = re.match(r"^(\s*)([\+*\-] |\+\-+|\d+\.\s+)(.*)", line)
736
793
  if list_item_match:
794
+ # llama 4 maverick does this weird output like this
795
+ # 1. blah blah blah
796
+ # this should be a list
797
+ #
798
+ # ```bash
799
+ # blah blah
800
+ # ```
801
+ #
802
+ # still in the list
803
+ # We do this here so that the first line which is the bullet
804
+ # line gets the proper hang
805
+ state.list_indent_text = len(list_item_match.group(2)) - 1
737
806
  state.in_list = True
738
807
 
739
808
  indent = len(list_item_match.group(1))
809
+
740
810
  list_type = "number" if list_item_match.group(2)[0].isdigit() else "bullet"
741
811
  content = list_item_match.group(3)
742
812
 
@@ -756,34 +826,34 @@ def parse(stream):
756
826
  if list_type == "number":
757
827
  state.ordered_list_numbers[-1] += 1
758
828
 
759
- indent = (len(state.list_item_stack) - 1) * 2
760
-
761
- wrap_width = state.Width - indent - (2 * Style.ListIndent)
762
-
763
829
  bullet = '•'
764
830
  if list_type == "number":
765
831
  list_number = int(max(state.ordered_list_numbers[-1], float(list_item_match.group(2))))
766
832
  bullet = str(list_number)
833
+
834
+ # This is intentional ... we can get here in llama 4 using
835
+ # a weird thing
836
+ if state.in_list:
837
+ indent = (len(state.list_item_stack) - 1) * Style.ListIndent + (len(bullet) - 1)
838
+ wrap_width = state.current_width() - indent - (2 * Style.ListIndent)
767
839
 
768
840
  wrapped_lineList = text_wrap(content, wrap_width, Style.ListIndent,
769
- first_line_prefix = f"{(' ' * (indent ))}{FG}{Style.Symbol}{bullet}{RESET} ",
841
+ first_line_prefix = f"{(' ' * indent)}{FG}{Style.Symbol}{bullet}{RESET} ",
770
842
  subsequent_line_prefix = " " * (indent)
771
843
  )
772
844
  for wrapped_line in wrapped_lineList:
773
845
  yield f"{state.space_left()}{wrapped_line}\n"
846
+
774
847
  continue
775
- #
848
+
776
849
  # <h1> ... <h6>
777
- #
778
850
  header_match = re.match(r"^\s*(#{1,6})\s+(.*)", line)
779
851
  if header_match:
780
852
  level = len(header_match.group(1))
781
853
  yield emit_h(level, header_match.group(2))
782
854
  continue
783
855
 
784
- #
785
856
  # <hr>
786
- #
787
857
  hr_match = re.match(r"^[\s]*([-\*=_]){3,}[\s]*$", line)
788
858
  if hr_match:
789
859
  if state.last_line_empty or last_line_empty_cache:
@@ -800,7 +870,7 @@ def parse(stream):
800
870
  if len(line) == 0: yield ""
801
871
  if len(line) < state.Width:
802
872
  # we want to prevent word wrap
803
- yield f"{state.space_left()}{line_format(line)}"
873
+ yield f"{state.space_left()}{line_format(line.lstrip())}"
804
874
  else:
805
875
  wrapped_lines = text_wrap(line)
806
876
  for wrapped_line in wrapped_lines:
@@ -845,10 +915,10 @@ def emit(inp):
845
915
  else:
846
916
  chunk = buffer.pop(0)
847
917
 
848
- print(chunk, end="", flush=True)
918
+ print(chunk, end="", file=sys.stdout, flush=True)
849
919
 
850
920
  if len(buffer):
851
- print(buffer.pop(0), end="", flush=True)
921
+ print(buffer.pop(0), file=sys.stdout, end="", flush=True)
852
922
 
853
923
  def apply_multipliers(name, H, S, V):
854
924
  m = _style.get(name)
@@ -871,15 +941,16 @@ def width_calc():
871
941
  state.WidthFull = width
872
942
 
873
943
  state.Width = state.WidthFull - 2 * Style.Margin
944
+ pre = state.space_left(listwidth=True) if Style.PrettyBroken else ''
874
945
  Style.Codepad = [
875
- f"{RESET}{FG}{Style.Dark}{'▄' * state.WidthFull}{RESET}\n",
876
- f"{RESET}{FG}{Style.Dark}{'▀' * state.WidthFull}{RESET}"
946
+ f"{pre}{RESET}{FG}{Style.Dark}{'▄' * state.full_width()}{RESET}\n",
947
+ f"{pre}{RESET}{FG}{Style.Dark}{'▀' * state.full_width()}{RESET}"
877
948
  ]
878
949
 
879
950
  def main():
880
951
  global H, S, V
881
952
 
882
- parser = ArgumentParser(description="Streamdown - A markdown renderer for modern terminals")
953
+ parser = ArgumentParser(description="Streamdown - A Streaming markdown renderer for modern terminals")
883
954
  parser.add_argument("filenameList", nargs="*", help="Input file to process (also takes stdin)")
884
955
  parser.add_argument("-l", "--loglevel", default="INFO", help="Set the logging level")
885
956
  parser.add_argument("-c", "--color", default=None, help="Set the hsv base: h,s,v")
@@ -896,20 +967,20 @@ def main():
896
967
 
897
968
  for color in ["Dark", "Mid", "Symbol", "Head", "Grey", "Bright"]:
898
969
  setattr(Style, color, apply_multipliers(color, H, S, V))
899
- for attr in ['Margin', 'ListIndent', 'Syntax']:
970
+ for attr in ['PrettyBroken', 'Margin', 'ListIndent', 'Syntax']:
900
971
  setattr(Style, attr, _style.get(attr))
901
-
972
+
902
973
  if args.scrape:
903
974
  os.makedirs(args.scrape, exist_ok=True)
904
975
  state.scrape = args.scrape
905
976
 
906
977
  Style.MarginSpaces = " " * Style.Margin
907
978
  state.WidthArg = int(args.width) or _style.get("Width") or 0
979
+ Style.Blockquote = f"{FG}{Style.Grey}│ "
908
980
  width_calc()
909
981
 
910
982
  Style.Codebg = f"{BG}{Style.Dark}"
911
983
  Style.Link = f"{FG}{Style.Symbol}{UNDERLINE[0]}"
912
- Style.Blockquote = f"{FG}{Style.Grey}│ "
913
984
 
914
985
  logging.basicConfig(stream=sys.stdout, level=args.loglevel.upper(), format=f'%(message)s')
915
986
  state.exec_master, state.exec_slave = pty.openpty()
@@ -922,6 +993,7 @@ def main():
922
993
  os.close(state.exec_slave) # We don't need slave in parent
923
994
  # Set stdin to raw mode so we don't need to press enter
924
995
  tty.setcbreak(sys.stdin.fileno())
996
+ sys.stdout.write("\x1b[?7h")
925
997
  emit(sys.stdin)
926
998
 
927
999
  elif args.filenameList:
@@ -950,15 +1022,14 @@ def main():
950
1022
  logging.warning(f"Exception thrown: {type(ex)} {ex}")
951
1023
  traceback.print_exc()
952
1024
 
953
- if state.Clipboard and state.code_buffer:
954
- code = state.code_buffer
1025
+ if state.Clipboard and state.code_buffer_raw:
1026
+ code = state.code_buffer_raw
955
1027
  # code needs to be a base64 encoded string before emitting
956
1028
  code_bytes = code.encode('utf-8')
957
1029
  base64_bytes = base64.b64encode(code_bytes)
958
1030
  base64_string = base64_bytes.decode('utf-8')
959
1031
  print(f"\033]52;c;{base64_string}\a", end="", flush=True)
960
1032
 
961
-
962
1033
  if state.terminal:
963
1034
  termios.tcsetattr(sys.stdin, termios.TCSADRAIN, state.terminal)
964
1035
  os.close(state.exec_master)
streamdown/ss ADDED
@@ -0,0 +1 @@
1
+ * **Model Card:** Always read the model card on the Hugging Face Hub ([https://huggingface.co/microsoft/bitnet-b1.58-2B-4T](https://huggingface.co/microsoft/bitnet-b1.58-2B-4T)) for important information about the model, its intended use, limitations, and potential biases.
streamdown/ss1 ADDED
@@ -0,0 +1,42 @@
1
+ * `model.safetensors`: The name of the file you want to download. You'll need to know the exact filename. You can find the files in the model repository on the Hugging Face Hub website ([https://huggingface.co/microsoft/bitnet-b1.58-2B-4T](https://huggingface.co/microsoft/bitnet-b1.58-2B-4T)). Look under the "Files and versions" tab. `safetensors` is the preferred format for model weights now. If it's a `.bin` file, you can download that instead.
2
+ * `--local-dir ./bitnet-b1.58-2B-4T`: The directory to save the file to.
3
+
4
+ * **Download using `transformers` library (recommended for most use cases):**
5
+
6
+ The `transformers` library provides a convenient way to download and cache models. This is often the easiest approach if you're using the model with `transformers`. You don't *directly* use the `huggingface-cli` for this, but it's worth knowing.
7
+
8
+ ```python
9
+ from transformers import AutoModelForCausalLM, AutoTokenizer
10
+
11
+ model_name = "microsoft/bitnet-b1.58-2B-4T"
12
+
13
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
14
+ model = AutoModelForCausalLM.from_pretrained(model_name)
15
+
16
+ # The model and tokenizer will be downloaded and cached in your
17
+ # transformers cache directory (usually ~/.cache/huggingface/transformers).
18
+ ```
19
+
20
+ This approach automatically handles downloading the necessary files and caching them for future use. It also handles the correct file formats and configurations.
21
+
22
+ **4. Checking the Download**
23
+
24
+ After the download completes, verify that the files are in the specified directory. You can use `ls` (Linux/macOS) or `dir` (Windows) to list the contents of the directory.
25
+
26
+ **Important Considerations:**
27
+
28
+ * **Disk Space:** The `bitnet-b1.58-2B-4T` model is quite large (several gigabytes). Make sure you have enough free disk space before downloading.
29
+ * **Network Connection:** A stable and fast internet connection is essential for a smooth download.
30
+ * **Caching:** The Hugging Face Hub and `transformers` library use caching to avoid re-downloading models unnecessarily. The default cache directory is usually `~/.cache/huggingface/transformers`.
31
+ * **File Formats:** Models are often stored in `safetensors` or `.bin` formats. `safetensors` is generally preferred for security and performance.
32
+ * **Model Card:** Always read the model card on the Hugging Face Hub ([https://huggingface.co/microsoft/bitnet-b1.58-2B-4T](https://huggingface.co/microsoft/bitnet-b1.58-2B-4T)) for important information about the model, its intended use, limitations, and potential biases.
33
+ * **Gated Models:** Some models require you to accept terms of use before you can download them. The `huggingface-cli login` command will guide you through this process if necessary.
34
+
35
+ **Example Workflow (Recommended):**
36
+
37
+ 1. `huggingface-cli login` (if not already logged in)
38
+ 2. Use the `transformers` library in a Python script to download and load the model (as shown in the example above). This is the most convenient and reliable method for most use cases.
39
+
40
+ Let me know if you have any other questions or if you'd like help with a specific task related to this model!
41
+
42
+ >
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: streamdown
3
- Version: 0.16.0
3
+ Version: 0.18.0
4
4
  Summary: A streaming markdown renderer for modern terminals with syntax highlighting
5
5
  Project-URL: Homepage, https://github.com/kristopolous/Streamdown
6
6
  Project-URL: Bug Tracker, https://github.com/kristopolous/Streamdown/issues
@@ -31,18 +31,22 @@ Description-Content-Type: text/markdown
31
31
  <img src=https://github.com/user-attachments/assets/0468eac0-2a00-4e98-82ca-09e6ac679357/>
32
32
  <br/>
33
33
  <a href=https://pypi.org/project/streamdown><img src=https://badge.fury.io/py/streamdown.svg/></a>
34
+ <br/><strong>Terminal streaming markdown that rocks</strong>
35
+
34
36
  </p>
35
37
 
36
- The streaming markdown renderer for the terminal that rocks!
37
- Streamdown works with [simonw's llm](https://github.com/simonw/llm) along with any other streaming markdown. You even get full readline and keyboard navigation support.
38
+
39
+ Streamdown works with [simonw's llm](https://github.com/simonw/llm) along with any other streaming markdown, even something basic like curl.
40
+ It supports standard piping like any normal pager and a clean `execvp` option for robustly wrapping around interactive programs with readline or their own ANSI stuff to manage.
38
41
  ```bash
39
42
  $ pip install streamdown
40
43
  ```
41
44
  ![Streamdown is Amazing](https://github.com/user-attachments/assets/268cb340-78cc-4df0-a773-c5ac95eceeeb)
42
45
 
43
46
  ### Provides clean copyable code for long code lines
44
- You may have noticed *inferior* renderers inject line breaks when copying code that wraps around. We're better and now you are too!
47
+ Some *inferior* renderers inject line breaks when copying code that wraps around. We're better and now you are too!
45
48
  ![Handle That Mandle](https://github.com/user-attachments/assets/a27aa70c-f691-4796-84f0-c2eb18c7de23)
49
+ **Tip**: You can make things prettier if you don't mind if this guarantee is broken. See the `PrettyBroken` flag below!
46
50
 
47
51
  ### Supports images
48
52
  Here's kitty and alacritty. Try to do that in glow...
@@ -57,6 +61,9 @@ Here's kitty and alacritty. Try to do that in glow...
57
61
  As well as everything else...
58
62
  ![dunder](https://github.com/user-attachments/assets/d41d7fec-6dec-4387-b53d-f2098f269a5e)
59
63
 
64
+ Very ... Carefully ... Supported ...
65
+ ![cjk1](https://github.com/user-attachments/assets/75162ade-4734-440e-aaa3-5ffc17a0dd46)
66
+
60
67
  ### Colors are highly (and quickly) configurable for people who care a lot, or just a little.
61
68
  ![configurable](https://github.com/user-attachments/assets/19ca2ec9-8ea1-4a79-87ca-8352789269fe)
62
69
 
@@ -67,7 +74,7 @@ For instance, here is the [latex plugin](https://github.com/kristopolous/Streamd
67
74
 
68
75
  ## TOML Configuration
69
76
 
70
- Streamdown uses a TOML configuration file located at `~/.config/streamdown/config.toml` (following the XDG Base Directory Specification). If this file does not exist upon first run, it will be created with default values.
77
+ It's located at `~/.config/streamdown/config.toml` (following the XDG Base Directory Specification). If this file does not exist upon first run, it will be created with default values.
71
78
 
72
79
  Here are the sections:
73
80
 
@@ -85,12 +92,15 @@ Defines the base Hue (H), Saturation (S), and Value (V) from which all other pal
85
92
  * `Margin` (integer, default: `2`): The left and right indent for the output.
86
93
  * `Width` (integer, default: `0`): Along with the `Margin`, `Width` specifies the base width of the content, which when set to 0, means use the terminal width. See [#6](https://github.com/kristopolous/Streamdown/issues/6) for more details
87
94
  * `PrettyPad` (boolean, default: `false`): Uses a unicode vertical pad trick to add a half height background to code blocks. This makes copy/paste have artifacts. See [#2](https://github.com/kristopolous/Streamdown/issues/2). I like it on. But that's just me
95
+ * `PrettyBroken` (boolean, default: `true`): This will break the copy/paste assurance above. The output is much prettier, but it's also broken. So it's pretty broken. Works nicely with PrettyPad.
88
96
  * `ListIndent` (integer, default: `2`): This is the recursive indent for the list styles.
89
97
  * `Syntax` (string, default: `monokai`): This is the syntax [highlighting theme, which comes via pygments](https://pygments.org/styles/).
90
98
 
91
99
  Example:
92
100
  ```toml
93
101
  [style]
102
+ PrettyPad = true
103
+ PrettyBroken = true
94
104
  HSV = [0.7, 0.5, 0.5]
95
105
  Dark = { H = 1.0, S = 1.2, V = 0.25 } # Make dark elements less saturated and darker
96
106
  Symbol = { H = 1.0, S = 1.8, V = 1.8 } # Make symbols more vibrant
@@ -103,16 +113,13 @@ Controls optional features:
103
113
  * `CodeSpaces` (boolean, default: `false`): Enables detection of code blocks indented with 4 spaces. Set to `true` to enable this detection method (triple-backtick blocks always work).
104
114
  * `Clipboard` (boolean, default: `true`): Enables copying the last code block encountered to the system clipboard using OSC 52 escape sequences upon exit. Set to `false` to disable.
105
115
  * `Logging` (boolean, default: `false`): Enables logging to tmpdir (/tmp/sd) of the raw markdown for debugging and bug reporting. The logging uses an emoji as a record separator so the actual streaming delays can be simulated and replayed. If you use the `filename` based invocation, that is to say, `sd <filename>`, this type of logging is always off.
106
- * `Timeout` (float, default: `0.5`): This is a workaround to the [buffer parsing bugs](https://github.com/kristopolous/Streamdown/issues/4). By increasing the select timeout, the parser loop only gets triggerd on newline which means that having to resume from things like a code block, inside a list, inside a table, between buffers, without breaking formatting doesn't need to be done. I assert (2025-04-09) this is no longer a bug. Feel free to turn on `Logging` and post an issue if you find a repeatable one.
116
+ * `Savebrace` (boolean, default: `true`): Saves the code blocks of a conversation to the append file `/tmp/sd/savebrace` so you can fzf or whatever you want through it. See how it's used in my [llmehelp](https://github.com/kristopolous/llmehelp) scripts, specifically `screen-query` and `sd-picker`.
107
117
 
108
118
  Example:
109
119
  ```toml
110
120
  [features]
111
121
  CodeSpaces = false
112
122
  Clipboard = false
113
- Margin = 4
114
- Width = 120
115
- Timeout = 1.0
116
123
  ```
117
124
 
118
125
  ## Command Line
@@ -146,7 +153,7 @@ Do this
146
153
  $ ./streamdown/sd.py tests/*md
147
154
 
148
155
  ## Install from source
149
- After the git clone least one of these should work, hopefully. it's using the modern uv pip tool.
156
+ After the git clone, at least one of these should work, hopefully. It's using the modern uv pip tool but is also backwards compatible with the `pip3 install -r requirements.txt` flow.
150
157
 
151
158
  $ pipx install -e .
152
159
  $ pip install -e .
@@ -154,9 +161,5 @@ After the git clone least one of these should work, hopefully. it's using the mo
154
161
 
155
162
  ### Future work
156
163
 
157
- #### CSS
158
- I'm really considering using `tinycss2` and making an actual stylesheet engine. This is related to another problem - getting a modern HTML renderer in the terminal that is actually navigable. I *think* it's probably a separate project.
159
-
160
- #### scrape
161
- This is already partially implemented. The idea is every code block can get extracted and put in a directory so you can have a conversation to generate every piece of a project, similar to Aider, Claude or Goose, but in the most hands-off yet still convenient way possible.
162
-
164
+ #### Glow styles
165
+ I'm going to try to be compatible with other popular markdown styles to help with a smoother transition. Glow-compatible JSON style sheets are on my radar. There's also mdless and frogmouth, and there might be others.
@@ -0,0 +1,11 @@
1
+ streamdown/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ streamdown/sd.py,sha256=gFm6WqrWsMqV8EW9tcq7ebGoXpDFrTd89_ka21AaHm8,39118
3
+ streamdown/ss,sha256=sel_phpaecrw6WGIHRLROsD7BFShf0rSDHheflwdUn8,277
4
+ streamdown/ss1,sha256=CUVf86_2zeAle2oQCeTfWYqtHBrAFR_UgvptuYMQzFU,3151
5
+ streamdown/plugins/README.md,sha256=KWqYELs9WkKJmuDzYv3cvPlZMkArsNCBUe4XDoTLjLA,1143
6
+ streamdown/plugins/latex.py,sha256=xZMGMdx_Sw4X1piZejXFHfEG9qazU4fGeceiMI0h13Y,648
7
+ streamdown-0.18.0.dist-info/METADATA,sha256=Lyrf0k6BjC4wjiwwWz5b7aOFPtLR5uJ41e-SRGr1JC0,8062
8
+ streamdown-0.18.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ streamdown-0.18.0.dist-info/entry_points.txt,sha256=HroKFsFMGf_h9PRTE96NjvjJQWupMW5TGP5RGUr1O_Q,74
10
+ streamdown-0.18.0.dist-info/licenses/LICENSE.MIT,sha256=SnY46EPirUsF20dZDR8HpyVgS2_4Tjxuc6f-4OdqO7U,1070
11
+ streamdown-0.18.0.dist-info/RECORD,,
@@ -1,22 +0,0 @@
1
- def fizzbuzz(n):
2
- for i in range(1, n + 1):
3
- if i % 3 == 0 and i % 5 == 0:
4
- print("FizzBuzz")
5
- elif i % 3 == 0:
6
- print("Fizz")
7
- elif i % 5 == 0:
8
- print("Buzz")
9
- else:
10
- print(i)
11
-
12
- # Example usage: Print FizzBuzz up to 100 Example usage: Print FizzBuzz up to 100 Example usage: Print FizzBuzz up to 100 Example usage: Print FizzBuzz up to 100
13
- fizzbuzz(100)
14
-
15
- # Example usage: different range:
16
- fizzbuzz(20)
17
-
18
- #Example usage: one line output (list comprehension)
19
- def fizzbuzz_oneline(n):
20
- print(["FizzBuzz" if i%3==0 and i%5==0 else "Fizz" if i%3==0 else "Buzz" if i%5==0 else i for i in range(1,n+1)])
21
-
22
- fizzbuzz_oneline(30)
@@ -1,27 +0,0 @@
1
- function fizzBuzz(n) {
2
- for (let i = 1; i <= n; i++) {
3
- if (i % 3 === 0 && i % 5 === 0) {
4
- console.log("FizzBuzz");
5
- } else if (i % 3 === 0) {
6
- console.log("Fizz");
7
- } else if (i % 5 === 0) {
8
- console.log("Buzz");
9
- } else {
10
- console.log(i);
11
- }
12
- }
13
- }
14
-
15
- // Example usage:
16
- fizzBuzz(100);
17
-
18
- // Example usage: different range
19
- fizzBuzz(25);
20
-
21
- // Example one-line output. (arrow function & ternary operator)
22
- const fizzBuzzOneLine = n => {
23
- for (let i = 1; i <= n; i++) {
24
- console.log((i % 3 === 0 ? (i % 5 === 0 ? "FizzBuzz" : "Fizz") : (i % 5 === 0 ? "Buzz" : i)));
25
- }
26
- };
27
- fizzBuzzOneLine(30);
@@ -1,23 +0,0 @@
1
- #include <iostream>
2
-
3
- void fizzBuzz(int n) {
4
- for (int i = 1; i <= n; i++) {
5
- if (i % 3 == 0 && i % 5 == 0) {
6
- std::cout << "FizzBuzz" << std::endl;
7
- } else if (i % 3 == 0) {
8
- std::cout << "Fizz" << std::endl;
9
- } else if (i % 5 == 0) {
10
- std::cout << "Buzz" << std::endl;
11
- } else {
12
- std::cout << i << std::endl;
13
- }
14
- }
15
- }
16
-
17
- int main() {
18
- fizzBuzz(100);
19
-
20
- // Example usage: different range
21
- fizzBuzz(35);
22
- return 0;
23
- }
streamdown/tt.mds DELETED
@@ -1,11 +0,0 @@
1
- **A markdown renderer for modern terminals**
2
- ##### Usage examples:
3
-
4
- ``` bash
5
- sd [filename]
6
- cat README.md | sd
7
- stdbuf -oL llm chat | sd
8
- ```
9
-
10
- If no filename is provided and no input is piped, this help message is displayed.
11
-
@@ -1,13 +0,0 @@
1
- streamdown/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- streamdown/sd.py,sha256=0Ug2grAsf_RWVMyDMOc0j_ZTmL8N6dOPpk24LKBPkhA,35641
3
- streamdown/tt.mds,sha256=srDldQ9KnMJd5P8GdTXTJl4mjTowwV9y58ZIaBVbtFY,359
4
- streamdown/plugins/README.md,sha256=KWqYELs9WkKJmuDzYv3cvPlZMkArsNCBUe4XDoTLjLA,1143
5
- streamdown/plugins/latex.py,sha256=xZMGMdx_Sw4X1piZejXFHfEG9qazU4fGeceiMI0h13Y,648
6
- streamdown/scrape/file_0.py,sha256=OiFxFGGHu2C2iO9LVnhXKCybqCsnw0bu8MmI2E0vs_s,610
7
- streamdown/scrape/file_1.js,sha256=JnXSvlsk9UmU5LsGOfXkP3sGId8VNEJRJo8-uRohRCM,569
8
- streamdown/scrape/file_2.cpp,sha256=4hbT9TJzDNmrU7BVwaIuCMlI2BvUEVeTKoH6wUJRkrI,397
9
- streamdown-0.16.0.dist-info/METADATA,sha256=sq6eIyimqRI8cZHKkNbDdP3OoYpEPYCU-v5LFhBgYfQ,7968
10
- streamdown-0.16.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
- streamdown-0.16.0.dist-info/entry_points.txt,sha256=HroKFsFMGf_h9PRTE96NjvjJQWupMW5TGP5RGUr1O_Q,74
12
- streamdown-0.16.0.dist-info/licenses/LICENSE.MIT,sha256=SnY46EPirUsF20dZDR8HpyVgS2_4Tjxuc6f-4OdqO7U,1070
13
- streamdown-0.16.0.dist-info/RECORD,,