PyPI - visidata - Versions diffs - 3.1.1__py3-none-any.whl → 3.3__py3-none-any.whl - Mend

visidata 3.1.1__py3-none-any.whl → 3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99) hide show

visidata/__init__.py +2 -2
visidata/_input.py +106 -58
visidata/_open.py +10 -7
visidata/_types.py +2 -2
visidata/aggregators.py +125 -16
visidata/apps/vdsql/_ibis.py +8 -13
visidata/basesheet.py +4 -3
visidata/canvas.py +11 -7
visidata/clipboard.py +11 -2
visidata/cliptext.py +68 -23
visidata/cmdlog.py +5 -1
visidata/column.py +48 -33
visidata/ddwplay.py +2 -2
visidata/deprecated.py +96 -63
visidata/errors.py +41 -5
visidata/{features → experimental}/helloworld.py +1 -1
visidata/experimental/liveupdate.py +1 -1
visidata/expr.py +1 -0
visidata/extensible.py +4 -0
visidata/features/cmdpalette.py +64 -25
visidata/features/describe.py +2 -2
visidata/features/expand_cols.py +7 -5
visidata/features/freeze.py +14 -2
visidata/features/go_col.py +3 -3
visidata/features/graph_zoom_y.py +47 -0
visidata/features/incr.py +7 -3
visidata/features/join.py +23 -12
visidata/features/layout.py +8 -4
visidata/features/melt.py +1 -0
visidata/features/rank.py +103 -0
visidata/features/reload_every.py +11 -8
visidata/features/sysedit.py +14 -4
visidata/features/transpose.py +1 -0
visidata/features/window.py +12 -0
visidata/form.py +10 -9
visidata/freqtbl.py +47 -3
visidata/fuzzymatch.py +11 -7
visidata/graph.py +5 -3
visidata/guides/AggregatorsSheet.md +84 -0
visidata/guides/CommandsSheet.md +1 -0
visidata/guides/MacrosSheet.md +1 -1
visidata/guides/RankGuide.md +51 -0
visidata/guides/TypesSheet.md +1 -1
visidata/guides/WindowFunctionGuide.md +49 -0
visidata/help.py +23 -6
visidata/indexsheet.py +1 -1
visidata/loaders/_pandas.py +3 -1
visidata/loaders/archive.py +33 -6
visidata/loaders/csv.py +12 -1
visidata/loaders/eml.py +2 -0
visidata/loaders/f5log.py +2 -2
visidata/loaders/fec.py +6 -9
visidata/loaders/fixed_width.py +2 -0
visidata/loaders/hdf5.py +34 -10
visidata/loaders/npy.py +54 -23
visidata/loaders/orgmode.py +3 -2
visidata/loaders/pandas_freqtbl.py +4 -0
visidata/loaders/psv.py +13 -0
visidata/loaders/sqlite.py +1 -1
visidata/loaders/vds.py +3 -4
visidata/macros.py +5 -4
visidata/main.py +21 -11
visidata/mainloop.py +8 -5
visidata/man/parse_options.py +3 -2
visidata/man/vd.1 +38 -17
visidata/man/vd.txt +47 -17
visidata/menu.py +10 -10
visidata/metasheets.py +3 -3
visidata/mouse.py +3 -0
visidata/movement.py +6 -3
visidata/pyobj.py +17 -9
visidata/save.py +10 -2
visidata/selection.py +29 -18
visidata/settings.py +9 -5
visidata/sheets.py +124 -48
visidata/shell.py +2 -2
visidata/sidebar.py +11 -8
visidata/sort.py +89 -11
visidata/statusbar.py +10 -9
visidata/tests/test_cliptext.py +164 -0
visidata/tests/test_commands.py +6 -2
visidata/tests/test_menu.py +1 -1
visidata/textsheet.py +34 -8
visidata/themes/ascii8.py +2 -2
visidata/themes/light.py +5 -0
visidata/threads.py +38 -8
visidata/utils.py +15 -1
visidata/vendor/__init__.py +0 -0
{visidata-3.1.1.data → visidata-3.3.data}/data/share/man/man1/vd.1 +38 -17
{visidata-3.1.1.data → visidata-3.3.data}/data/share/man/man1/visidata.1 +38 -17
{visidata-3.1.1.dist-info → visidata-3.3.dist-info}/METADATA +62 -15
{visidata-3.1.1.dist-info → visidata-3.3.dist-info}/RECORD +98 -92
{visidata-3.1.1.dist-info → visidata-3.3.dist-info}/WHEEL +1 -1
{visidata-3.1.1.dist-info → visidata-3.3.dist-info}/entry_points.txt +1 -0
visidata-3.1.1.data/scripts/vd +0 -6
{visidata-3.1.1.data → visidata-3.3.data}/data/share/applications/visidata.desktop +0 -0
{visidata-3.1.1.data → visidata-3.3.data}/scripts/vd2to3.vdx +0 -0
{visidata-3.1.1.dist-info → visidata-3.3.dist-info}/LICENSE.gpl3 +0 -0
{visidata-3.1.1.dist-info → visidata-3.3.dist-info}/top_level.txt +0 -0

visidata/__init__.py CHANGED Viewed

@@ -1,10 +1,10 @@
 'VisiData: a curses interface for exploring and arranging tabular data'
-__version__ = '3.1.1'
+__version__ = '3.3'
 __version_info__ = 'VisiData v' + __version__
 __author__ = 'Saul Pwanson <vd@saul.pw>'
 __status__ = 'Production/Stable'
-__copyright__ = 'Copyright (c) 2016-2021 ' + __author__
+__copyright__ = 'Copyright (c) 2016-2024 ' + __author__
 class EscapeException(BaseException):

visidata/_input.py CHANGED Viewed

@@ -4,7 +4,7 @@ import curses
 import visidata
 from visidata import EscapeException, ExpectedException, clipdraw, Sheet, VisiData, BaseSheet
-from visidata import vd, colors, dispwidth, ColorAttr
+from visidata import vd, colors, dispwidth, ColorAttr, clipstr_start
 from visidata import AttrDict
@@ -112,7 +112,8 @@ def delchar(s, i, remove=1):
     'Delete `remove` characters from str `s` beginning at position `i`.'
     return s if i < 0 else s[:i] + s[i+remove:]
-def find_nonword(s, a, b, incr):
+def find_word(s, a, b, incr):
+        '''Return first index of word boundary in s[a:b], going forward if incr is +1 and backward if incr is -1.'''
         if not s: return 0
         a = min(max(a, 0), len(s)-1)
         b = min(max(b, 0), len(s)-1)
@@ -124,9 +125,9 @@ def find_nonword(s, a, b, incr):
                 b += incr
             return min(max(b, -1), len(s))
         else:
-            while not s[a].isalnum() and a < b:  # first skip non-word chars
+            while s[a].isalnum() and a < b:       # first skip word chars
                 a += incr
-            while s[a].isalnum() and a < b:
+            while not s[a].isalnum() and a < b:   # then skip non-word chars
                 a += incr
             return min(max(a, 0), len(s))
@@ -173,15 +174,14 @@ class InputWidget:
         self.former_i = None
         self.just_completed = False
-    def editline(self, scr, y, x, w, attr=ColorAttr(), updater=lambda val: None, bindings={}, clear=True) -> str:
+    def editline(self, scr, y, x, w, attr=ColorAttr(), updater=lambda val:None, bindings={}, clear=True) -> str:
         'If *clear* is True, clear whole editing area before displaying.'
         with EnableCursor():
             while True:
-                vd.drawSheet(scr, vd.activeSheet)
-                if updater:
+                if len(vd.pendingKeys) <= 3:  #speed up paste of long strings by skipping redraws
+                    vd.drawSheet(scr, vd.activeSheet)
                     updater(self.value)
-                vd.drawInputHelp(scr)
+                    vd.drawInputHelp(scr)
                 self.draw(scr, y, x, w, attr, clear=clear)
                 ch = vd.getkeystroke(scr)
@@ -194,32 +194,59 @@ class InputWidget:
     def draw(self, scr, y, x, w, attr=ColorAttr(), clear=True):
         i = self.current_i  # the onscreen offset within the field where v[i] is displayed
-        left_truncchar = right_truncchar = self.truncchar
+        trunch = self.truncchar
+        tr_w = dispwidth(trunch)
+        fill_w = dispwidth(self.fillchar)
+        def _calc_display(dispval, i):
+            '''Return a formatted substring of *dispval* that fills the on-screen width *w*.'''
+            if i == len(dispval): # add a fillchar so the user perceives room to type
+                dispval += self.fillchar
+            dw = dispwidth(dispval)
+            if dw <= w:  # entire value fits
+                dispval += self.fillchar*(w-dw)
+                return dispval, i
+            if w <= tr_w: # column is too narrow to hold a left and right truncation
+                return trunch, 0
+            dw = dispwidth(dispval[i:])
+            if dw + tr_w <= w and dw <= w//2: #cursor is within half-colwidth of end
+                #truncate the left and show the end
+                frag, n = clipstr_start(dispval, w-tr_w)
+                offset = len(dispval) - i
+                dispval = ' '*(w-tr_w - n) + trunch + frag
+                i = len(dispval) - offset
+                return dispval, i
+            # the remaining cases need the right side truncated, after the new dispval is returned
+            dw = dispwidth(dispval[:i+1])
+            if dw + tr_w <= w and dispwidth(dispval[:i]) <= w//2: #cursor is within half-colwidth of start
+                #truncate the right, and show the string start
+                pass
+            else: # truncate left and right sides
+                # Place the cursor at the midpoint of the available colwidth
+                left_w = (w - 2*tr_w)//2
+                # calculate the fragment to the left of the cursor
+                l_frag, n = clipstr_start(dispval[:i], left_w)
+                dispval = ' '*(left_w-n) + trunch + l_frag + dispval[i:]
+                i = left_w-n + len(trunch) + len(l_frag)
+            return dispval, i
         if self.display:
             dispval = clean_printable(self.value)
         else:
             dispval = '*' * len(self.value)
+        dispval, i = _calc_display(dispval, i)
-        if len(dispval) < w:  # entire value fits
-            dispval += self.fillchar*(w-len(dispval)-1)
-        elif i == len(dispval):  # cursor after value (will append)
-            i = w-1
-            dispval = left_truncchar + dispval[len(dispval)-w+2:] + self.fillchar
-        elif i >= len(dispval)-w//2:  # cursor within halfwidth of end
-            i = w-(len(dispval)-i)
-            dispval = left_truncchar + dispval[len(dispval)-w+1:]
-        elif i <= w//2:  # cursor within halfwidth of beginning
-            dispval = dispval[:w-1] + right_truncchar
-        else:
-            i = w//2  # visual cursor stays right in the middle
-            k = 1 if w%2==0 else 0  # odd widths have one character more
-            dispval = left_truncchar + dispval[self.current_i-w//2+1:self.current_i+w//2-k] + right_truncchar
-        prew = clipdraw(scr, y, x, dispval[:i], attr, w, clear=clear, literal=True)
-        clipdraw(scr, y, x+prew, dispval[i:], attr, w-prew+1, clear=clear, literal=True)
+        #clipdraw will truncate the right side of dispval with trunch as needed
+        clipdraw(scr, y, x, dispval, attr, w, clear=clear, literal=True)
+        if x+w < scr.getmaxyx()[1]:
+            #draw a space to indicate that the user can scroll right of the cell's final char
+            clipdraw(scr, y, x+w, ' ', attr, 1, clear=False, literal=True)
         if scr:
-            scr.move(y, x+prew)
+            prew = dispwidth(dispval[:i])
+            if x+prew < scr.getmaxyx()[1]: #move cursor back to where the user is editing
+                scr.move(y, x+prew)
     def handle_key(self, ch:str, scr) -> bool:
         'Return True to accept current input.  Raise EscapeException on Ctrl+C, Ctrl+Q, or ESC.'
@@ -249,17 +276,25 @@ class InputWidget:
             c = vd.prettykeys(c)
             i += len(c)
             v += c
-        elif ch == '^O':                           self.value = vd.launchExternalEditor(v); return True  # auto-accept after $EDITOR
+        elif ch == '^O':
+            edit_v = vd.launchExternalEditor(v)
+            if self.value == edit_v:
+                # leave cell unmodified when the editor exits with no change
+                raise EscapeException(ch)
+            else:
+                self.value = edit_v
+                return True
         elif ch == '^R':                           v = self.orig_value  # ^Reload initial value
         elif ch == '^T':                           v = delchar(splice(v, i-2, v[i-1:i]), i)  # swap chars
         elif ch == '^U':                           v = v[i:]; i = 0  # clear to beginning
         elif ch == '^V':                           v = splice(v, i, until_get_wch(scr)); i += 1  # literal character
-        elif ch == '^W':                           j = find_nonword(v, 0, i-1, -1); v = v[:j+1] + v[i:]; i = j+1  # erase word
+        elif ch == '^W':                           j = find_word(v, 0, i-1, -1); v = v[:j+1] + v[i:]; i = j+1  # erase word
+        elif ch in ('KEY_DC5','kDC5','kDC3'):      j = find_word(v, i, len(v), +1); v = v[:i] + v[j+1:]  # erase word forward
         elif ch == '^Y':                           v = splice(v, i, str(vd.memory.clipval))
         elif ch == '^Z':                           vd.suspend()
         # CTRL+arrow
-        elif ch == 'kLFT5':                        i = find_nonword(v, 0, i-1, -1)+1; # word left
-        elif ch == 'kRIT5':                        i = find_nonword(v, i+1, len(v)-1, +1)+1; # word right
+        elif ch == 'kLFT5':                        i = find_word(v, 0, i-1, -1)+1;  # word left
+        elif ch == 'kRIT5':                        i = find_word(v, i, len(v)-1, +1);  # word right
         elif ch == 'kUP5':                         pass
         elif ch == 'kDN5':                         pass
         elif self.history and ch == 'KEY_UP':    v, i = self.prev_history(v, i)
@@ -337,9 +372,9 @@ class InputWidget:
 @VisiData.api
 def editText(vd, y, x, w, attr=ColorAttr(), value='',
              help='',
-             updater=None, bindings={},
+             updater=lambda val: None, bindings={},
              display=True, record=True, clear=True, **kwargs):
-    'Invoke modal single-line editor at (*y*, *x*) for *w* terminal chars. Use *display* is False for sensitive input like passphrases.  If *record* is True, get input from the cmdlog in batch mode, and save input to the cmdlog if *display* is also True. Return new value as string.'
+    '''Invoke modal single-line editor at (*y*, *x*) for *w* terminal chars. Use *display* is False for sensitive input like passphrases.  If *record* is True, get input from the cmdlog in batch mode, and save input to the cmdlog if *display* is also True. Return new value as string. Callers should handle curses.error, which will be raised if the terminal is resized during the edit, in a way that moves the editor coordinates offscreen.'''
     v = None
     if record and vd.cmdlog:
         v = vd.getCommandInput()
@@ -357,8 +392,8 @@ def editText(vd, y, x, w, attr=ColorAttr(), value='',
         try:
             widget = InputWidget(value=str(value), display=display, **kwargs)
-            with vd.AddedHelp(vd.getHelpPane('input', module='visidata'), 'Input Keystrokes Help'), \
-                 vd.AddedHelp(help, 'Input Field Help'):
+            with vd.AddedHelp(vd.getHelpPane('input', module='visidata'), 'Input Keystrokes Help', 'inputkeys'), \
+                 vd.AddedHelp(help, 'Input Field Help', 'inputfield'):
                 v = widget.editline(vd.activeSheet._scr, y, x, w, attr=attr, updater=updater, bindings=bindings, clear=clear)
         except AcceptInput as e:
             v = e.args[0]
@@ -434,7 +469,6 @@ def inputMultiple(vd, updater=lambda val: None, record=True, **kwargs):
         assert False, type(previnput)
-    y = sheet.windowHeight-1
     maxw = sheet.windowWidth//2
     attr = colors.color_edit_unfocused
@@ -460,9 +494,11 @@ def inputMultiple(vd, updater=lambda val: None, record=True, **kwargs):
     def _drawPrompt(val):
         for k, v in kwargs.items():
+            #recalculate y to adjust for screen resizes during input()
+            y = sheet.windowHeight-v.get('dy')-1
             maxw = min(sheet.windowWidth-1, max(dispwidth(v.get('prompt')), dispwidth(str(v.get('value', '')))))
-            promptlen = clipdraw(scr, y-v.get('dy'), 0, v.get('prompt'), attr, w=maxw)  #1947
-            promptlen = clipdraw(scr, y-v.get('dy'), promptlen, v.get('value', ''),  attr, w=maxw)
+            promptlen = clipdraw(scr, y, 0, v.get('prompt'), attr, w=maxw)  #1947
+            promptlen = clipdraw(scr, y, promptlen, v.get('value', ''),  attr, w=maxw)
         return updater(val)
@@ -549,27 +585,36 @@ def input(vd, prompt, type=None, defaultLast=False, history=[], dy=0, attr=None,
         return sheet.windowWidth-promptlen-rstatuslen-2
     w = kwargs.pop('w', _drawPrompt())
-    ret = vd.editText(y, promptlen, w=w,
-                        attr=colors.color_edit_cell,
-                        options=vd.options,
-                        history=history,
-                        updater=_drawPrompt,
-                        **kwargs)
-    if ret:
-        if kwargs.get('record', True) and kwargs.get('display', True):
-            vd.addInputHistory(ret, type=type)
-    elif defaultLast:
-        history or vd.fail("no previous input")
-        ret = history[-1]
+    restarts = 0
+    while restarts < 100:
+        #recalculate y to handle resize events
+        y = sheet.windowHeight-dy-1
+        try:
+            ret = vd.editText(y, promptlen, w=w,
+                                attr=colors.color_edit_cell,
+                                options=vd.options,
+                                history=history,
+                                updater=_drawPrompt,
+                                **kwargs)
+            if ret:
+                if kwargs.get('record', True) and kwargs.get('display', True):
+                    vd.addInputHistory(ret, type=type)
+            elif defaultLast:
+                history or vd.fail("no previous input")
+                ret = history[-1]
-    return ret
+            return ret
+        except curses.error:
+            vd.warning('restarting input due to resize')
+            restarts += 1
+    # if it keeps happening, it's probably not resize events, so give some debug output
+    vd.error(f'aborting input:  y={y}, w={w}, windowHeight={sheet.windowHeight}, windowWidth={sheet.windowWidth}')
 @VisiData.api
 def confirm(vd, prompt, exc=EscapeException):
     'Display *prompt* on status line and demand input that starts with "Y" or "y" to proceed.  Raise *exc* otherwise.  Return True.'
-    if vd.options.batch and not vd.options.interactive:
+    if vd.options.batch:
         return vd.fail('cannot confirm in batch mode: ' + prompt)
     yn = vd.input(prompt, value='no', record=False)[:1]
@@ -594,7 +639,7 @@ class CompleteKey:
 @Sheet.api
 def editCell(self, vcolidx=None, rowidx=None, value=None, **kwargs):
     '''Call vd.editText for the cell at (*rowidx*, *vcolidx*).  Return the new value, properly typed.
+       - *vcolidx*: numeric index into ``self.availCols``. When None, use current column.
        - *rowidx*: numeric index into ``self.rows``.  If negative, indicates the column name in the header.
        - *value*: if given, the starting input; otherwise the starting input is the cell value or column name as appropriate.
        - *kwargs*: passthrough args to ``vd.editText``.
@@ -604,7 +649,7 @@ def editCell(self, vcolidx=None, rowidx=None, value=None, **kwargs):
         vcolidx = self.cursorVisibleColIndex
     x, w = self._visibleColLayout.get(vcolidx, (0, 0))
-    col = self.visibleCols[vcolidx]
+    col = self.availCols[vcolidx]
     if rowidx is None:
         rowidx = self.cursorRowIndex
@@ -626,7 +671,7 @@ def editCell(self, vcolidx=None, rowidx=None, value=None, **kwargs):
         'KEY_BTAB':   acceptThenFunc('go-left', 'rename-col' if rowidx < 0 else 'edit-cell'),
     }
-    if vcolidx >= self.nVisibleCols-1:
+    if vcolidx == self.nVisibleCols-1 or vcolidx >= self.nCols-1:
         bindings['^I'] = acceptThenFunc('go-down', 'go-leftmost', 'edit-cell')
     if vcolidx <= 0:
@@ -639,7 +684,10 @@ def editCell(self, vcolidx=None, rowidx=None, value=None, **kwargs):
     editargs = dict(value=value, options=self.options)
     editargs.update(kwargs)  # update with user-specified args
-    r = vd.editText(y, x, w, attr=colors.color_edit_cell, **editargs)
+    try:
+        r = vd.editText(y, x, w, attr=colors.color_edit_cell, **editargs)
+    except curses.error:
+        vd.fail(f'aborting edit due to resize')
     if rowidx >= 0:  # if not header
         r = col.type(r)  # convert input to column type, let exceptions be raised

visidata/_open.py CHANGED Viewed

@@ -81,6 +81,10 @@ def guess_extension(vd, path):
 def openPath(vd, p, filetype=None, create=False):
     '''Call ``open_<filetype>(p)`` or ``openurl_<p.scheme>(p, filetype)``.  Return constructed but unloaded sheet of appropriate type.
     If True, *create* will return a new, blank **Sheet** if file does not exist.'''
+    # allow user to assign a filetype to a pathname:  options.set('filetype', 'csv', '-')
+    filetype = filetype or vd.options.getonly('filetype', str(p), None)  #1710
+    filetype = filetype or vd.options.getonly('filetype', 'global', None)
     if p.scheme and not p.has_fp():
         schemes = p.scheme.split('+')
         openfuncname = 'openurl_' + schemes[-1]
@@ -94,8 +98,10 @@ def openPath(vd, p, filetype=None, create=False):
     if not p.exists() and not create:
         return None
-    if not filetype:
-        filetype = p.ext or vd.options.filetype
+    # assign filetype from extension, but only for files, not directories
+    if not p.is_dir():  #2547
+        filetype = filetype or p.ext
+    filetype = filetype or vd.options.filetype
     filetype = filetype.lower()
@@ -147,15 +153,12 @@ def openSource(vd, p, filetype=None, create=False, **kwargs):
     if isinstance(p, BaseSheet):
         return p
-    filetype = filetype or vd.options.getonly('filetype', str(p), '')  #1710
-    filetype = filetype or vd.options.getonly('filetype', 'global', '')
     vs = None
     if isinstance(p, str):
         if '://' in p:
             vs = vd.openPath(Path(p), filetype=filetype)  # convert to Path and recurse
         elif p == '-':
-            if sys.stdin.isatty():
+            if vd.stdinSource.fptext.isatty():
                 vd.fail('cannot open stdin when it is a tty')
             vs = vd.openPath(vd.stdinSource, filetype=filetype)
         else:
@@ -180,7 +183,7 @@ def open_txt(vd, p):
                 if delimiter and delimiter in next(fp):    # peek at the first line
                     return vd.open_tsv(p)  # TSV often have .txt extension
             except StopIteration:
-                return TableSheet(p.base_stem, columns=[SettableColumn(width=vd.options.default_width)], source=p)
+                return vd.newSheet(p.base_stem, 1, source=p)
     return TextSheet(p.base_stem, source=p)

visidata/_types.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # VisiData uses Python native int, float, str, and adds simple anytype.
 import locale
-from visidata import options, TypedWrapper, vd, VisiData
+from visidata import vd, VisiData
 vd.help_float_fmt = '''
 - fmt starting with `'%'` (like `%0.2f`) will use [:onclick https://docs.python.org/3.6/library/locale.html#locale.format_string]locale.format_string[/]
@@ -40,7 +40,7 @@ anytype.__name__ = ''
 @VisiData.global_api
 def numericFormatter(vd, fmtstr, typedval):
     try:
-        fmtstr = fmtstr or options['disp_'+type(typedval).__name__+'_fmt']
+        fmtstr = fmtstr or vd.options['disp_'+type(typedval).__name__+'_fmt']
         if fmtstr[0] == '%':
             return locale.format_string(fmtstr, typedval, grouping=False)
         else:

visidata/aggregators.py CHANGED Viewed

@@ -3,9 +3,11 @@ import math
 import functools
 import collections
 import statistics
+from copy import copy
+import itertools
-from visidata import Progress, Sheet, Column, ColumnsSheet, VisiData
-from visidata import vd, anytype, vlen, asyncthread, wrapply, AttrDict, date, INPROGRESS
+from visidata import Progress, Sheet, Column, ColumnsSheet, VisiData, SettableColumn
+from visidata import vd, anytype, vlen, asyncthread, wrapply, AttrDict, date, INPROGRESS, dispwidth, stacktrace, TypedExceptionWrapper
 vd.help_aggregators = '''# Choose Aggregators
 Start typing an aggregator name or description.
@@ -76,7 +78,7 @@ Column.aggregators = property(aggregators_get, aggregators_set)
 class Aggregator:
-    def __init__(self, name, type, funcValues=None, helpstr='foo'):
+    def __init__(self, name, type, funcValues=None, helpstr=''):
         'Define aggregator `name` that calls funcValues(values)'
         self.type = type
         self.funcValues = funcValues  # funcValues(values)
@@ -92,6 +94,33 @@ class Aggregator:
                 return None
             raise e
+class ListAggregator(Aggregator):
+    '''A list aggregator is an aggregator that returns a list of values, generally
+    one value per input row, unlike ordinary aggregators that operate on rows
+    and return only a single value.
+    To implement a new list aggregator, subclass ListAggregator,
+    and override aggregate() and aggregate_list().'''
+    def __init__(self, name, type, helpstr='', listtype=None):
+        '''*listtype* determines the type of the column created by addcol_aggregate()
+        for list aggrs. If it is None, then the new column will match the type of the input column'''
+        super().__init__(name, type, helpstr=helpstr)
+        self.listtype = listtype
+    def aggregate(self, col, rows) -> list:
+        '''Return a list, which can be shorter than *rows*, because it filters out nulls and errors.
+        Override in subclass.'''
+        vals = self.aggregate_list(col, rows)
+        # filter out nulls and errors
+        vals = [ v for v in vals if not col.sheet.isNullFunc()(v) ]
+        return vals
+    def aggregate_list(self, col, row_group) -> list:
+        '''Return a list of results, which will be one result per input row.
+        *row_group* is an iterable that holds a "group" of rows to run the aggregator on.
+        rows in *row_group* are not necessarily in the same order they are in the sheet.
+        Override in subclass.'''
+        vals = [ col.getTypedValue(r) for r in row_group ]
+        return vals
 @VisiData.api
 def aggregator(vd, name, funcValues, helpstr='', *, type=None):
@@ -99,6 +128,14 @@ def aggregator(vd, name, funcValues, helpstr='', *, type=None):
        Use *type* to force type of aggregated column (default to use type of source column).'''
     vd.aggregators[name] = Aggregator(name, type, funcValues=funcValues, helpstr=helpstr)
+@VisiData.api
+def aggregator_list(vd, name, helpstr='', type=anytype, listtype=anytype):
+    '''Define simple aggregator *name* that calls ``funcValues(values)`` to aggregate *values*.
+       Use *type* to force type of aggregated column (default to use type of source column).
+       Use *listtype* to force the type of the new column created by addcol-aggregate.
+       If *listtype* is None, it will match the type of the source column.'''
+    vd.aggregators[name] = ListAggregator(name, type, helpstr=helpstr, listtype=listtype)
 ## specific aggregator implementations
 def mean(vals):
@@ -109,6 +146,16 @@ def mean(vals):
 def vsum(vals):
     return sum(vals, start=type(vals[0] if len(vals) else 0)())  #1996
+def stdev(vals):
+    # because statistics.stdev can raise an exception, we put it in a wrapper.
+    # The wrapper lets the exception be seen as an error string in the stdev
+    # aggregator, shown at the bottom of the sheet as part of allAggregators.
+    try:
+        return statistics.stdev(vals)
+    except statistics.StatisticsError as e:  #when vals holds only 1 element
+        e.stacktrace = stacktrace()
+        return TypedExceptionWrapper(None, exception=e)
 # http://code.activestate.com/recipes/511478-finding-the-percentile-of-the-values/
 def _percentile(N, percent, key=lambda x:x):
     """
@@ -140,10 +187,49 @@ class PercentileAggregator(Aggregator):
     def aggregate(self, col, rows):
         return _percentile(sorted(col.getValues(rows)), self.pct/100, key=float)
 def quantiles(q, helpstr):
     return [PercentileAggregator(round(100*i/q), helpstr) for i in range(1, q)]
+def aggregate_groups(sheet, col, rows, aggr) -> list:
+    '''Returns a list, containing the result of the aggregator applied to each row.
+    *col* is a column whose values determine each row's rank within a group.
+    *rows* is a list of visidata rows.
+    *aggr* is an Aggregator object.
+    Rows are grouped by their key columns. Null key column cells are considered equal,
+    so nulls are grouped together. Cells with exceptions do not group together.
+    Each exception cell is grouped by itself, with only one row in the group.
+    '''
+    def _key_progress(prog):
+        def identity(val):
+            prog.addProgress(1)
+            return val
+        return identity
+    with Progress(gerund='ranking', total=4*sheet.nRows) as prog:
+        p = _key_progress(prog) # increment progress every time p() is called
+        # compile row data, for each row a list of tuples: (group_key, rank_key, rownum)
+        rowdata = [(sheet.rowkey(r), col.getTypedValue(r), p(rownum)) for rownum, r in enumerate(rows)]
+        # sort by row key and column value to prepare for grouping
+        try:
+            rowdata.sort(key=p)
+        except TypeError as e:
+            vd.fail(f'elements in a ranking column must be comparable: {e.args[0]}')
+        rowvals = []
+        #group by row key
+        for _, group in itertools.groupby(rowdata, key=lambda v: v[0]):
+            # within a group, the rows have already been sorted by col_val
+            group = list(group)
+            if isinstance(aggr, ListAggregator): # for list aggregators, each row gets its own value
+                aggr_vals = aggr.aggregate_list(col, [rows[rownum] for _, _, rownum in group])
+                rowvals += [(rownum, v) for (_, _, rownum), v in zip(group, aggr_vals)]
+            else:             # for normal aggregators, each row in the group gets the same value
+                aggr_val = aggr.aggregate(col, [rows[rownum] for _, _, rownum in group])
+                rowvals += [(rownum, aggr_val) for _, _, rownum in group]
+            prog.addProgress(len(group))
+        # sort by unique rownum, to make rank results match the original row order
+        rowvals.sort(key=p)
+        rowvals = [ v for rownum, v in rowvals ]
+        return rowvals
 vd.aggregator('min', min, 'minimum value')
 vd.aggregator('max', max, 'maximum value')
@@ -154,8 +240,8 @@ vd.aggregator('mode', statistics.mode, 'mode of values')
 vd.aggregator('sum', vsum, 'sum of values')
 vd.aggregator('distinct', set, 'distinct values', type=vlen)
 vd.aggregator('count', lambda values: sum(1 for v in values), 'number of values', type=int)
-vd.aggregator('list', list, 'list of values', type=anytype)
-vd.aggregator('stdev', statistics.stdev, 'standard deviation of values', type=float)
+vd.aggregator_list('list', 'list of values', type=anytype, listtype=None)
+vd.aggregator('stdev', stdev, 'standard deviation of values', type=float)
 vd.aggregators['q3'] = quantiles(3, 'tertiles (33/66th pctile)')
 vd.aggregators['q4'] = quantiles(4, 'quartiles (25/50/75th pctile)')
@@ -205,10 +291,9 @@ def addAggregators(sheet, cols, aggrnames):
     for aggrname in aggrnames:
         aggrs = vd.aggregators.get(aggrname)
         aggrs = aggrs if isinstance(aggrs, list) else [aggrs]
-        for aggr in aggrs:
-            for c in cols:
-                if not hasattr(c, 'aggregators'):
-                    c.aggregators = []
+        for c in cols:
+            vd.addUndo(setattr, c, 'aggregators', copy(c.aggregators))
+            for aggr in aggrs:
                 if aggr and aggr not in c.aggregators:
                     c.aggregators += [aggr]
@@ -243,7 +328,8 @@ def memo_aggregate(col, agg_choices, rows):
         for agg in aggs:
             aggval = agg.aggregate(col, rows)
             typedval = wrapply(agg.type or col.type, aggval)
-            dispval = col.format(typedval)
+            # limit width to limit formatting time when typedval is a long list
+            dispval = col.format(typedval, width=1000)
             k = col.name+'_'+agg.name
             vd.status(f'{k}={dispval}')
             vd.memory[k] = typedval
@@ -254,17 +340,16 @@ def aggregator_choices(vd):
     return [
        AttrDict(key=agg, desc=v[0].helpstr if isinstance(v, list) else v.helpstr)
          for agg, v in vd.aggregators.items()
-            if not agg.startswith('p')  # skip all the percentiles, user should use q# instead
+           if not (agg.startswith('p') and agg[1:].isdigit())  # skip all the percentiles like 'p10', user should use q# instead
     ]
 @VisiData.api
-def chooseAggregators(vd):
+def chooseAggregators(vd, prompt = 'choose aggregators: '):
     '''Return a list of aggregator name strings chosen or entered by the user. User-entered names may be invalid.'''
-    prompt = 'choose aggregators: '
     def _fmt_aggr_summary(match, row, trigger_key):
         formatted_aggrname = match.formatted.get('key', row.key) if match else row.key
-        r = ' '*(len(prompt)-3)
+        r = ' '*(dispwidth(prompt)-3)
         r += f'[:keystrokes]{trigger_key}[/]  '
         r += formatted_aggrname
         if row.desc:
@@ -288,10 +373,34 @@ def chooseAggregators(vd):
             vd.warning(f'aggregator does not exist: {aggr}')
     return aggrs
-Sheet.addCommand('+', 'aggregate-col', 'addAggregators([cursorCol], chooseAggregators())', 'add aggregator to current column')
+@Sheet.api
+@asyncthread
+def addcol_aggregate(sheet, col, aggrnames):
+    for aggrname in aggrnames:
+        aggrs = vd.aggregators.get(aggrname)
+        aggrs = aggrs if isinstance(aggrs, list) else [aggrs]
+        if not aggrs: continue
+        for aggr in aggrs:
+            rows = aggregate_groups(sheet, col, sheet.rows, aggr)
+            if isinstance(aggr, ListAggregator):
+                t = aggr.listtype or col.type
+            else:
+                t = aggr.type or col.type
+            c = SettableColumn(name=f'{col.name}_{aggr.name}', type=t)
+            sheet.addColumnAtCursor(c)
+            c.setValues(sheet.rows, *rows)
+Sheet.addCommand('+', 'aggregate-col', 'addAggregators([cursorCol], chooseAggregators())', 'Add aggregator to current column')
 Sheet.addCommand('z+', 'memo-aggregate', 'cursorCol.memo_aggregate(chooseAggregators(), selectedRows or rows)', 'memo result of aggregator over values in selected rows for current column')
 ColumnsSheet.addCommand('g+', 'aggregate-cols', 'addAggregators(selectedRows or source[0].nonKeyVisibleCols, chooseAggregators())', 'add aggregators to selected source columns')
+Sheet.addCommand('', 'addcol-aggregate', 'addcol_aggregate(cursorCol, chooseAggregators(prompt="aggregator for groups: "))', 'add column(s) with aggregator of rows grouped by key columns')
+vd.addGlobals(
+    ListAggregator=ListAggregator
+)
 vd.addMenuItems('''
     Column > Add aggregator > aggregate-col
+    Column > Add column > aggregate > addcol-aggregate
 ''')

visidata/apps/vdsql/_ibis.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from copy import copy
+import threading
 import functools
 import operator
 import re
@@ -80,24 +81,18 @@ vd.openurl_sqlite = vd.open_vdsql
 class IbisConnectionPool:
     def __init__(self, source, pool=None, total=0):
         self.source = source
-        self.pool = pool if pool is not None else []
-        self.total = total
+        self._local = threading.local()
+        self._local.connection = None
     def __copy__(self):
-        return IbisConnectionPool(self.source, pool=self.pool, total=self.total)
+        return IbisConnectionPool(self.source)
     @contextmanager
     def get_conn(self):
-        if not self.pool:
-            import ibis
-            r = ibis.connect(str(self.source))
-        else:
-            r = self.pool.pop(0)
-        try:
-            yield r
-        finally:
-            self.pool.append(r)
+        import ibis
+        if not hasattr(self._local, 'connection') or not self._local.connection:
+            self._local.connection = ibis.connect(str(self.source))
+        yield self._local.connection
 class IbisTableIndexSheet(IndexSheet):

visidata 3.1.1__py3-none-any.whl → 3.3__py3-none-any.whl

visidata 3.1.1__py3-none-any.whl → 3.3__py3-none-any.whl