PyPI - visidata - Versions diffs - 2.11.dev0__py3-none-any.whl → 3.0__py3-none-any.whl - Mend

visidata 2.11.dev0__py3-none-any.whl → 3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (253) hide show

visidata/__init__.py +72 -91
visidata/_input.py +263 -44
visidata/_open.py +84 -29
visidata/_types.py +22 -4
visidata/_urlcache.py +17 -4
visidata/aggregators.py +65 -25
visidata/apps/__init__.py +0 -0
visidata/apps/vdsql/__about__.py +8 -0
visidata/apps/vdsql/__init__.py +5 -0
visidata/apps/vdsql/__main__.py +27 -0
visidata/apps/vdsql/_ibis.py +748 -0
visidata/apps/vdsql/bigquery.py +61 -0
visidata/apps/vdsql/clickhouse.py +53 -0
visidata/apps/vdsql/setup.py +40 -0
visidata/apps/vdsql/snowflake.py +67 -0
visidata/apps/vgit/__init__.py +13 -0
visidata/apps/vgit/__main__.py +3 -0
visidata/apps/vgit/abort.py +23 -0
visidata/apps/vgit/blame.py +76 -0
visidata/apps/vgit/branch.py +153 -0
visidata/apps/vgit/config.py +95 -0
visidata/apps/vgit/diff.py +169 -0
visidata/apps/vgit/gitsheet.py +161 -0
visidata/apps/vgit/grep.py +37 -0
visidata/apps/vgit/log.py +81 -0
visidata/apps/vgit/main.py +55 -0
visidata/apps/vgit/remote.py +57 -0
visidata/apps/vgit/repos.py +71 -0
visidata/apps/vgit/setup.py +37 -0
visidata/apps/vgit/stash.py +69 -0
visidata/apps/vgit/status.py +204 -0
visidata/apps/vgit/statusbar.py +34 -0
visidata/basesheet.py +59 -50
visidata/canvas.py +251 -99
visidata/choose.py +15 -11
visidata/clean_names.py +29 -0
visidata/clipboard.py +84 -18
visidata/cliptext.py +220 -46
visidata/cmdlog.py +89 -114
visidata/color.py +142 -56
visidata/column.py +134 -131
visidata/ddw/input.ddw +74 -79
visidata/ddw/regex.ddw +57 -0
visidata/ddwplay.py +33 -14
visidata/deprecated.py +77 -3
visidata/desktop/visidata.desktop +7 -0
visidata/editor.py +12 -6
visidata/errors.py +5 -1
visidata/experimental/__init__.py +0 -0
visidata/experimental/diff_sheet.py +29 -0
visidata/experimental/digit_autoedit.py +6 -0
visidata/experimental/gdrive.py +89 -0
visidata/experimental/google.py +37 -0
visidata/experimental/gsheets.py +79 -0
visidata/experimental/live_search.py +37 -0
visidata/experimental/liveupdate.py +45 -0
visidata/experimental/mark.py +133 -0
visidata/experimental/noahs_tapestry/__init__.py +1 -0
visidata/experimental/noahs_tapestry/tapestry.py +147 -0
visidata/experimental/rownum.py +73 -0
visidata/experimental/slide_cells.py +26 -0
visidata/expr.py +8 -4
visidata/extensible.py +32 -6
visidata/features/__init__.py +0 -0
visidata/features/addcol_audiometadata.py +42 -0
visidata/features/addcol_histogram.py +34 -0
visidata/features/canvas_save_svg.py +69 -0
visidata/features/change_precision.py +46 -0
visidata/features/cmdpalette.py +163 -0
visidata/features/colorbrewer.py +363 -0
visidata/{colorsheet.py → features/colorsheet.py} +17 -16
visidata/features/command_server.py +105 -0
visidata/features/currency_to_usd.py +70 -0
visidata/{customdate.py → features/customdate.py} +2 -0
visidata/features/dedupe.py +132 -0
visidata/{describe.py → features/describe.py} +17 -15
visidata/features/errors_guide.py +26 -0
visidata/features/expand_cols.py +202 -0
visidata/{fill.py → features/fill.py} +4 -2
visidata/{freeze.py → features/freeze.py} +11 -6
visidata/features/graph_seaborn.py +79 -0
visidata/features/helloworld.py +10 -0
visidata/features/hint_types.py +17 -0
visidata/{incr.py → features/incr.py} +5 -0
visidata/{join.py → features/join.py} +107 -53
visidata/features/known_cols.py +21 -0
visidata/features/layout.py +62 -0
visidata/{melt.py → features/melt.py} +33 -21
visidata/features/normcol.py +118 -0
visidata/features/open_config.py +7 -0
visidata/features/open_syspaste.py +18 -0
visidata/features/ping.py +157 -0
visidata/features/procmgr.py +208 -0
visidata/features/random_sample.py +6 -0
visidata/{regex.py → features/regex.py} +47 -31
visidata/features/reload_every.py +55 -0
visidata/features/rename_col_cascade.py +30 -0
visidata/features/scroll_context.py +60 -0
visidata/features/select_equal_selected.py +11 -0
visidata/features/setcol_fake.py +65 -0
visidata/{slide.py → features/slide.py} +75 -21
visidata/features/sparkline.py +48 -0
visidata/features/status_source.py +20 -0
visidata/{sysedit.py → features/sysedit.py} +2 -1
visidata/features/sysopen_mailcap.py +46 -0
visidata/features/term_extras.py +13 -0
visidata/{transpose.py → features/transpose.py} +5 -4
visidata/features/type_ipaddr.py +73 -0
visidata/features/type_url.py +11 -0
visidata/{unfurl.py → features/unfurl.py} +9 -9
visidata/{window.py → features/window.py} +2 -2
visidata/form.py +50 -21
visidata/freqtbl.py +81 -33
visidata/fuzzymatch.py +414 -0
visidata/graph.py +105 -33
visidata/guide.py +180 -0
visidata/help.py +75 -44
visidata/hint.py +39 -0
visidata/indexsheet.py +109 -0
visidata/input_history.py +55 -0
visidata/interface.py +58 -0
visidata/keys.py +17 -16
visidata/loaders/__init__.py +9 -0
visidata/loaders/_pandas.py +61 -21
visidata/loaders/api_airtable.py +70 -0
visidata/loaders/api_bitio.py +102 -0
visidata/loaders/api_matrix.py +148 -0
visidata/loaders/api_reddit.py +306 -0
visidata/loaders/api_zulip.py +249 -0
visidata/loaders/archive.py +41 -7
visidata/loaders/arrow.py +7 -7
visidata/loaders/conll.py +49 -0
visidata/loaders/csv.py +25 -7
visidata/loaders/eml.py +3 -4
visidata/loaders/f5log.py +1204 -0
visidata/loaders/fec.py +325 -0
visidata/loaders/fixed_width.py +3 -5
visidata/loaders/frictionless.py +3 -3
visidata/loaders/geojson.py +8 -5
visidata/loaders/google.py +48 -0
visidata/loaders/graphviz.py +4 -4
visidata/loaders/hdf5.py +4 -4
visidata/loaders/html.py +48 -10
visidata/loaders/http.py +84 -30
visidata/loaders/imap.py +20 -10
visidata/loaders/jrnl.py +52 -0
visidata/loaders/json.py +83 -29
visidata/loaders/jsonla.py +74 -0
visidata/loaders/lsv.py +15 -11
visidata/loaders/mailbox.py +40 -0
visidata/loaders/markdown.py +1 -3
visidata/loaders/mbtiles.py +4 -5
visidata/loaders/mysql.py +11 -13
visidata/loaders/npy.py +7 -7
visidata/loaders/odf.py +4 -1
visidata/loaders/orgmode.py +428 -0
visidata/loaders/pandas_freqtbl.py +14 -20
visidata/loaders/parquet.py +62 -6
visidata/loaders/pcap.py +3 -3
visidata/loaders/pdf.py +4 -3
visidata/loaders/png.py +19 -13
visidata/loaders/postgres.py +9 -8
visidata/loaders/rec.py +7 -3
visidata/loaders/s3.py +342 -0
visidata/loaders/sas.py +5 -5
visidata/loaders/scrape.py +186 -0
visidata/loaders/shp.py +6 -5
visidata/loaders/spss.py +5 -6
visidata/loaders/sqlite.py +68 -28
visidata/loaders/texttables.py +1 -1
visidata/loaders/toml.py +60 -0
visidata/loaders/tsv.py +61 -19
visidata/loaders/ttf.py +19 -7
visidata/loaders/unzip_http.py +6 -5
visidata/loaders/usv.py +1 -1
visidata/loaders/vcf.py +16 -16
visidata/loaders/vds.py +10 -7
visidata/loaders/vdx.py +30 -5
visidata/loaders/xlsb.py +8 -1
visidata/loaders/xlsx.py +145 -25
visidata/loaders/xml.py +6 -3
visidata/loaders/xword.py +4 -4
visidata/loaders/yaml.py +15 -5
visidata/macos.py +1 -1
visidata/macros.py +130 -41
visidata/main.py +119 -94
visidata/mainloop.py +101 -154
visidata/man/parse_options.py +2 -2
visidata/man/vd.1 +302 -147
visidata/man/vd.txt +291 -151
visidata/memory.py +3 -3
visidata/menu.py +104 -423
visidata/metasheets.py +59 -141
visidata/modify.py +79 -23
visidata/motd.py +3 -3
visidata/mouse.py +137 -0
visidata/movement.py +43 -35
visidata/optionssheet.py +99 -0
visidata/path.py +131 -43
visidata/pivot.py +74 -47
visidata/plugins.py +65 -192
visidata/pyobj.py +50 -201
visidata/rename_col.py +20 -0
visidata/save.py +42 -20
visidata/search.py +54 -10
visidata/selection.py +84 -5
visidata/settings.py +162 -24
visidata/sheets.py +229 -257
visidata/shell.py +51 -21
visidata/sidebar.py +162 -0
visidata/sort.py +11 -4
visidata/statusbar.py +113 -104
visidata/stored_list.py +43 -0
visidata/stored_prop.py +38 -0
visidata/tests/conftest.py +3 -3
visidata/tests/test_cliptext.py +39 -0
visidata/tests/test_commands.py +62 -7
visidata/tests/test_edittext.py +2 -2
visidata/tests/test_features.py +17 -0
visidata/tests/test_menu.py +14 -0
visidata/tests/test_path.py +13 -4
visidata/text_source.py +53 -0
visidata/textsheet.py +10 -3
visidata/theme.py +44 -0
visidata/themes/__init__.py +0 -0
visidata/themes/ascii8.py +84 -0
visidata/themes/asciimono.py +84 -0
visidata/themes/light.py +17 -0
visidata/threads.py +87 -39
visidata/tuiwin.py +22 -0
visidata/type_currency.py +22 -3
visidata/type_date.py +31 -9
visidata/type_floatsi.py +5 -1
visidata/undo.py +18 -6
visidata/utils.py +106 -23
visidata/vdobj.py +28 -17
visidata/windows.py +10 -0
visidata/wrappers.py +9 -3
visidata-3.0.data/data/share/applications/visidata.desktop +7 -0
{visidata-2.11.dev0.data → visidata-3.0.data}/data/share/man/man1/vd.1 +302 -147
{visidata-2.11.dev0.data → visidata-3.0.data}/data/share/man/man1/visidata.1 +302 -147
visidata-3.0.data/scripts/vd2to3.vdx +9 -0
{visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/METADATA +13 -11
visidata-3.0.dist-info/RECORD +257 -0
{visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/WHEEL +1 -1
{visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/entry_points.txt +0 -1
visidata/layout.py +0 -44
visidata/misc.py +0 -5
visidata-2.11.dev0.dist-info/RECORD +0 -142
/visidata/{repeat.py → features/repeat.py} +0 -0
{visidata-2.11.dev0.data → visidata-3.0.data}/scripts/vd +0 -0
{visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/LICENSE.gpl3 +0 -0
{visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/top_level.txt +0 -0

visidata/loaders/parquet.py CHANGED Viewed

@@ -1,28 +1,84 @@
-from visidata import VisiData, Sheet, Column
+from visidata import Sheet, VisiData, TypedWrapper, anytype, date, vlen, Column, vd
+from collections import defaultdict
 @VisiData.api
 def open_parquet(vd, p):
-    return ParquetSheet(p.name, source=p)
+    return ParquetSheet(p.base_stem, source=p)
 class ParquetColumn(Column):
     def calcValue(self, row):
-        return self.source[row['__rownum__']].as_py()
+        val = self.source[row["__rownum__"]]
+        if val.type == 'large_string':
+            return memoryview(val.as_buffer())[:2**20].tobytes().decode('utf-8')
+        else:
+            return val.as_py()
 class ParquetSheet(Sheet):
     # rowdef: {'__rownum__':int, parquet_col:overridden_value, ...}
     def iterload(self):
-        import pyarrow.parquet as pq
+        pa = vd.importExternal("pyarrow", "pyarrow")
+        pq = vd.importExternal("pyarrow.parquet", "pyarrow")
         from visidata.loaders.arrow import arrow_to_vdtype
-        self.tbl = pq.read_table(self.source)
+        if self.source.is_dir():
+            self.tbl = pq.read_table(str(self.source))
+        else:
+            with self.source.open('rb') as f:
+                self.tbl = pq.read_table(f)
         self.columns = []
         for colname, col in zip(self.tbl.column_names, self.tbl.columns):
             c = ParquetColumn(colname,
                               type=arrow_to_vdtype(col.type),
-                              source=col)
+                              source=col,
+                              cache=(col.type.id == pa.lib.Type_LARGE_STRING))
             self.addColumn(c)
         for i in range(self.tbl.num_rows):
             yield dict(__rownum__=i)
+@VisiData.api
+def save_parquet(vd, p, sheet):
+    pa = vd.importExternal("pyarrow")
+    pq = vd.importExternal("pyarrow.parquet", "pyarrow")
+    typemap = {
+        anytype: pa.string(),
+        int: pa.int64(),
+        vlen: pa.int64(),
+        float: pa.float64(),
+        str: pa.string(),
+        date: pa.date64(),
+        # list: pa.array(),
+    }
+    for t in vd.numericTypes:
+        if t not in typemap:
+            typemap[t] = pa.float64()
+    databycol = defaultdict(list)  # col -> [values]
+    for typedvals in sheet.iterdispvals(format=False):
+        for col, val in typedvals.items():
+            if isinstance(val, TypedWrapper):
+                val = None
+            databycol[col].append(val)
+    data = [
+        pa.array(vals, type=typemap.get(col.type, pa.string()))
+        for col, vals in databycol.items()
+    ]
+    schema = pa.schema(
+        [(c.name, typemap.get(c.type, pa.string())) for c in sheet.visibleCols]
+    )
+    with p.open_bytes(mode="w") as outf:
+        with pq.ParquetWriter(outf, schema) as writer:
+            writer.write_batch(
+                pa.record_batch(data, names=[c.name for c in sheet.visibleCols])
+            )

visidata/loaders/pcap.py CHANGED Viewed

@@ -17,7 +17,7 @@ services = {}  # [('tcp', 25)] -> 'smtp'
 @VisiData.api
 def open_pcap(vd, p):
-    return PcapSheet(p.name, source=p)
+    return PcapSheet(p.base_stem, source=p)
 open_cap = open_pcap
 open_pcapng = open_pcap
@@ -83,8 +83,8 @@ def init_pcap():
         return
     global dpkt, dnslib
-    import dpkt
-    import dnslib
+    dpkt = vd.importExternal('dpkt')
+    dnslib = vd.importExternal('dnslib')
     load_consts(protocols['ethernet'], dpkt.ethernet, 'ETH_TYPE_')
     load_consts(protocols['ip'], dpkt.ip, 'IP_PROTO_')

visidata/loaders/pdf.py CHANGED Viewed

@@ -8,8 +8,8 @@ vd.option('pdf_tables', False, 'parse PDF for tables instead of pages of text',
 @VisiData.api
 def open_pdf(vd, p):
     if vd.options.pdf_tables:
-        return TabulaSheet(p.name, source=p)
-    return PdfMinerSheet(p.name, source=p)
+        return TabulaSheet(p.base_stem, source=p)
+    return PdfMinerSheet(p.base_stem, source=p)
 class PdfMinerSheet(TableSheet):
@@ -20,6 +20,7 @@ class PdfMinerSheet(TableSheet):
         ColumnItem('contents', 2),
     ]
     def iterload(self):
+        vd.importExternal('pdfminer.high_level', 'pdfminer.six')
         import pdfminer.high_level
         from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
         from pdfminer.converter import TextConverter, PDFPageAggregator
@@ -38,6 +39,6 @@ class PdfMinerSheet(TableSheet):
 class TabulaSheet(IndexSheet):
     def iterload(self):
-        import tabula
+        tabula = vd.importExternal('tabula')
         for i, t in enumerate(tabula.read_pdf(self.source, pages='all', multiple_tables=True)):
             yield PandasSheet(self.source.name, i, source=t)

visidata/loaders/png.py CHANGED Viewed

@@ -1,32 +1,24 @@
 import functools
-from visidata import VisiData, Sheet, Column, Progress, colors, ColumnItem, Canvas, asyncthread
+from visidata import VisiData, Sheet, Column, Progress, colors, ColumnItem, Canvas, asyncthread, vd, rgb_to_attr
 @VisiData.api
 def open_png(vd, p):
-    return PNGSheet(p.name, source=p)
+    return PNGSheet(p.base_stem, source=p)
-@functools.lru_cache(256)
-def rgb_to_attr(r,g,b,a):
-    if a == 0: return 0
-    if r > g and r > b: return colors['red']
-    if g > r and g > b: return colors['green']
-    if b > r and b > g: return colors['blue']
-    if a == 255: return colors['white']
-    return 0
 class PNGSheet(Sheet):
     rowtype = 'pixels'  # rowdef: list(x, y, r, g, b, a)
     columns = [ColumnItem(name, i, type=int) for i, name in enumerate('x y R G B A'.split())] + [
-        Column('attr', type=int, getter=lambda col,row: rgb_to_attr(*row[2:]))
+        Column('attr', getter=lambda col,row: rgb_to_attr(*row[2:]))
     ]
     nKeys = 2
     def newRow(self):
         return list((None, None, 0, 0, 0, 0))
     def iterload(self):
-        import png
+        png = vd.importExternal('png', 'pypng')
         self.png = png.Reader(bytes=self.source.read_bytes())
         self.width, self.height, pixels, md = self.png.asRGBA()
         for y, row in enumerate(pixels):
@@ -78,7 +70,7 @@ def save_png(vd, p, vs):
     vd.status('saving %sx%s' % (vs.width, vs.height))
-    import png
+    vd.importExternal('png', 'pypng')
     img = png.from_array(pixels, mode='RGBA')
     with open(p, 'wb') as fp:
         img.write(fp)
@@ -86,4 +78,18 @@ def save_png(vd, p, vs):
     vd.status('saved')
+def blockchar(i:int):
+    '''1   8    into   1  2
+       2  16
+       4  32           4  8
+      64 128
+    '''
+    UL = bool(i & 1 or i & 2)
+    UR = bool(i & 8 or i & 16)
+    LL = bool(i & 4 or i & 64)
+    LR = bool(i & 32 or i & 128)
+    return ' ▘▝▀▖▌▞▛▗▚▐▜▄▙▟█'[UL*1+UR*2+LL*4+LR*8]
+PNGDrawing.options.disp_canvas_charset = ''.join(blockchar(i) for i in range(256))
 PNGSheet.addCommand('.', 'plot-sheet', 'vd.push(PNGDrawing(name+"_plot", source=sheet, sourceRows=rows))', 'plot this png')

visidata/loaders/postgres.py CHANGED Viewed

@@ -1,13 +1,14 @@
 import random
+from urllib.parse import urlparse
-from visidata import VisiData, vd, Sheet, options, anytype, urlparse, asyncthread, ColumnItem
+from visidata import VisiData, vd, Sheet, options, anytype, asyncthread, ColumnItem
 __all__ = ['openurl_postgres', 'openurl_postgresql', 'openurl_rds', 'PgTable', 'PgTablesSheet']
 vd.option('postgres_schema', 'public', 'The desired schema for the Postgres database')
 def codeToType(type_code, colname):
-    import psycopg2
+    psycopg2 = vd.importExternal('psycopg2', 'psycopg2-binary')
     try:
         tname = psycopg2._psycopg.string_types[type_code].name
         if 'INTEGER' in tname:
@@ -21,8 +22,8 @@ def codeToType(type_code, colname):
 @VisiData.api
 def openurl_rds(vd, url, filetype=None):
-    import boto3
-    import psycopg2
+    boto3 = vd.importExternal('boto3')
+    psycopg2 = vd.importExternal('psycopg2', 'psycopg2-binary')
     rds = boto3.client('rds')
     url = urlparse(url.given)
@@ -42,7 +43,7 @@ def openurl_rds(vd, url, filetype=None):
 @VisiData.api
 def openurl_postgres(vd, url, filetype=None):
-    import psycopg2
+    psycopg2 = vd.importExternal('psycopg2', 'psycopg2-binary')
     url = urlparse(url.given)
     dbname = url.path[1:]
@@ -87,7 +88,7 @@ def postgresGetColumns(vd, cur):
 class PgTablesSheet(Sheet):
     rowtype = 'tables'
-    def reload(self):
+    def loader(self):
         schema = options.postgres_schema
         qstr = f'''
             SELECT relname table_name, column_count.ncols, reltuples::bigint est_nrows
@@ -122,9 +123,9 @@ class PgTable(Sheet):
     @asyncthread
     def reload(self):
         if self.options.postgres_schema:
-            source = f"{self.options.postgres_schema}.{self.source}"
+            source = f'"{self.options.postgres_schema}"."{self.source}"'
         else:
-            source = self.source
+            source = f'"{self.source}"'
         with self.sql.cur(f"SELECT * FROM {source}") as cur:
             self.rows = []
             r = cur.fetchone()

visidata/loaders/rec.py CHANGED Viewed

@@ -5,7 +5,7 @@ from visidata import VisiData, vd, Progress, TableSheet, IndexSheet, ItemColumn,
 @VisiData.api
 def open_rec(vd, p):
-    return RecIndexSheet(p.name, source=p)
+    return RecIndexSheet(p.base_stem, source=p)
 def decode_multiline(line, fp):
     'Parse *line* and lookahead into *fp* as iterator for continuing lines.  Return (multiline, next_line) where *multiline* can contain newlines and *next_line is the line after the combined *multiline*.  Handle "\\" at end and "+" at beginning of lines.  *next_line* will be None iff iterator is exhausted.'
@@ -49,6 +49,7 @@ class RecIndexSheet(IndexSheet):
         fp = iter(self.source)
         while next_line is not None:
+          try:
             line, next_line = decode_multiline(next_line, fp)
             line = line.lstrip()
@@ -62,6 +63,7 @@ class RecIndexSheet(IndexSheet):
             if not sheet or (newRecord and line[0] == '%'):
                 sheet = RecSheet('', columns=[], rows=[], source=self, comments=comments)
+                sheet.columns = []
                 comments = []
                 yield sheet
                 newRecord = False
@@ -81,7 +83,7 @@ class RecIndexSheet(IndexSheet):
                         if colname not in sheet.colnames:
                             sheet.addColumn(ItemColumn(colname, keycol=i+1))
                 elif desc in ['sort']:
-                    sheet.orderBy([sheet.column(colname) for colname in rest.split()])
+                    sheet._ordering = [(colname, False) for colname in rest.split()]
                 elif desc in ['type', 'typedef']:
                     pass
                 elif desc in ['auto']:  # autoincrement columns should be present already
@@ -112,6 +114,8 @@ class RecIndexSheet(IndexSheet):
                     row[name].append(rest)
                 else:
                     row[name] = rest
+          except Exception as e:
+              vd.exceptionCaught(e)
         for sheet in Progress(self.rows):
             sheet.sort()
@@ -119,7 +123,7 @@ class RecIndexSheet(IndexSheet):
 @VisiData.api
 def save_rec(vd, p, *vsheets):
-    with p.open_text(mode='w') as fp:
+    with p.open(mode='w') as fp:
         for vs in vsheets:
             comments = getattr(vs, 'comments', [])
             if comments:

visidata/loaders/s3.py ADDED Viewed

@@ -0,0 +1,342 @@
+"""Allow VisiData to work directly with Amazon S3 paths.
+Functionality is more limited than local paths, but supports:
+* Navigating among directories (S3 prefixes)
+* Opening supported filetypes, including compressed files
+* Versioned buckets
+"""
+import textwrap
+from visidata import (
+    ENTER,
+    Column,
+    ItemColumn,
+    Path,
+    Sheet,
+    VisiData,
+    asyncthread,
+    date,
+    vd,
+)
+vd.option(
+    "s3_endpoint",
+    "",
+    "alternate S3 endpoint, used for local testing or alternative S3-compatible services",
+    replay=True,
+)
+vd.option("s3_glob", True, "enable glob-matching for S3 paths", replay=True)
+vd.option(
+    "s3_version_aware",
+    False,
+    "show all object versions in a versioned bucket",
+    replay=True,
+)
+class S3Path(Path):
+    """A Path-like object representing an S3 file (object) or directory (prefix)."""
+    _fs = None
+    def __init__(self, path, version_aware=None, version_id=None):
+        super().__init__(path)
+        self.given = path
+        self.version_aware = version_aware or vd.options.s3_version_aware
+        self.version_id = self.version_aware and version_id or None
+    @property
+    def fs(self):
+        if self._fs is None:
+            s3fs_core = vd.importExternal("s3fs.core", "s3fs")
+            self._fs = s3fs_core.S3FileSystem(
+                client_kwargs={"endpoint_url": vd.options.s3_endpoint or None},
+                version_aware=self.version_aware,
+            )
+        return self._fs
+    @fs.setter
+    def fs(self, val):
+        self._fs = val
+    def open(self, mode='r', **kwargs):
+        """Open the current S3 path, decompressing along the way if needed."""
+        fp = self.fs.open(self.given, mode="rb" if self.compression else mode, version_id=self.version_id)
+        # Workaround for https://github.com/ajkerrigan/visidata-plugins/issues/12
+        if hasattr(fp, "cache") and fp.cache.size != fp.size:
+            vd.debug(
+                f"updating cache size from {fp.cache.size} to {fp.size} to match object size"
+            )
+            fp.cache.size = fp.size
+        if self.compression == "gz":
+            import gzip
+            return gzip.open(fp, mode, **kwargs)
+        if self.compression == "bz2":
+            import bz2
+            return bz2.open(fp, mode, **kwargs)
+        if self.compression == "xz":
+            import lzma
+            return lzma.open(fp, mode, **kwargs)
+        return fp
+class S3DirSheet(Sheet):
+    """Display a listing of files and directories (objects and prefixes) in an S3 path.
+    Allow single or multiple entries to be opened in separate sheets.
+    """
+    columns = [
+        Column("name", getter=lambda col, row: col.sheet.object_display_name(row)),
+        ItemColumn("type"),
+        ItemColumn("size", type=int),
+        ItemColumn("modtime", "LastModified", type=date),
+        ItemColumn("latest", "IsLatest", type=bool),
+        ItemColumn("version_id", "VersionId", type=str, width=0),
+    ]
+    def __init__(self, name, source, version_aware=None):
+        import re
+        super().__init__(name=name, source=source)
+        self.rowtype = "files"
+        self.nKeys = 1
+        self.use_glob_matching = vd.options.s3_glob and re.search(
+            r"[*?\[\]]", self.source.given
+        )
+        self.version_aware = (
+            vd.options.s3_version_aware if version_aware is None else version_aware
+        )
+        self.fs = source.fs
+    def object_display_name(self, row):
+        """Provide a friendly display name for an S3 path.
+        When listing the contents of a single S3 prefix, the name can chop off
+        prefix bits to imitate a directory browser. When glob matching,
+        include the full key name for each entry.
+        """
+        return (
+            row.get("name")
+            if self.use_glob_matching
+            else row.get("name").rpartition("/")[2]
+        )
+    def iterload(self):
+        """Delegate to the underlying filesystem to fetch S3 entries."""
+        list_func = self.fs.glob if self.use_glob_matching else self.fs.ls
+        if not (
+            self.use_glob_matching
+            or self.fs.exists(self.source.given)
+            or self.fs.isdir(self.source.given)
+        ):
+            vd.fail(f"unable to open S3 path: {self.source.given}")
+        if self.version_aware:
+            self.column("latest").hide(False)
+        else:
+            self.column("latest").hide(True)
+        for key in list_func(str(self.source)):
+            if self.version_aware and self.fs.isfile(key):
+                yield from (
+                    {**obj_version, "name": key, "type": "file"}
+                    for obj_version in self.fs.object_version_info(key)
+                    if key.partition("/")[2] == obj_version["Key"]
+                )
+            else:
+                yield self.fs.stat(key)
+    @asyncthread
+    def download(self, rows, savepath):
+        """Download files and directories to a local path.
+        Recurse through through subdirectories.
+        """
+        remote_files = [row["name"] for row in rows]
+        self.fs.download(remote_files, str(savepath), recursive=True)
+    def open_rows(self, rows):
+        """Open new sheets for the target rows."""
+        return (
+            vd.openSource(
+                S3Path(
+                    "s3://{}".format(row["name"]),
+                    version_aware=self.version_aware,
+                    version_id=row.get("VersionId"),
+                )
+            )
+            for row in rows
+        )
+    def join_rows(self, rows):
+        """Open new sheets for the target rows and concatenate their contents."""
+        sheets = list(self.open_rows(rows))
+        for sheet in vd.Progress(sheets):
+            sheet.reload()
+        # Wait for all sheets to fully load before joining them.
+        # 'append' is the only join type that makes sense here,
+        # since we're joining freshly opened sheets with no key
+        # columns.
+        vd.sync()
+        return sheets[0].openJoin(sheets[1:], jointype="append")
+    def refresh_path(self, path=None):
+        """Clear the s3fs cache for the given path and reload.
+        By default, clear the entire cache.
+        """
+        self.fs.invalidate_cache(path)
+        self.reload()
+    def toggle_versioning(self):
+        """Enable or disable support for S3 versioning."""
+        self.version_aware = not self.version_aware
+        self.fs.version_aware = self.version_aware
+        vd.status(f's3 versioning {"enabled" if self.version_aware else "disabled"}')
+        if self.currentThreads:
+            vd.debug("cancelling threads before reloading")
+            vd.cancelThread(*self.currentThreads)
+        self.reload()
+@VisiData.api
+def openurl_s3(vd, p, filetype):
+    """Open a sheet for an S3 path.
+    S3 directories (prefixes) require special handling, but files (objects)
+    can use standard VisiData "open" functions.
+    """
+    # Non-obvious behavior here: For the default case, we don't want to send
+    # a custom endpoint to s3fs. However, using None as a default trips up
+    # VisiData's type detection for the endpoint option. So we use an empty
+    # string as the default instead, and convert back to None here.
+    endpoint = vd.options.s3_endpoint or None
+    p = S3Path(
+        str(p.given),
+        version_aware=getattr(p, "version_aware", vd.options.s3_version_aware),
+        version_id=getattr(p, "version_id", None),
+    )
+    p.fs.version_aware = p.version_aware
+    if p.fs.client_kwargs.get("endpoint_url", "") != endpoint:
+        p.fs.client_kwargs = {"endpoint_url": endpoint}
+        p.fs.connect()
+    if not p.fs.isfile(str(p.given)):
+        return S3DirSheet(p.base_stem, source=p, version_aware=p.version_aware)
+    if not filetype:
+        filetype = p.ext or "txt"
+    openfunc = getattr(vd, f"open_{filetype.lower()}")
+    if not openfunc:
+        vd.warning(f"no loader found for {filetype} files, falling back to txt")
+        filetype = "txt"
+        openfunc = vd.open_txt
+    assert callable(openfunc), f"no function/method available to open {p.given}"
+    vs = openfunc(p)
+    vd.status(
+        f'opening {p.given} as {filetype} (version id: {p.version_id or "latest"})'
+    )
+    return vs
+S3DirSheet.addCommand(
+    ENTER,
+    "s3-open-row",
+    "vd.push(next(sheet.open_rows([cursorRow])))",
+    "open the current S3 entry",
+)
+S3DirSheet.addCommand(
+    "g" + ENTER,
+    "s3-open-rows",
+    "for vs in sheet.open_rows(selectedRows): vd.push(vs)",
+    "open all selected S3 entries",
+)
+S3DirSheet.addCommand(
+    "z^R",
+    "s3-refresh-sheet",
+    "sheet.refresh_path(str(sheet.source))",
+    "clear the s3fs cache for this path, then reload",
+)
+S3DirSheet.addCommand(
+    "gz^R",
+    "s3-refresh-sheet-all",
+    "sheet.refresh_path()",
+    "clear the entire s3fs cache, then reload",
+)
+S3DirSheet.addCommand(
+    "^V",
+    "s3-toggle-versioning",
+    "sheet.toggle_versioning()",
+    "enable/disable support for S3 versioning",
+)
+S3DirSheet.addCommand(
+    "&",
+    "s3-join-rows",
+    "vd.push(sheet.join_rows(selectedRows))",
+    "open and join sheets for selected S3 entries",
+)
+S3DirSheet.addCommand(
+    "gx",
+    "s3-download-rows",
+    textwrap.dedent(
+        """
+        savepath = inputPath("download selected rows to: ", value=".")
+        sheet.download(selectedRows, savepath)
+    """
+    ),
+    "download selected files and directories",
+)
+S3DirSheet.addCommand(
+    "x",
+    "s3-download-row",
+    # Note about the use of `_path.name` here. Given a `visidata.Path`
+    # object `path`, `path._path` is a `pathlib.Path` object.
+    #
+    # `visidata.Path` objects do some fun parsing to pick out
+    # file types and extensions, handle compression transparently,
+    # etc. That parsing leaves the `name` attribute without a file
+    # extension, and makes it a little tricky to tack back on.
+    #
+    # `pathlib.Path` objects have a `name` with the extension intact.
+    # That makes `path._path.name` a convenient default output path.
+    textwrap.dedent(
+        """
+        savepath = inputPath("download to: ", value=Path(cursorRow["name"])._path.name)
+        sheet.download([cursorRow], savepath)
+    """
+    ),
+    "download the file or directory in the cursor row",
+)
+vd.addMenuItems(
+    """
+    File > Toggle versioning > s3-toggle-versioning
+    File > Refresh > Current path > s3-refresh-sheet
+    File > Refresh > All > s3-refresh-sheet-all
+    Row > Download > Current row > s3-download-row
+    Row > Download > Selected rows > s3-download-rows
+    Data > Join > Selected rows > s3-join-rows
+"""
+)
+vd.addGlobals(S3DirSheet=S3DirSheet)

visidata/loaders/sas.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import logging
-from visidata import VisiData, Sheet, Progress, ColumnItem, anytype
+from visidata import VisiData, Sheet, Progress, ColumnItem, anytype, vd
 SASTypes = {
     'string': str,
@@ -9,15 +9,15 @@ SASTypes = {
 @VisiData.api
 def open_xpt(vd, p):
-    return XptSheet(p.name, source=p)
+    return XptSheet(p.base_stem, source=p)
 @VisiData.api
 def open_sas7bdat(vd, p):
-    return SasSheet(p.name, source=p)
+    return SasSheet(p.base_stem, source=p)
 class XptSheet(Sheet):
     def iterload(self):
-        import xport
+        xport = vd.importExternal('xport')
         with open(self.source, 'rb') as fp:
             self.rdr = xport.Reader(fp)
@@ -30,7 +30,7 @@ class XptSheet(Sheet):
 class SasSheet(Sheet):
     def iterload(self):
-        import sas7bdat
+        sas7bdat = vd.importExternal('sas7bdat')
         self.dat = sas7bdat.SAS7BDAT(str(self.source), skip_header=True, log_level=logging.CRITICAL)
         self.columns = []
         for col in self.dat.columns:

visidata 2.11.dev0__py3-none-any.whl → 3.0__py3-none-any.whl

visidata 2.11.dev0__py3-none-any.whl → 3.0__py3-none-any.whl