visidata 2.11.dev0__py3-none-any.whl → 3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- visidata/__init__.py +72 -91
- visidata/_input.py +263 -44
- visidata/_open.py +84 -29
- visidata/_types.py +22 -4
- visidata/_urlcache.py +17 -4
- visidata/aggregators.py +65 -25
- visidata/apps/__init__.py +0 -0
- visidata/apps/vdsql/__about__.py +8 -0
- visidata/apps/vdsql/__init__.py +5 -0
- visidata/apps/vdsql/__main__.py +27 -0
- visidata/apps/vdsql/_ibis.py +748 -0
- visidata/apps/vdsql/bigquery.py +61 -0
- visidata/apps/vdsql/clickhouse.py +53 -0
- visidata/apps/vdsql/setup.py +40 -0
- visidata/apps/vdsql/snowflake.py +67 -0
- visidata/apps/vgit/__init__.py +13 -0
- visidata/apps/vgit/__main__.py +3 -0
- visidata/apps/vgit/abort.py +23 -0
- visidata/apps/vgit/blame.py +76 -0
- visidata/apps/vgit/branch.py +153 -0
- visidata/apps/vgit/config.py +95 -0
- visidata/apps/vgit/diff.py +169 -0
- visidata/apps/vgit/gitsheet.py +161 -0
- visidata/apps/vgit/grep.py +37 -0
- visidata/apps/vgit/log.py +81 -0
- visidata/apps/vgit/main.py +55 -0
- visidata/apps/vgit/remote.py +57 -0
- visidata/apps/vgit/repos.py +71 -0
- visidata/apps/vgit/setup.py +37 -0
- visidata/apps/vgit/stash.py +69 -0
- visidata/apps/vgit/status.py +204 -0
- visidata/apps/vgit/statusbar.py +34 -0
- visidata/basesheet.py +59 -50
- visidata/canvas.py +251 -99
- visidata/choose.py +15 -11
- visidata/clean_names.py +29 -0
- visidata/clipboard.py +84 -18
- visidata/cliptext.py +220 -46
- visidata/cmdlog.py +89 -114
- visidata/color.py +142 -56
- visidata/column.py +134 -131
- visidata/ddw/input.ddw +74 -79
- visidata/ddw/regex.ddw +57 -0
- visidata/ddwplay.py +33 -14
- visidata/deprecated.py +77 -3
- visidata/desktop/visidata.desktop +7 -0
- visidata/editor.py +12 -6
- visidata/errors.py +5 -1
- visidata/experimental/__init__.py +0 -0
- visidata/experimental/diff_sheet.py +29 -0
- visidata/experimental/digit_autoedit.py +6 -0
- visidata/experimental/gdrive.py +89 -0
- visidata/experimental/google.py +37 -0
- visidata/experimental/gsheets.py +79 -0
- visidata/experimental/live_search.py +37 -0
- visidata/experimental/liveupdate.py +45 -0
- visidata/experimental/mark.py +133 -0
- visidata/experimental/noahs_tapestry/__init__.py +1 -0
- visidata/experimental/noahs_tapestry/tapestry.py +147 -0
- visidata/experimental/rownum.py +73 -0
- visidata/experimental/slide_cells.py +26 -0
- visidata/expr.py +8 -4
- visidata/extensible.py +32 -6
- visidata/features/__init__.py +0 -0
- visidata/features/addcol_audiometadata.py +42 -0
- visidata/features/addcol_histogram.py +34 -0
- visidata/features/canvas_save_svg.py +69 -0
- visidata/features/change_precision.py +46 -0
- visidata/features/cmdpalette.py +163 -0
- visidata/features/colorbrewer.py +363 -0
- visidata/{colorsheet.py → features/colorsheet.py} +17 -16
- visidata/features/command_server.py +105 -0
- visidata/features/currency_to_usd.py +70 -0
- visidata/{customdate.py → features/customdate.py} +2 -0
- visidata/features/dedupe.py +132 -0
- visidata/{describe.py → features/describe.py} +17 -15
- visidata/features/errors_guide.py +26 -0
- visidata/features/expand_cols.py +202 -0
- visidata/{fill.py → features/fill.py} +4 -2
- visidata/{freeze.py → features/freeze.py} +11 -6
- visidata/features/graph_seaborn.py +79 -0
- visidata/features/helloworld.py +10 -0
- visidata/features/hint_types.py +17 -0
- visidata/{incr.py → features/incr.py} +5 -0
- visidata/{join.py → features/join.py} +107 -53
- visidata/features/known_cols.py +21 -0
- visidata/features/layout.py +62 -0
- visidata/{melt.py → features/melt.py} +33 -21
- visidata/features/normcol.py +118 -0
- visidata/features/open_config.py +7 -0
- visidata/features/open_syspaste.py +18 -0
- visidata/features/ping.py +157 -0
- visidata/features/procmgr.py +208 -0
- visidata/features/random_sample.py +6 -0
- visidata/{regex.py → features/regex.py} +47 -31
- visidata/features/reload_every.py +55 -0
- visidata/features/rename_col_cascade.py +30 -0
- visidata/features/scroll_context.py +60 -0
- visidata/features/select_equal_selected.py +11 -0
- visidata/features/setcol_fake.py +65 -0
- visidata/{slide.py → features/slide.py} +75 -21
- visidata/features/sparkline.py +48 -0
- visidata/features/status_source.py +20 -0
- visidata/{sysedit.py → features/sysedit.py} +2 -1
- visidata/features/sysopen_mailcap.py +46 -0
- visidata/features/term_extras.py +13 -0
- visidata/{transpose.py → features/transpose.py} +5 -4
- visidata/features/type_ipaddr.py +73 -0
- visidata/features/type_url.py +11 -0
- visidata/{unfurl.py → features/unfurl.py} +9 -9
- visidata/{window.py → features/window.py} +2 -2
- visidata/form.py +50 -21
- visidata/freqtbl.py +81 -33
- visidata/fuzzymatch.py +414 -0
- visidata/graph.py +105 -33
- visidata/guide.py +180 -0
- visidata/help.py +75 -44
- visidata/hint.py +39 -0
- visidata/indexsheet.py +109 -0
- visidata/input_history.py +55 -0
- visidata/interface.py +58 -0
- visidata/keys.py +17 -16
- visidata/loaders/__init__.py +9 -0
- visidata/loaders/_pandas.py +61 -21
- visidata/loaders/api_airtable.py +70 -0
- visidata/loaders/api_bitio.py +102 -0
- visidata/loaders/api_matrix.py +148 -0
- visidata/loaders/api_reddit.py +306 -0
- visidata/loaders/api_zulip.py +249 -0
- visidata/loaders/archive.py +41 -7
- visidata/loaders/arrow.py +7 -7
- visidata/loaders/conll.py +49 -0
- visidata/loaders/csv.py +25 -7
- visidata/loaders/eml.py +3 -4
- visidata/loaders/f5log.py +1204 -0
- visidata/loaders/fec.py +325 -0
- visidata/loaders/fixed_width.py +3 -5
- visidata/loaders/frictionless.py +3 -3
- visidata/loaders/geojson.py +8 -5
- visidata/loaders/google.py +48 -0
- visidata/loaders/graphviz.py +4 -4
- visidata/loaders/hdf5.py +4 -4
- visidata/loaders/html.py +48 -10
- visidata/loaders/http.py +84 -30
- visidata/loaders/imap.py +20 -10
- visidata/loaders/jrnl.py +52 -0
- visidata/loaders/json.py +83 -29
- visidata/loaders/jsonla.py +74 -0
- visidata/loaders/lsv.py +15 -11
- visidata/loaders/mailbox.py +40 -0
- visidata/loaders/markdown.py +1 -3
- visidata/loaders/mbtiles.py +4 -5
- visidata/loaders/mysql.py +11 -13
- visidata/loaders/npy.py +7 -7
- visidata/loaders/odf.py +4 -1
- visidata/loaders/orgmode.py +428 -0
- visidata/loaders/pandas_freqtbl.py +14 -20
- visidata/loaders/parquet.py +62 -6
- visidata/loaders/pcap.py +3 -3
- visidata/loaders/pdf.py +4 -3
- visidata/loaders/png.py +19 -13
- visidata/loaders/postgres.py +9 -8
- visidata/loaders/rec.py +7 -3
- visidata/loaders/s3.py +342 -0
- visidata/loaders/sas.py +5 -5
- visidata/loaders/scrape.py +186 -0
- visidata/loaders/shp.py +6 -5
- visidata/loaders/spss.py +5 -6
- visidata/loaders/sqlite.py +68 -28
- visidata/loaders/texttables.py +1 -1
- visidata/loaders/toml.py +60 -0
- visidata/loaders/tsv.py +61 -19
- visidata/loaders/ttf.py +19 -7
- visidata/loaders/unzip_http.py +6 -5
- visidata/loaders/usv.py +1 -1
- visidata/loaders/vcf.py +16 -16
- visidata/loaders/vds.py +10 -7
- visidata/loaders/vdx.py +30 -5
- visidata/loaders/xlsb.py +8 -1
- visidata/loaders/xlsx.py +145 -25
- visidata/loaders/xml.py +6 -3
- visidata/loaders/xword.py +4 -4
- visidata/loaders/yaml.py +15 -5
- visidata/macos.py +1 -1
- visidata/macros.py +130 -41
- visidata/main.py +119 -94
- visidata/mainloop.py +101 -154
- visidata/man/parse_options.py +2 -2
- visidata/man/vd.1 +302 -147
- visidata/man/vd.txt +291 -151
- visidata/memory.py +3 -3
- visidata/menu.py +104 -423
- visidata/metasheets.py +59 -141
- visidata/modify.py +79 -23
- visidata/motd.py +3 -3
- visidata/mouse.py +137 -0
- visidata/movement.py +43 -35
- visidata/optionssheet.py +99 -0
- visidata/path.py +131 -43
- visidata/pivot.py +74 -47
- visidata/plugins.py +65 -192
- visidata/pyobj.py +50 -201
- visidata/rename_col.py +20 -0
- visidata/save.py +42 -20
- visidata/search.py +54 -10
- visidata/selection.py +84 -5
- visidata/settings.py +162 -24
- visidata/sheets.py +229 -257
- visidata/shell.py +51 -21
- visidata/sidebar.py +162 -0
- visidata/sort.py +11 -4
- visidata/statusbar.py +113 -104
- visidata/stored_list.py +43 -0
- visidata/stored_prop.py +38 -0
- visidata/tests/conftest.py +3 -3
- visidata/tests/test_cliptext.py +39 -0
- visidata/tests/test_commands.py +62 -7
- visidata/tests/test_edittext.py +2 -2
- visidata/tests/test_features.py +17 -0
- visidata/tests/test_menu.py +14 -0
- visidata/tests/test_path.py +13 -4
- visidata/text_source.py +53 -0
- visidata/textsheet.py +10 -3
- visidata/theme.py +44 -0
- visidata/themes/__init__.py +0 -0
- visidata/themes/ascii8.py +84 -0
- visidata/themes/asciimono.py +84 -0
- visidata/themes/light.py +17 -0
- visidata/threads.py +87 -39
- visidata/tuiwin.py +22 -0
- visidata/type_currency.py +22 -3
- visidata/type_date.py +31 -9
- visidata/type_floatsi.py +5 -1
- visidata/undo.py +18 -6
- visidata/utils.py +106 -23
- visidata/vdobj.py +28 -17
- visidata/windows.py +10 -0
- visidata/wrappers.py +9 -3
- visidata-3.0.data/data/share/applications/visidata.desktop +7 -0
- {visidata-2.11.dev0.data → visidata-3.0.data}/data/share/man/man1/vd.1 +302 -147
- {visidata-2.11.dev0.data → visidata-3.0.data}/data/share/man/man1/visidata.1 +302 -147
- visidata-3.0.data/scripts/vd2to3.vdx +9 -0
- {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/METADATA +13 -11
- visidata-3.0.dist-info/RECORD +257 -0
- {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/WHEEL +1 -1
- {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/entry_points.txt +0 -1
- visidata/layout.py +0 -44
- visidata/misc.py +0 -5
- visidata-2.11.dev0.dist-info/RECORD +0 -142
- /visidata/{repeat.py → features/repeat.py} +0 -0
- {visidata-2.11.dev0.data → visidata-3.0.data}/scripts/vd +0 -0
- {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/LICENSE.gpl3 +0 -0
- {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/top_level.txt +0 -0
visidata/loaders/parquet.py
CHANGED
@@ -1,28 +1,84 @@
|
|
1
|
-
from visidata import VisiData,
|
1
|
+
from visidata import Sheet, VisiData, TypedWrapper, anytype, date, vlen, Column, vd
|
2
|
+
from collections import defaultdict
|
2
3
|
|
3
4
|
|
4
5
|
@VisiData.api
|
5
6
|
def open_parquet(vd, p):
|
6
|
-
return ParquetSheet(p.
|
7
|
+
return ParquetSheet(p.base_stem, source=p)
|
8
|
+
|
7
9
|
|
8
10
|
class ParquetColumn(Column):
|
9
11
|
def calcValue(self, row):
|
10
|
-
|
12
|
+
val = self.source[row["__rownum__"]]
|
13
|
+
if val.type == 'large_string':
|
14
|
+
return memoryview(val.as_buffer())[:2**20].tobytes().decode('utf-8')
|
15
|
+
else:
|
16
|
+
return val.as_py()
|
11
17
|
|
12
18
|
|
13
19
|
class ParquetSheet(Sheet):
|
14
20
|
# rowdef: {'__rownum__':int, parquet_col:overridden_value, ...}
|
15
21
|
def iterload(self):
|
16
|
-
|
22
|
+
pa = vd.importExternal("pyarrow", "pyarrow")
|
23
|
+
pq = vd.importExternal("pyarrow.parquet", "pyarrow")
|
17
24
|
from visidata.loaders.arrow import arrow_to_vdtype
|
18
25
|
|
19
|
-
self.
|
26
|
+
if self.source.is_dir():
|
27
|
+
self.tbl = pq.read_table(str(self.source))
|
28
|
+
else:
|
29
|
+
with self.source.open('rb') as f:
|
30
|
+
self.tbl = pq.read_table(f)
|
31
|
+
|
20
32
|
self.columns = []
|
21
33
|
for colname, col in zip(self.tbl.column_names, self.tbl.columns):
|
22
34
|
c = ParquetColumn(colname,
|
23
35
|
type=arrow_to_vdtype(col.type),
|
24
|
-
source=col
|
36
|
+
source=col,
|
37
|
+
cache=(col.type.id == pa.lib.Type_LARGE_STRING))
|
25
38
|
self.addColumn(c)
|
26
39
|
|
27
40
|
for i in range(self.tbl.num_rows):
|
28
41
|
yield dict(__rownum__=i)
|
42
|
+
|
43
|
+
|
44
|
+
@VisiData.api
def save_parquet(vd, p, sheet):
    """Write the visible columns of *sheet* to *p* as a single Parquet record batch.

    Each VisiData column type is mapped to a pyarrow type through ``typemap``;
    column types missing from the map are written as strings.  Values that
    arrive wrapped in a ``TypedWrapper`` are stored as nulls.
    """
    pa = vd.importExternal("pyarrow")
    pq = vd.importExternal("pyarrow.parquet", "pyarrow")

    typemap = {
        anytype: pa.string(),
        int: pa.int64(),
        vlen: pa.int64(),
        float: pa.float64(),
        str: pa.string(),
        date: pa.date64(),
        # list: pa.array(),
    }

    # Every remaining numeric type is stored as a 64-bit float.
    for t in vd.numericTypes:
        if t not in typemap:
            typemap[t] = pa.float64()

    def arrow_type(col):
        return typemap.get(col.type, pa.string())

    # The schema, the batch names and the arrays must all describe the same
    # columns in the same order, so derive all three from this one list.
    cols = list(sheet.visibleCols)

    databycol = defaultdict(list)  # col -> [values]

    for typedvals in sheet.iterdispvals(format=False):
        for col, val in typedvals.items():
            if isinstance(val, TypedWrapper):
                val = None

            databycol[col].append(val)

    # Index by visible column rather than iterating databycol: a sheet with no
    # rows leaves databycol empty, which previously produced zero arrays for
    # N names and made record_batch() fail.  The defaultdict supplies [] here.
    data = [pa.array(databycol[col], type=arrow_type(col)) for col in cols]

    schema = pa.schema([(col.name, arrow_type(col)) for col in cols])

    with p.open_bytes(mode="w") as outf, pq.ParquetWriter(outf, schema) as writer:
        writer.write_batch(
            pa.record_batch(data, names=[col.name for col in cols])
        )
|
visidata/loaders/pcap.py
CHANGED
@@ -17,7 +17,7 @@ services = {} # [('tcp', 25)] -> 'smtp'
|
|
17
17
|
|
18
18
|
@VisiData.api
|
19
19
|
def open_pcap(vd, p):
|
20
|
-
return PcapSheet(p.
|
20
|
+
return PcapSheet(p.base_stem, source=p)
|
21
21
|
|
22
22
|
open_cap = open_pcap
|
23
23
|
open_pcapng = open_pcap
|
@@ -83,8 +83,8 @@ def init_pcap():
|
|
83
83
|
return
|
84
84
|
|
85
85
|
global dpkt, dnslib
|
86
|
-
|
87
|
-
|
86
|
+
dpkt = vd.importExternal('dpkt')
|
87
|
+
dnslib = vd.importExternal('dnslib')
|
88
88
|
|
89
89
|
load_consts(protocols['ethernet'], dpkt.ethernet, 'ETH_TYPE_')
|
90
90
|
load_consts(protocols['ip'], dpkt.ip, 'IP_PROTO_')
|
visidata/loaders/pdf.py
CHANGED
@@ -8,8 +8,8 @@ vd.option('pdf_tables', False, 'parse PDF for tables instead of pages of text',
|
|
8
8
|
@VisiData.api
|
9
9
|
def open_pdf(vd, p):
|
10
10
|
if vd.options.pdf_tables:
|
11
|
-
return TabulaSheet(p.
|
12
|
-
return PdfMinerSheet(p.
|
11
|
+
return TabulaSheet(p.base_stem, source=p)
|
12
|
+
return PdfMinerSheet(p.base_stem, source=p)
|
13
13
|
|
14
14
|
|
15
15
|
class PdfMinerSheet(TableSheet):
|
@@ -20,6 +20,7 @@ class PdfMinerSheet(TableSheet):
|
|
20
20
|
ColumnItem('contents', 2),
|
21
21
|
]
|
22
22
|
def iterload(self):
|
23
|
+
vd.importExternal('pdfminer.high_level', 'pdfminer.six')
|
23
24
|
import pdfminer.high_level
|
24
25
|
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
25
26
|
from pdfminer.converter import TextConverter, PDFPageAggregator
|
@@ -38,6 +39,6 @@ class PdfMinerSheet(TableSheet):
|
|
38
39
|
|
39
40
|
class TabulaSheet(IndexSheet):
|
40
41
|
def iterload(self):
|
41
|
-
|
42
|
+
tabula = vd.importExternal('tabula')
|
42
43
|
for i, t in enumerate(tabula.read_pdf(self.source, pages='all', multiple_tables=True)):
|
43
44
|
yield PandasSheet(self.source.name, i, source=t)
|
visidata/loaders/png.py
CHANGED
@@ -1,32 +1,24 @@
|
|
1
1
|
import functools
|
2
2
|
|
3
|
-
from visidata import VisiData, Sheet, Column, Progress, colors, ColumnItem, Canvas, asyncthread
|
3
|
+
from visidata import VisiData, Sheet, Column, Progress, colors, ColumnItem, Canvas, asyncthread, vd, rgb_to_attr
|
4
4
|
|
5
5
|
|
6
6
|
@VisiData.api
|
7
7
|
def open_png(vd, p):
|
8
|
-
return PNGSheet(p.
|
8
|
+
return PNGSheet(p.base_stem, source=p)
|
9
9
|
|
10
|
-
@functools.lru_cache(256)
|
11
|
-
def rgb_to_attr(r,g,b,a):
|
12
|
-
if a == 0: return 0
|
13
|
-
if r > g and r > b: return colors['red']
|
14
|
-
if g > r and g > b: return colors['green']
|
15
|
-
if b > r and b > g: return colors['blue']
|
16
|
-
if a == 255: return colors['white']
|
17
|
-
return 0
|
18
10
|
|
19
11
|
class PNGSheet(Sheet):
|
20
12
|
rowtype = 'pixels' # rowdef: list(x, y, r, g, b, a)
|
21
13
|
columns = [ColumnItem(name, i, type=int) for i, name in enumerate('x y R G B A'.split())] + [
|
22
|
-
Column('attr',
|
14
|
+
Column('attr', getter=lambda col,row: rgb_to_attr(*row[2:]))
|
23
15
|
]
|
24
16
|
nKeys = 2
|
25
17
|
def newRow(self):
|
26
18
|
return list((None, None, 0, 0, 0, 0))
|
27
19
|
|
28
20
|
def iterload(self):
|
29
|
-
|
21
|
+
png = vd.importExternal('png', 'pypng')
|
30
22
|
self.png = png.Reader(bytes=self.source.read_bytes())
|
31
23
|
self.width, self.height, pixels, md = self.png.asRGBA()
|
32
24
|
for y, row in enumerate(pixels):
|
@@ -78,7 +70,7 @@ def save_png(vd, p, vs):
|
|
78
70
|
|
79
71
|
vd.status('saving %sx%s' % (vs.width, vs.height))
|
80
72
|
|
81
|
-
|
73
|
+
vd.importExternal('png', 'pypng')
|
82
74
|
img = png.from_array(pixels, mode='RGBA')
|
83
75
|
with open(p, 'wb') as fp:
|
84
76
|
img.write(fp)
|
@@ -86,4 +78,18 @@ def save_png(vd, p, vs):
|
|
86
78
|
vd.status('saved')
|
87
79
|
|
88
80
|
|
81
|
+
def blockchar(i:int):
    '''Return the quadrant block character for the 8-dot cell pattern *i*.

    The eight dot bits are folded pairwise into four quadrant bits:

         1   8              1 2
         2  16     into
         4  32              4 8
        64 128

    A quadrant is filled when either of its two source dots is set.
    '''
    quadrants = (
        (0b00000011, 1),  # dots 1|2    -> upper-left
        (0b00011000, 2),  # dots 8|16   -> upper-right
        (0b01000100, 4),  # dots 4|64   -> lower-left
        (0b10100000, 8),  # dots 32|128 -> lower-right
    )
    idx = sum(weight for mask, weight in quadrants if i & mask)
    return ' ▘▝▀▖▌▞▛▗▚▐▜▄▙▟█'[idx]
|
92
|
+
|
93
|
+
|
94
|
+
PNGDrawing.options.disp_canvas_charset = ''.join(blockchar(i) for i in range(256))
|
89
95
|
PNGSheet.addCommand('.', 'plot-sheet', 'vd.push(PNGDrawing(name+"_plot", source=sheet, sourceRows=rows))', 'plot this png')
|
visidata/loaders/postgres.py
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
import random
|
2
|
+
from urllib.parse import urlparse
|
2
3
|
|
3
|
-
from visidata import VisiData, vd, Sheet, options, anytype,
|
4
|
+
from visidata import VisiData, vd, Sheet, options, anytype, asyncthread, ColumnItem
|
4
5
|
|
5
6
|
__all__ = ['openurl_postgres', 'openurl_postgresql', 'openurl_rds', 'PgTable', 'PgTablesSheet']
|
6
7
|
|
7
8
|
vd.option('postgres_schema', 'public', 'The desired schema for the Postgres database')
|
8
9
|
|
9
10
|
def codeToType(type_code, colname):
|
10
|
-
|
11
|
+
psycopg2 = vd.importExternal('psycopg2', 'psycopg2-binary')
|
11
12
|
try:
|
12
13
|
tname = psycopg2._psycopg.string_types[type_code].name
|
13
14
|
if 'INTEGER' in tname:
|
@@ -21,8 +22,8 @@ def codeToType(type_code, colname):
|
|
21
22
|
|
22
23
|
@VisiData.api
|
23
24
|
def openurl_rds(vd, url, filetype=None):
|
24
|
-
|
25
|
-
|
25
|
+
boto3 = vd.importExternal('boto3')
|
26
|
+
psycopg2 = vd.importExternal('psycopg2', 'psycopg2-binary')
|
26
27
|
|
27
28
|
rds = boto3.client('rds')
|
28
29
|
url = urlparse(url.given)
|
@@ -42,7 +43,7 @@ def openurl_rds(vd, url, filetype=None):
|
|
42
43
|
|
43
44
|
@VisiData.api
|
44
45
|
def openurl_postgres(vd, url, filetype=None):
|
45
|
-
|
46
|
+
psycopg2 = vd.importExternal('psycopg2', 'psycopg2-binary')
|
46
47
|
|
47
48
|
url = urlparse(url.given)
|
48
49
|
dbname = url.path[1:]
|
@@ -87,7 +88,7 @@ def postgresGetColumns(vd, cur):
|
|
87
88
|
class PgTablesSheet(Sheet):
|
88
89
|
rowtype = 'tables'
|
89
90
|
|
90
|
-
def
|
91
|
+
def loader(self):
|
91
92
|
schema = options.postgres_schema
|
92
93
|
qstr = f'''
|
93
94
|
SELECT relname table_name, column_count.ncols, reltuples::bigint est_nrows
|
@@ -122,9 +123,9 @@ class PgTable(Sheet):
|
|
122
123
|
@asyncthread
|
123
124
|
def reload(self):
|
124
125
|
if self.options.postgres_schema:
|
125
|
-
source = f"{self.options.postgres_schema}.{self.source}"
|
126
|
+
source = f'"{self.options.postgres_schema}"."{self.source}"'
|
126
127
|
else:
|
127
|
-
source = self.source
|
128
|
+
source = f'"{self.source}"'
|
128
129
|
with self.sql.cur(f"SELECT * FROM {source}") as cur:
|
129
130
|
self.rows = []
|
130
131
|
r = cur.fetchone()
|
visidata/loaders/rec.py
CHANGED
@@ -5,7 +5,7 @@ from visidata import VisiData, vd, Progress, TableSheet, IndexSheet, ItemColumn,
|
|
5
5
|
|
6
6
|
@VisiData.api
|
7
7
|
def open_rec(vd, p):
|
8
|
-
return RecIndexSheet(p.
|
8
|
+
return RecIndexSheet(p.base_stem, source=p)
|
9
9
|
|
10
10
|
def decode_multiline(line, fp):
|
11
11
|
'Parse *line* and lookahead into *fp* as iterator for continuing lines. Return (multiline, next_line) where *multiline* can contain newlines and *next_line is the line after the combined *multiline*. Handle "\\" at end and "+" at beginning of lines. *next_line* will be None iff iterator is exhausted.'
|
@@ -49,6 +49,7 @@ class RecIndexSheet(IndexSheet):
|
|
49
49
|
|
50
50
|
fp = iter(self.source)
|
51
51
|
while next_line is not None:
|
52
|
+
try:
|
52
53
|
line, next_line = decode_multiline(next_line, fp)
|
53
54
|
line = line.lstrip()
|
54
55
|
|
@@ -62,6 +63,7 @@ class RecIndexSheet(IndexSheet):
|
|
62
63
|
|
63
64
|
if not sheet or (newRecord and line[0] == '%'):
|
64
65
|
sheet = RecSheet('', columns=[], rows=[], source=self, comments=comments)
|
66
|
+
sheet.columns = []
|
65
67
|
comments = []
|
66
68
|
yield sheet
|
67
69
|
newRecord = False
|
@@ -81,7 +83,7 @@ class RecIndexSheet(IndexSheet):
|
|
81
83
|
if colname not in sheet.colnames:
|
82
84
|
sheet.addColumn(ItemColumn(colname, keycol=i+1))
|
83
85
|
elif desc in ['sort']:
|
84
|
-
sheet.
|
86
|
+
sheet._ordering = [(colname, False) for colname in rest.split()]
|
85
87
|
elif desc in ['type', 'typedef']:
|
86
88
|
pass
|
87
89
|
elif desc in ['auto']: # autoincrement columns should be present already
|
@@ -112,6 +114,8 @@ class RecIndexSheet(IndexSheet):
|
|
112
114
|
row[name].append(rest)
|
113
115
|
else:
|
114
116
|
row[name] = rest
|
117
|
+
except Exception as e:
|
118
|
+
vd.exceptionCaught(e)
|
115
119
|
|
116
120
|
for sheet in Progress(self.rows):
|
117
121
|
sheet.sort()
|
@@ -119,7 +123,7 @@ class RecIndexSheet(IndexSheet):
|
|
119
123
|
|
120
124
|
@VisiData.api
|
121
125
|
def save_rec(vd, p, *vsheets):
|
122
|
-
with p.
|
126
|
+
with p.open(mode='w') as fp:
|
123
127
|
for vs in vsheets:
|
124
128
|
comments = getattr(vs, 'comments', [])
|
125
129
|
if comments:
|
visidata/loaders/s3.py
ADDED
@@ -0,0 +1,342 @@
|
|
1
|
+
"""Allow VisiData to work directly with Amazon S3 paths.
|
2
|
+
|
3
|
+
Functionality is more limited than local paths, but supports:
|
4
|
+
|
5
|
+
* Navigating among directories (S3 prefixes)
|
6
|
+
* Opening supported filetypes, including compressed files
|
7
|
+
* Versioned buckets
|
8
|
+
"""
|
9
|
+
|
10
|
+
import textwrap
|
11
|
+
from visidata import (
|
12
|
+
ENTER,
|
13
|
+
Column,
|
14
|
+
ItemColumn,
|
15
|
+
Path,
|
16
|
+
Sheet,
|
17
|
+
VisiData,
|
18
|
+
asyncthread,
|
19
|
+
date,
|
20
|
+
vd,
|
21
|
+
)
|
22
|
+
|
23
|
+
# Options controlling the S3 loader.

# The default is an empty string rather than None; openurl_s3() and S3Path.fs
# both convert an empty value back to None before handing it to s3fs.
vd.option(
    "s3_endpoint",
    "",
    "alternate S3 endpoint, used for local testing or alternative S3-compatible services",
    replay=True,
)
# When enabled, S3DirSheet lists a path containing any of * ? [ ] with fs.glob
# instead of fs.ls.
vd.option("s3_glob", True, "enable glob-matching for S3 paths", replay=True)
# Default for the version_aware setting of S3Path and S3DirSheet when the
# caller does not pass one.
vd.option(
    "s3_version_aware",
    False,
    "show all object versions in a versioned bucket",
    replay=True,
)
|
36
|
+
|
37
|
+
|
38
|
+
class S3Path(Path):
    """A Path-like object representing an S3 file (object) or directory (prefix).

    *version_aware* selects whether the underlying s3fs filesystem tracks
    object versions; *version_id* pins one specific version and is only kept
    when the path is version-aware.
    """

    # Lazily created s3fs filesystem; see the `fs` property.
    _fs = None

    def __init__(self, path, version_aware=None, version_id=None):
        super().__init__(path)
        self.given = path

        # Fall back to the global option only when the caller did not decide.
        # An explicit False must win, as it does in S3DirSheet.__init__;
        # previously `version_aware or option` let the option override it.
        if version_aware is None:
            version_aware = vd.options.s3_version_aware
        self.version_aware = version_aware

        # A version id is meaningless without version awareness; empty ids
        # are normalised to None.
        self.version_id = (version_id or None) if self.version_aware else None

    @property
    def fs(self):
        """The s3fs filesystem for this path, created on first access."""
        if self._fs is None:
            s3fs_core = vd.importExternal("s3fs.core", "s3fs")
            self._fs = s3fs_core.S3FileSystem(
                # An empty s3_endpoint option means "use the default endpoint".
                client_kwargs={"endpoint_url": vd.options.s3_endpoint or None},
                version_aware=self.version_aware,
            )

        return self._fs

    @fs.setter
    def fs(self, val):
        self._fs = val

    def open(self, mode='r', **kwargs):
        """Open the current S3 path, decompressing along the way if needed.

        For a compressed path the object is opened as raw bytes and wrapped in
        the matching gzip/bz2/lzma opener, which receives *mode* and *kwargs*.
        Otherwise the s3fs file object is returned directly.
        """
        # NOTE(review): with compression the raw object is always opened "rb",
        # even if *mode* asks for writing -- confirm write support is not
        # expected here.  Also, gzip/bz2/lzma treat a bare 'r' as binary, so
        # callers that want text presumably need to pass 'rt'.
        raw_mode = "rb" if self.compression else mode
        fp = self.fs.open(self.given, mode=raw_mode, version_id=self.version_id)

        # Workaround for https://github.com/ajkerrigan/visidata-plugins/issues/12
        if hasattr(fp, "cache") and fp.cache.size != fp.size:
            vd.debug(
                f"updating cache size from {fp.cache.size} to {fp.size} to match object size"
            )
            fp.cache.size = fp.size

        if self.compression == "gz":
            import gzip

            return gzip.open(fp, mode, **kwargs)

        if self.compression == "bz2":
            import bz2

            return bz2.open(fp, mode, **kwargs)

        if self.compression == "xz":
            import lzma

            return lzma.open(fp, mode, **kwargs)

        return fp
|
92
|
+
|
93
|
+
|
94
|
+
class S3DirSheet(Sheet):
    """Display a listing of files and directories (objects and prefixes) in an S3 path.

    Allow single or multiple entries to be opened in separate sheets.
    """

    columns = [
        Column("name", getter=lambda col, row: col.sheet.object_display_name(row)),
        ItemColumn("type"),
        ItemColumn("size", type=int),
        ItemColumn("modtime", "LastModified", type=date),
        ItemColumn("latest", "IsLatest", type=bool),
        ItemColumn("version_id", "VersionId", type=str, width=0),
    ]

    def __init__(self, name, source, version_aware=None):
        """Create a listing sheet for the S3 path *source*.

        *version_aware* overrides the ``s3_version_aware`` option when it is
        not None.
        """
        import re

        super().__init__(name=name, source=source)
        self.rowtype = "files"
        self.nKeys = 1
        # Glob listing is used only when the option is on AND the given path
        # actually contains a glob metacharacter.
        self.use_glob_matching = vd.options.s3_glob and re.search(
            r"[*?\[\]]", self.source.given
        )
        self.version_aware = (
            vd.options.s3_version_aware if version_aware is None else version_aware
        )
        self.fs = source.fs

    def object_display_name(self, row):
        """Provide a friendly display name for an S3 path.

        When listing the contents of a single S3 prefix, the name can chop off
        prefix bits to imitate a directory browser. When glob matching,
        include the full key name for each entry.
        """
        full_name = row.get("name")
        if self.use_glob_matching:
            return full_name
        return full_name.rpartition("/")[2]

    def iterload(self):
        """Delegate to the underlying filesystem to fetch S3 entries."""
        list_func = self.fs.glob if self.use_glob_matching else self.fs.ls

        if not (
            self.use_glob_matching
            or self.fs.exists(self.source.given)
            or self.fs.isdir(self.source.given)
        ):
            vd.fail(f"unable to open S3 path: {self.source.given}")

        # The "latest" column is only shown for version-aware listings.
        self.column("latest").hide(not self.version_aware)

        for key in list_func(str(self.source)):
            if self.version_aware and self.fs.isfile(key):
                # One row per object version.  *key* starts with the bucket
                # name, so everything after the first "/" is compared against
                # the version record's "Key".
                yield from (
                    {**obj_version, "name": key, "type": "file"}
                    for obj_version in self.fs.object_version_info(key)
                    if key.partition("/")[2] == obj_version["Key"]
                )
            else:
                yield self.fs.stat(key)

    @asyncthread
    def download(self, rows, savepath):
        """Download files and directories to a local path.

        Recurse through subdirectories.
        """
        remote_files = [row["name"] for row in rows]
        self.fs.download(remote_files, str(savepath), recursive=True)

    def open_rows(self, rows):
        """Open new sheets for the target rows.

        Returns a lazy generator: each sheet is opened only as it is consumed.
        """
        return (
            vd.openSource(
                S3Path(
                    "s3://{}".format(row["name"]),
                    version_aware=self.version_aware,
                    version_id=row.get("VersionId"),
                )
            )
            for row in rows
        )

    def join_rows(self, rows):
        """Open new sheets for the target rows and concatenate their contents."""
        sheets = list(self.open_rows(rows))
        if not sheets:
            # Report the empty selection instead of failing on sheets[0] below.
            vd.fail("no S3 entries to join")

        for sheet in vd.Progress(sheets):
            sheet.reload()

        # Wait for all sheets to fully load before joining them.
        # 'append' is the only join type that makes sense here,
        # since we're joining freshly opened sheets with no key
        # columns.
        vd.sync()
        return sheets[0].openJoin(sheets[1:], jointype="append")

    def refresh_path(self, path=None):
        """Clear the s3fs cache for the given path and reload.

        By default, clear the entire cache.
        """
        self.fs.invalidate_cache(path)
        self.reload()

    def toggle_versioning(self):
        """Enable or disable support for S3 versioning."""
        self.version_aware = not self.version_aware
        self.fs.version_aware = self.version_aware
        vd.status(f's3 versioning {"enabled" if self.version_aware else "disabled"}')
        if self.currentThreads:
            vd.debug("cancelling threads before reloading")
            vd.cancelThread(*self.currentThreads)
        self.reload()
|
214
|
+
|
215
|
+
|
216
|
+
@VisiData.api
def openurl_s3(vd, p, filetype):
    """Open a sheet for an S3 path.

    S3 directories (prefixes) require special handling, but files (objects)
    can use standard VisiData "open" functions.

    *p* is re-wrapped as an S3Path, carrying over ``version_aware`` and
    ``version_id`` when *p* already has them.  Anything that is not a file
    gets an S3DirSheet.  For files, the loader is ``vd.open_<filetype>``, with
    *filetype* defaulting to the path's extension and then to "txt".
    """

    # Non-obvious behavior here: For the default case, we don't want to send
    # a custom endpoint to s3fs. However, using None as a default trips up
    # VisiData's type detection for the endpoint option. So we use an empty
    # string as the default instead, and convert back to None here.
    endpoint = vd.options.s3_endpoint or None

    p = S3Path(
        str(p.given),
        version_aware=getattr(p, "version_aware", vd.options.s3_version_aware),
        version_id=getattr(p, "version_id", None),
    )

    p.fs.version_aware = p.version_aware
    # Reconnect only when the filesystem is using a different endpoint than
    # the one currently configured.
    if p.fs.client_kwargs.get("endpoint_url", "") != endpoint:
        p.fs.client_kwargs = {"endpoint_url": endpoint}
        p.fs.connect()

    if not p.fs.isfile(str(p.given)):
        return S3DirSheet(p.base_stem, source=p, version_aware=p.version_aware)

    if not filetype:
        filetype = p.ext or "txt"

    # Default to None: without it, an unknown filetype raised AttributeError
    # here and the txt fallback below could never run.
    openfunc = getattr(vd, f"open_{filetype.lower()}", None)
    if not openfunc:
        vd.warning(f"no loader found for {filetype} files, falling back to txt")
        filetype = "txt"
        openfunc = vd.open_txt

    assert callable(openfunc), f"no function/method available to open {p.given}"
    vs = openfunc(p)
    vd.status(
        f'opening {p.given} as {filetype} (version id: {p.version_id or "latest"})'
    )
    return vs
|
259
|
+
|
260
|
+
|
261
|
+
# Command bindings for S3DirSheet.  Each call gives a key, a command name,
# the code string to run, and a help string.

# open_rows() returns a generator, so next() opens just the cursor row.
S3DirSheet.addCommand(
    ENTER,
    "s3-open-row",
    "vd.push(next(sheet.open_rows([cursorRow])))",
    "open the current S3 entry",
)
S3DirSheet.addCommand(
    "g" + ENTER,
    "s3-open-rows",
    "for vs in sheet.open_rows(selectedRows): vd.push(vs)",
    "open all selected S3 entries",
)
# Invalidate the s3fs cache for this sheet's path only.
S3DirSheet.addCommand(
    "z^R",
    "s3-refresh-sheet",
    "sheet.refresh_path(str(sheet.source))",
    "clear the s3fs cache for this path, then reload",
)
# refresh_path() without an argument invalidates the whole cache.
S3DirSheet.addCommand(
    "gz^R",
    "s3-refresh-sheet-all",
    "sheet.refresh_path()",
    "clear the entire s3fs cache, then reload",
)
S3DirSheet.addCommand(
    "^V",
    "s3-toggle-versioning",
    "sheet.toggle_versioning()",
    "enable/disable support for S3 versioning",
)
S3DirSheet.addCommand(
    "&",
    "s3-join-rows",
    "vd.push(sheet.join_rows(selectedRows))",
    "open and join sheets for selected S3 entries",
)
# Multi-statement command bodies are written as dedented triple-quoted blocks.
S3DirSheet.addCommand(
    "gx",
    "s3-download-rows",
    textwrap.dedent(
        """
        savepath = inputPath("download selected rows to: ", value=".")
        sheet.download(selectedRows, savepath)
        """
    ),
    "download selected files and directories",
)

S3DirSheet.addCommand(
    "x",
    "s3-download-row",
    # Note about the use of `_path.name` here. Given a `visidata.Path`
    # object `path`, `path._path` is a `pathlib.Path` object.
    #
    # `visidata.Path` objects do some fun parsing to pick out
    # file types and extensions, handle compression transparently,
    # etc. That parsing leaves the `name` attribute without a file
    # extension, and makes it a little tricky to tack back on.
    #
    # `pathlib.Path` objects have a `name` with the extension intact.
    # That makes `path._path.name` a convenient default output path.
    textwrap.dedent(
        """
        savepath = inputPath("download to: ", value=Path(cursorRow["name"])._path.name)
        sheet.download([cursorRow], savepath)
        """
    ),
    "download the file or directory in the cursor row",
)

# Menu entries: each line is a menu path followed by the name of one of the
# commands registered above.
vd.addMenuItems(
    """
File > Toggle versioning > s3-toggle-versioning
File > Refresh > Current path > s3-refresh-sheet
File > Refresh > All > s3-refresh-sheet-all
Row > Download > Current row > s3-download-row
Row > Download > Selected rows > s3-download-rows
Data > Join > Selected rows > s3-join-rows
"""
)

# Register S3DirSheet under its own name via vd.addGlobals.
vd.addGlobals(S3DirSheet=S3DirSheet)
|
visidata/loaders/sas.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
import logging
|
2
2
|
|
3
|
-
from visidata import VisiData, Sheet, Progress, ColumnItem, anytype
|
3
|
+
from visidata import VisiData, Sheet, Progress, ColumnItem, anytype, vd
|
4
4
|
|
5
5
|
SASTypes = {
|
6
6
|
'string': str,
|
@@ -9,15 +9,15 @@ SASTypes = {
|
|
9
9
|
|
10
10
|
@VisiData.api
|
11
11
|
def open_xpt(vd, p):
|
12
|
-
return XptSheet(p.
|
12
|
+
return XptSheet(p.base_stem, source=p)
|
13
13
|
|
14
14
|
@VisiData.api
|
15
15
|
def open_sas7bdat(vd, p):
|
16
|
-
return SasSheet(p.
|
16
|
+
return SasSheet(p.base_stem, source=p)
|
17
17
|
|
18
18
|
class XptSheet(Sheet):
|
19
19
|
def iterload(self):
|
20
|
-
|
20
|
+
xport = vd.importExternal('xport')
|
21
21
|
with open(self.source, 'rb') as fp:
|
22
22
|
self.rdr = xport.Reader(fp)
|
23
23
|
|
@@ -30,7 +30,7 @@ class XptSheet(Sheet):
|
|
30
30
|
|
31
31
|
class SasSheet(Sheet):
|
32
32
|
def iterload(self):
|
33
|
-
|
33
|
+
sas7bdat = vd.importExternal('sas7bdat')
|
34
34
|
self.dat = sas7bdat.SAS7BDAT(str(self.source), skip_header=True, log_level=logging.CRITICAL)
|
35
35
|
self.columns = []
|
36
36
|
for col in self.dat.columns:
|