visidata 2.11.dev0__py3-none-any.whl → 3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- visidata/__init__.py +72 -91
- visidata/_input.py +263 -44
- visidata/_open.py +84 -29
- visidata/_types.py +22 -4
- visidata/_urlcache.py +17 -4
- visidata/aggregators.py +65 -25
- visidata/apps/__init__.py +0 -0
- visidata/apps/vdsql/__about__.py +8 -0
- visidata/apps/vdsql/__init__.py +5 -0
- visidata/apps/vdsql/__main__.py +27 -0
- visidata/apps/vdsql/_ibis.py +748 -0
- visidata/apps/vdsql/bigquery.py +61 -0
- visidata/apps/vdsql/clickhouse.py +53 -0
- visidata/apps/vdsql/setup.py +40 -0
- visidata/apps/vdsql/snowflake.py +67 -0
- visidata/apps/vgit/__init__.py +13 -0
- visidata/apps/vgit/__main__.py +3 -0
- visidata/apps/vgit/abort.py +23 -0
- visidata/apps/vgit/blame.py +76 -0
- visidata/apps/vgit/branch.py +153 -0
- visidata/apps/vgit/config.py +95 -0
- visidata/apps/vgit/diff.py +169 -0
- visidata/apps/vgit/gitsheet.py +161 -0
- visidata/apps/vgit/grep.py +37 -0
- visidata/apps/vgit/log.py +81 -0
- visidata/apps/vgit/main.py +55 -0
- visidata/apps/vgit/remote.py +57 -0
- visidata/apps/vgit/repos.py +71 -0
- visidata/apps/vgit/setup.py +37 -0
- visidata/apps/vgit/stash.py +69 -0
- visidata/apps/vgit/status.py +204 -0
- visidata/apps/vgit/statusbar.py +34 -0
- visidata/basesheet.py +59 -50
- visidata/canvas.py +251 -99
- visidata/choose.py +15 -11
- visidata/clean_names.py +29 -0
- visidata/clipboard.py +84 -18
- visidata/cliptext.py +220 -46
- visidata/cmdlog.py +89 -114
- visidata/color.py +142 -56
- visidata/column.py +134 -131
- visidata/ddw/input.ddw +74 -79
- visidata/ddw/regex.ddw +57 -0
- visidata/ddwplay.py +33 -14
- visidata/deprecated.py +77 -3
- visidata/desktop/visidata.desktop +7 -0
- visidata/editor.py +12 -6
- visidata/errors.py +5 -1
- visidata/experimental/__init__.py +0 -0
- visidata/experimental/diff_sheet.py +29 -0
- visidata/experimental/digit_autoedit.py +6 -0
- visidata/experimental/gdrive.py +89 -0
- visidata/experimental/google.py +37 -0
- visidata/experimental/gsheets.py +79 -0
- visidata/experimental/live_search.py +37 -0
- visidata/experimental/liveupdate.py +45 -0
- visidata/experimental/mark.py +133 -0
- visidata/experimental/noahs_tapestry/__init__.py +1 -0
- visidata/experimental/noahs_tapestry/tapestry.py +147 -0
- visidata/experimental/rownum.py +73 -0
- visidata/experimental/slide_cells.py +26 -0
- visidata/expr.py +8 -4
- visidata/extensible.py +32 -6
- visidata/features/__init__.py +0 -0
- visidata/features/addcol_audiometadata.py +42 -0
- visidata/features/addcol_histogram.py +34 -0
- visidata/features/canvas_save_svg.py +69 -0
- visidata/features/change_precision.py +46 -0
- visidata/features/cmdpalette.py +163 -0
- visidata/features/colorbrewer.py +363 -0
- visidata/{colorsheet.py → features/colorsheet.py} +17 -16
- visidata/features/command_server.py +105 -0
- visidata/features/currency_to_usd.py +70 -0
- visidata/{customdate.py → features/customdate.py} +2 -0
- visidata/features/dedupe.py +132 -0
- visidata/{describe.py → features/describe.py} +17 -15
- visidata/features/errors_guide.py +26 -0
- visidata/features/expand_cols.py +202 -0
- visidata/{fill.py → features/fill.py} +4 -2
- visidata/{freeze.py → features/freeze.py} +11 -6
- visidata/features/graph_seaborn.py +79 -0
- visidata/features/helloworld.py +10 -0
- visidata/features/hint_types.py +17 -0
- visidata/{incr.py → features/incr.py} +5 -0
- visidata/{join.py → features/join.py} +107 -53
- visidata/features/known_cols.py +21 -0
- visidata/features/layout.py +62 -0
- visidata/{melt.py → features/melt.py} +33 -21
- visidata/features/normcol.py +118 -0
- visidata/features/open_config.py +7 -0
- visidata/features/open_syspaste.py +18 -0
- visidata/features/ping.py +157 -0
- visidata/features/procmgr.py +208 -0
- visidata/features/random_sample.py +6 -0
- visidata/{regex.py → features/regex.py} +47 -31
- visidata/features/reload_every.py +55 -0
- visidata/features/rename_col_cascade.py +30 -0
- visidata/features/scroll_context.py +60 -0
- visidata/features/select_equal_selected.py +11 -0
- visidata/features/setcol_fake.py +65 -0
- visidata/{slide.py → features/slide.py} +75 -21
- visidata/features/sparkline.py +48 -0
- visidata/features/status_source.py +20 -0
- visidata/{sysedit.py → features/sysedit.py} +2 -1
- visidata/features/sysopen_mailcap.py +46 -0
- visidata/features/term_extras.py +13 -0
- visidata/{transpose.py → features/transpose.py} +5 -4
- visidata/features/type_ipaddr.py +73 -0
- visidata/features/type_url.py +11 -0
- visidata/{unfurl.py → features/unfurl.py} +9 -9
- visidata/{window.py → features/window.py} +2 -2
- visidata/form.py +50 -21
- visidata/freqtbl.py +81 -33
- visidata/fuzzymatch.py +414 -0
- visidata/graph.py +105 -33
- visidata/guide.py +180 -0
- visidata/help.py +75 -44
- visidata/hint.py +39 -0
- visidata/indexsheet.py +109 -0
- visidata/input_history.py +55 -0
- visidata/interface.py +58 -0
- visidata/keys.py +17 -16
- visidata/loaders/__init__.py +9 -0
- visidata/loaders/_pandas.py +61 -21
- visidata/loaders/api_airtable.py +70 -0
- visidata/loaders/api_bitio.py +102 -0
- visidata/loaders/api_matrix.py +148 -0
- visidata/loaders/api_reddit.py +306 -0
- visidata/loaders/api_zulip.py +249 -0
- visidata/loaders/archive.py +41 -7
- visidata/loaders/arrow.py +7 -7
- visidata/loaders/conll.py +49 -0
- visidata/loaders/csv.py +25 -7
- visidata/loaders/eml.py +3 -4
- visidata/loaders/f5log.py +1204 -0
- visidata/loaders/fec.py +325 -0
- visidata/loaders/fixed_width.py +3 -5
- visidata/loaders/frictionless.py +3 -3
- visidata/loaders/geojson.py +8 -5
- visidata/loaders/google.py +48 -0
- visidata/loaders/graphviz.py +4 -4
- visidata/loaders/hdf5.py +4 -4
- visidata/loaders/html.py +48 -10
- visidata/loaders/http.py +84 -30
- visidata/loaders/imap.py +20 -10
- visidata/loaders/jrnl.py +52 -0
- visidata/loaders/json.py +83 -29
- visidata/loaders/jsonla.py +74 -0
- visidata/loaders/lsv.py +15 -11
- visidata/loaders/mailbox.py +40 -0
- visidata/loaders/markdown.py +1 -3
- visidata/loaders/mbtiles.py +4 -5
- visidata/loaders/mysql.py +11 -13
- visidata/loaders/npy.py +7 -7
- visidata/loaders/odf.py +4 -1
- visidata/loaders/orgmode.py +428 -0
- visidata/loaders/pandas_freqtbl.py +14 -20
- visidata/loaders/parquet.py +62 -6
- visidata/loaders/pcap.py +3 -3
- visidata/loaders/pdf.py +4 -3
- visidata/loaders/png.py +19 -13
- visidata/loaders/postgres.py +9 -8
- visidata/loaders/rec.py +7 -3
- visidata/loaders/s3.py +342 -0
- visidata/loaders/sas.py +5 -5
- visidata/loaders/scrape.py +186 -0
- visidata/loaders/shp.py +6 -5
- visidata/loaders/spss.py +5 -6
- visidata/loaders/sqlite.py +68 -28
- visidata/loaders/texttables.py +1 -1
- visidata/loaders/toml.py +60 -0
- visidata/loaders/tsv.py +61 -19
- visidata/loaders/ttf.py +19 -7
- visidata/loaders/unzip_http.py +6 -5
- visidata/loaders/usv.py +1 -1
- visidata/loaders/vcf.py +16 -16
- visidata/loaders/vds.py +10 -7
- visidata/loaders/vdx.py +30 -5
- visidata/loaders/xlsb.py +8 -1
- visidata/loaders/xlsx.py +145 -25
- visidata/loaders/xml.py +6 -3
- visidata/loaders/xword.py +4 -4
- visidata/loaders/yaml.py +15 -5
- visidata/macos.py +1 -1
- visidata/macros.py +130 -41
- visidata/main.py +119 -94
- visidata/mainloop.py +101 -154
- visidata/man/parse_options.py +2 -2
- visidata/man/vd.1 +302 -147
- visidata/man/vd.txt +291 -151
- visidata/memory.py +3 -3
- visidata/menu.py +104 -423
- visidata/metasheets.py +59 -141
- visidata/modify.py +79 -23
- visidata/motd.py +3 -3
- visidata/mouse.py +137 -0
- visidata/movement.py +43 -35
- visidata/optionssheet.py +99 -0
- visidata/path.py +131 -43
- visidata/pivot.py +74 -47
- visidata/plugins.py +65 -192
- visidata/pyobj.py +50 -201
- visidata/rename_col.py +20 -0
- visidata/save.py +42 -20
- visidata/search.py +54 -10
- visidata/selection.py +84 -5
- visidata/settings.py +162 -24
- visidata/sheets.py +229 -257
- visidata/shell.py +51 -21
- visidata/sidebar.py +162 -0
- visidata/sort.py +11 -4
- visidata/statusbar.py +113 -104
- visidata/stored_list.py +43 -0
- visidata/stored_prop.py +38 -0
- visidata/tests/conftest.py +3 -3
- visidata/tests/test_cliptext.py +39 -0
- visidata/tests/test_commands.py +62 -7
- visidata/tests/test_edittext.py +2 -2
- visidata/tests/test_features.py +17 -0
- visidata/tests/test_menu.py +14 -0
- visidata/tests/test_path.py +13 -4
- visidata/text_source.py +53 -0
- visidata/textsheet.py +10 -3
- visidata/theme.py +44 -0
- visidata/themes/__init__.py +0 -0
- visidata/themes/ascii8.py +84 -0
- visidata/themes/asciimono.py +84 -0
- visidata/themes/light.py +17 -0
- visidata/threads.py +87 -39
- visidata/tuiwin.py +22 -0
- visidata/type_currency.py +22 -3
- visidata/type_date.py +31 -9
- visidata/type_floatsi.py +5 -1
- visidata/undo.py +18 -6
- visidata/utils.py +106 -23
- visidata/vdobj.py +28 -17
- visidata/windows.py +10 -0
- visidata/wrappers.py +9 -3
- visidata-3.0.data/data/share/applications/visidata.desktop +7 -0
- {visidata-2.11.dev0.data → visidata-3.0.data}/data/share/man/man1/vd.1 +302 -147
- {visidata-2.11.dev0.data → visidata-3.0.data}/data/share/man/man1/visidata.1 +302 -147
- visidata-3.0.data/scripts/vd2to3.vdx +9 -0
- {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/METADATA +13 -11
- visidata-3.0.dist-info/RECORD +257 -0
- {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/WHEEL +1 -1
- {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/entry_points.txt +0 -1
- visidata/layout.py +0 -44
- visidata/misc.py +0 -5
- visidata-2.11.dev0.dist-info/RECORD +0 -142
- /visidata/{repeat.py → features/repeat.py} +0 -0
- {visidata-2.11.dev0.data → visidata-3.0.data}/scripts/vd +0 -0
- {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/LICENSE.gpl3 +0 -0
- {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/top_level.txt +0 -0
@@ -3,13 +3,16 @@ import itertools
|
|
3
3
|
import functools
|
4
4
|
from copy import copy
|
5
5
|
|
6
|
-
from visidata import
|
6
|
+
from visidata import vd, VisiData, asyncthread, Sheet, Progress, IndexSheet, Column, CellColorizer, ColumnItem, SubColumnItem, TypedWrapper, ColumnsSheet, AttrDict
|
7
|
+
|
8
|
+
vd.help_join = '# Join Help\nHELPTODO'
|
7
9
|
|
8
10
|
@VisiData.api
|
9
11
|
def ensureLoaded(vd, sheets):
|
10
12
|
threads = [vs.ensureLoaded() for vs in sheets]
|
11
13
|
threads = [t for t in threads if t]
|
12
|
-
|
14
|
+
if threads:
|
15
|
+
vd.status('loading %d source sheets' % len(threads))
|
13
16
|
return threads
|
14
17
|
|
15
18
|
|
@@ -28,6 +31,20 @@ def _appendRowsAfterLoading(joinsheet, origsheets):
|
|
28
31
|
joinsheet.addColumn(newcol)
|
29
32
|
|
30
33
|
|
34
|
+
@VisiData.api
|
35
|
+
def join_sheets_cols(vd, cols, jointype:str=''):
|
36
|
+
'match joinkeys by cols in order per sheet.'
|
37
|
+
sheetkeys = collections.defaultdict(list) # [sheet] -> list of keycols on that sheet
|
38
|
+
for c in cols:
|
39
|
+
sheetkeys[c.sheet].append(c)
|
40
|
+
|
41
|
+
sheets = list(sheetkeys.keys())
|
42
|
+
return JoinSheet('+'.join(vs.name for vs in sheets),
|
43
|
+
sources=sheets,
|
44
|
+
sheetKeyCols=sheetkeys,
|
45
|
+
jointype=jointype)
|
46
|
+
|
47
|
+
|
31
48
|
@Sheet.api
|
32
49
|
def openJoin(sheet, others, jointype=''):
|
33
50
|
sheets = [sheet] + others
|
@@ -53,19 +70,24 @@ def openJoin(sheet, others, jointype=''):
|
|
53
70
|
name = '&'.join(vs.name for vs in sheets)
|
54
71
|
return ConcatSheet(name, source=sheets)
|
55
72
|
|
56
|
-
for s in sheets
|
57
|
-
|
73
|
+
nkeys = set(len(s.keyCols) for s in sheets)
|
74
|
+
if 0 in nkeys or len(nkeys) != 1:
|
75
|
+
vd.fail(f'all sheets must have the same number of key columns')
|
58
76
|
|
59
77
|
if jointype == 'extend':
|
60
78
|
vs = copy(sheets[0])
|
61
79
|
vs.name = '+'.join(vs.name for vs in sheets)
|
80
|
+
vs.sheetKeyCols = {vs:vs.keyCols for vs in sheets}
|
62
81
|
vs.reload = functools.partial(ExtendedSheet_reload, vs, sheets)
|
63
82
|
return vs
|
64
83
|
else:
|
65
|
-
return JoinSheet('+'.join(vs.name for vs in sheets),
|
84
|
+
return JoinSheet('+'.join(vs.name for vs in sheets),
|
85
|
+
sources=sheets,
|
86
|
+
jointype=jointype,
|
87
|
+
sheetKeyCols={s:s.keyCols for s in sheets})
|
66
88
|
|
67
89
|
|
68
|
-
vd.jointypes = [
|
90
|
+
vd.jointypes = [AttrDict(key=k, desc=v) for k, v in {
|
69
91
|
'inner': 'only rows with matching keys on all sheets',
|
70
92
|
'outer': 'only rows with matching keys on first selected sheet',
|
71
93
|
'full': 'all rows from all sheets (union)',
|
@@ -73,27 +95,27 @@ vd.jointypes = [{'key': k, 'desc': v} for k, v in {
|
|
73
95
|
'append': 'all rows from all sheets; columns from all sheets',
|
74
96
|
'concat': 'all rows from all sheets; columns and type from first sheet',
|
75
97
|
'extend': 'only rows from first sheet; type from first sheet; columns from all sheets',
|
76
|
-
'merge': 'merge differences from other sheets into first sheet',
|
98
|
+
'merge': 'merge differences from other sheets into first sheet (including new rows)',
|
77
99
|
}.items()]
|
78
100
|
|
79
|
-
def joinkey(
|
80
|
-
return tuple(c.getDisplayValue(row) for c in
|
101
|
+
def joinkey(sheetKeyCols, row):
|
102
|
+
return tuple(c.getDisplayValue(row) for c in sheetKeyCols)
|
81
103
|
|
82
104
|
|
83
|
-
def groupRowsByKey(sheets, rowsBySheetKey, rowsByKey):
|
105
|
+
def groupRowsByKey(sheets:dict, rowsBySheetKey, rowsByKey):
|
84
106
|
with Progress(gerund='grouping', total=sum(len(vs.rows) for vs in sheets)*2) as prog:
|
85
107
|
for vs in sheets:
|
86
108
|
# tally rows by keys for each sheet
|
87
109
|
rowsBySheetKey[vs] = collections.defaultdict(list)
|
88
110
|
for r in vs.rows:
|
89
111
|
prog.addProgress(1)
|
90
|
-
key = joinkey(vs, r)
|
112
|
+
key = joinkey(sheets[vs], r)
|
91
113
|
rowsBySheetKey[vs][key].append(r)
|
92
114
|
|
93
115
|
for vs in sheets:
|
94
116
|
for r in vs.rows:
|
95
117
|
prog.addProgress(1)
|
96
|
-
key = joinkey(vs, r)
|
118
|
+
key = joinkey(sheets[vs], r)
|
97
119
|
if key not in rowsByKey: # gather for this key has not been done yet
|
98
120
|
# multiplicative for non-unique keys
|
99
121
|
rowsByKey[key] = [
|
@@ -114,11 +136,11 @@ class JoinKeyColumn(Column):
|
|
114
136
|
vals = set()
|
115
137
|
for i, c in enumerate(self.keycols):
|
116
138
|
if row[c.sheet] is not None:
|
117
|
-
vals.add(c.
|
118
|
-
if len(vals)
|
119
|
-
|
120
|
-
|
121
|
-
|
139
|
+
vals.add(c.getTypedValue(row[c.sheet]))
|
140
|
+
if len(vals) != 1:
|
141
|
+
keycolnames = ', '.join([f'{col.sheet.name}:{col.name}' for col in self.keycols])
|
142
|
+
vd.warning(f"source key columns ({keycolnames}) have different types")
|
143
|
+
return vals.pop()
|
122
144
|
|
123
145
|
def putValue(self, row, value):
|
124
146
|
for i, c in enumerate(self.keycols):
|
@@ -133,22 +155,23 @@ class JoinKeyColumn(Column):
|
|
133
155
|
|
134
156
|
class MergeColumn(Column):
|
135
157
|
def calcValue(self, row):
|
136
|
-
for vs, c in self.cols.items():
|
158
|
+
for vs, c in reversed(list(self.cols.items())):
|
137
159
|
if c:
|
138
160
|
v = c.getTypedValue(row[vs])
|
139
161
|
if v and not isinstance(v, TypedWrapper):
|
140
162
|
return v
|
141
163
|
|
142
164
|
def putValue(self, row, value):
|
143
|
-
for vs, c in reversed(self.cols.items()):
|
165
|
+
for vs, c in reversed(list(self.cols.items())):
|
144
166
|
c.setValue(row[vs], value)
|
145
167
|
|
146
168
|
def isDiff(self, row, value):
|
147
169
|
col = list(self.cols.values())[0]
|
148
170
|
return col and value != col.getValue(row[col.sheet])
|
149
171
|
|
172
|
+
|
150
173
|
#### slicing and dicing
|
151
|
-
# rowdef:
|
174
|
+
# rowdef: {sheet1:sheet1_row, sheet2:sheet2_row, ...}
|
152
175
|
# if a sheet does not have this key, sheet#_row is None
|
153
176
|
class JoinSheet(Sheet):
|
154
177
|
'Column-wise join/merge. `jointype` constructor arg should be one of jointypes.'
|
@@ -156,8 +179,9 @@ class JoinSheet(Sheet):
|
|
156
179
|
CellColorizer(0, 'color_diff', lambda s,c,r,v: c and r and isinstance(c, MergeColumn) and c.isDiff(r, v.value))
|
157
180
|
]
|
158
181
|
|
159
|
-
|
160
|
-
|
182
|
+
sheetKeyCols = {} # [sheet] -> list of joinkeycols for that sheet
|
183
|
+
|
184
|
+
def loader(self):
|
161
185
|
sheets = self.sources
|
162
186
|
|
163
187
|
vd.ensureLoaded(sheets)
|
@@ -167,30 +191,32 @@ class JoinSheet(Sheet):
|
|
167
191
|
# first columns are the key columns from the first sheet, using its row (0)
|
168
192
|
self.columns = []
|
169
193
|
|
170
|
-
for i, cols in enumerate(itertools.zip_longest(*(
|
194
|
+
for i, cols in enumerate(itertools.zip_longest(*list(self.sheetKeyCols.values()))):
|
171
195
|
self.addColumn(JoinKeyColumn(cols[0].name, keycols=cols)) # ColumnItem(c.name, i, sheet=sheets[0], type=c.type, width=c.width)))
|
172
196
|
self.setKeys(self.columns)
|
173
197
|
|
174
198
|
allcols = collections.defaultdict(dict) # colname: { sheet: origcol, ... }
|
175
199
|
for sheetnum, vs in enumerate(sheets):
|
176
|
-
for c in vs.
|
177
|
-
|
200
|
+
for c in vs.visibleCols:
|
201
|
+
if c not in self.sheetKeyCols[vs]:
|
202
|
+
allcols[c.name][vs] = c
|
178
203
|
|
179
204
|
if self.jointype == 'merge':
|
180
205
|
for colname, cols in allcols.items():
|
181
206
|
self.addColumn(MergeColumn(colname, cols=cols))
|
182
207
|
else:
|
183
|
-
ctr = collections.Counter(c.name for vs in sheets for c in vs.
|
208
|
+
ctr = collections.Counter(c.name for vs in sheets for c in vs.visibleCols if c not in self.sheetKeyCols[vs])
|
184
209
|
for sheetnum, vs in enumerate(sheets):
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
210
|
+
# subsequent elements are the rows from each source, in order of the source sheets
|
211
|
+
for c in vs.visibleCols:
|
212
|
+
if c not in self.sheetKeyCols[vs]:
|
213
|
+
newname = c.name if ctr[c.name] == 1 else '%s_%s' % (vs.name, c.name)
|
214
|
+
self.addColumn(SubColumnItem(vs, c, name=newname))
|
189
215
|
|
190
216
|
rowsBySheetKey = {} # [sheet] -> { key:list(rows), ... }
|
191
217
|
rowsByKey = {} # [key] -> [{sheet1:row1, sheet2:row1, ... }, ...]
|
192
218
|
|
193
|
-
groupRowsByKey(
|
219
|
+
groupRowsByKey(self.sheetKeyCols, rowsBySheetKey, rowsByKey)
|
194
220
|
|
195
221
|
self.rows = []
|
196
222
|
|
@@ -221,13 +247,13 @@ class JoinSheet(Sheet):
|
|
221
247
|
## for ExtendedSheet_reload below
|
222
248
|
class ExtendedColumn(Column):
|
223
249
|
def calcValue(self, row):
|
224
|
-
key = joinkey(self.firstJoinSource, row)
|
250
|
+
key = joinkey(self.firstJoinSource.keyCols, row)
|
225
251
|
srcrow = self.rowsBySheetKey[self.srcsheet][key]
|
226
252
|
if srcrow:
|
227
253
|
return self.sourceCol.calcValue(srcrow[0])
|
228
254
|
|
229
255
|
def putValue(self, row, value):
|
230
|
-
key = joinkey(self.firstJoinSource, row)
|
256
|
+
key = joinkey(self.firstJoinSource.keyCols, row)
|
231
257
|
srcrow = self.rowsBySheetKey[self.srcsheet][key]
|
232
258
|
if len(srcrow) == 1:
|
233
259
|
self.sourceCol.putValue(srcrow[0], value)
|
@@ -248,21 +274,23 @@ def ExtendedSheet_reload(self, sheets):
|
|
248
274
|
self.addColumn(copy(c))
|
249
275
|
self.setKeys(self.columns)
|
250
276
|
|
251
|
-
for i, c in enumerate(sheets[0].
|
252
|
-
self.
|
277
|
+
for i, c in enumerate(sheets[0].visibleCols):
|
278
|
+
if c not in self.sheetKeyCols[c.sheet]:
|
279
|
+
self.addColumn(copy(c))
|
253
280
|
|
254
281
|
self.rowsBySheetKey = {} # [srcSheet][key] -> list(rowobjs from sheets[0])
|
255
282
|
rowsByKey = {} # [key] -> [{sheet1:row1, sheet2:row1, ... }, ...]
|
256
283
|
|
257
284
|
for sheetnum, vs in enumerate(sheets[1:]):
|
258
285
|
# subsequent elements are the rows from each source, in order of the source sheets
|
259
|
-
# ctr = collections.Counter(c.name for c in vs.
|
260
|
-
for c in vs.
|
261
|
-
|
262
|
-
|
263
|
-
|
286
|
+
# ctr = collections.Counter(c.name for c in vs.visibleCols if c not in sheetkeys[vs])
|
287
|
+
for c in vs.visibleCols:
|
288
|
+
if c not in self.sheetKeyCols[c.sheet]:
|
289
|
+
newname = '%s_%s' % (vs.name, c.name)
|
290
|
+
newcol = ExtendedColumn(newname, srcsheet=vs, rowsBySheetKey=self.rowsBySheetKey, firstJoinSource=sheets[0], sourceCol=c)
|
291
|
+
self.addColumn(newcol)
|
264
292
|
|
265
|
-
groupRowsByKey(
|
293
|
+
groupRowsByKey(self.sheetKeyCols, self.rowsBySheetKey, rowsByKey)
|
266
294
|
|
267
295
|
self.rows = []
|
268
296
|
|
@@ -298,14 +326,10 @@ class ConcatColumn(Column):
|
|
298
326
|
# rowdef: (srcSheet, srcRow)
|
299
327
|
class ConcatSheet(Sheet):
|
300
328
|
'combination of multiple sheets by row concatenation. source=list of sheets. '
|
301
|
-
|
302
|
-
def
|
303
|
-
self.columns = []
|
304
|
-
self.addColumn(ColumnItem('origin_sheet', 0, width=0))
|
305
|
-
|
329
|
+
columns = [ColumnItem('origin_sheet', 0, width=0)]
|
330
|
+
def iterload(self):
|
306
331
|
# only one column with each name allowed per sheet
|
307
332
|
keyedcols = collections.defaultdict(dict) # name -> { sheet -> col }
|
308
|
-
self.rows = []
|
309
333
|
|
310
334
|
with Progress(gerund='joining', sheet=self, total=sum(vs.nRows for vs in self.source)) as prog:
|
311
335
|
for sheet in self.source:
|
@@ -313,7 +337,7 @@ class ConcatSheet(Sheet):
|
|
313
337
|
vd.sync()
|
314
338
|
|
315
339
|
for r in sheet.rows:
|
316
|
-
|
340
|
+
yield (sheet, r)
|
317
341
|
prog.addProgress(1)
|
318
342
|
|
319
343
|
for idx, col in enumerate(sheet.visibleCols):
|
@@ -327,10 +351,40 @@ class ConcatSheet(Sheet):
|
|
327
351
|
keyedcols[col.name][sheet] = col
|
328
352
|
|
329
353
|
|
330
|
-
|
331
|
-
|
354
|
+
@VisiData.api
|
355
|
+
def chooseJointype(vd):
|
356
|
+
prompt = 'choose jointype: '
|
357
|
+
def _fmt_aggr_summary(match, row, trigger_key):
|
358
|
+
formatted_jointype = match.formatted.get('key', row.key) if match else row.key
|
359
|
+
r = ' '*(len(prompt)-3)
|
360
|
+
r += f'[:keystrokes]{trigger_key}[/] '
|
361
|
+
r += formatted_jointype
|
362
|
+
if row.desc:
|
363
|
+
r += ' - '
|
364
|
+
r += match.formatted.get('desc', row.desc) if match else row.desc
|
365
|
+
return r
|
366
|
+
|
367
|
+
return vd.activeSheet.inputPalette(prompt,
|
368
|
+
vd.jointypes,
|
369
|
+
value_key='key',
|
370
|
+
formatter=_fmt_aggr_summary,
|
371
|
+
help=vd.help_join,
|
372
|
+
type='jointype')
|
373
|
+
|
374
|
+
|
375
|
+
IndexSheet.addCommand('&', 'join-selected', 'left, rights = someSelectedRows[0], someSelectedRows[1:]; vd.push(left.openJoin(rights, jointype=chooseJointype()))', 'merge selected sheets with visible columns from all, keeping rows according to jointype')
|
332
376
|
IndexSheet.bindkey('g&', 'join-selected')
|
333
|
-
Sheet.addCommand('&', 'join-sheets-top2', 'vd.push(openJoin(vd.sheets[1:2], jointype=
|
334
|
-
Sheet.addCommand('g&', 'join-sheets-all', 'vd.push(openJoin(vd.sheets[1:], jointype=
|
377
|
+
Sheet.addCommand('&', 'join-sheets-top2', 'vd.push(openJoin(vd.sheets[1:2], jointype=chooseJointype()))', 'concatenate top two sheets in Sheets Stack')
|
378
|
+
Sheet.addCommand('g&', 'join-sheets-all', 'vd.push(openJoin(vd.sheets[1:], jointype=chooseJointype()))', 'concatenate all sheets in Sheets Stack')
|
379
|
+
|
380
|
+
ColumnsSheet.addCommand('&', 'join-sheets-cols', 'vd.push(join_sheets_cols(selectedRows, jointype=chooseJointype()))', '')
|
381
|
+
|
382
|
+
vd.addMenuItems('''
|
383
|
+
Data > Join > selected sheets > join-selected
|
384
|
+
Data > Join > top two sheets > join-sheets-top2
|
385
|
+
Data > Join > all sheets > join-sheets-all
|
386
|
+
''')
|
335
387
|
|
336
|
-
|
388
|
+
IndexSheet.guide += '''
|
389
|
+
- `&` to join the selected sheets together
|
390
|
+
'''
|
@@ -0,0 +1,21 @@
|
|
1
|
+
'''#1488
|
2
|
+
Usage (in .visidatarc):
|
3
|
+
|
4
|
+
|
5
|
+
DirSheet.knownCols.directory.width = 0
|
6
|
+
Sheet.knownCols.date.type = date
|
7
|
+
'''
|
8
|
+
|
9
|
+
from visidata import Sheet, DefaultAttrDict
|
10
|
+
|
11
|
+
|
12
|
+
Sheet.knownCols = DefaultAttrDict()
|
13
|
+
|
14
|
+
|
15
|
+
@Sheet.before
|
16
|
+
def afterLoad(sheet):
|
17
|
+
for colname, attrs in sheet.knownCols.items():
|
18
|
+
col = sheet.colsByName.get(colname)
|
19
|
+
if col:
|
20
|
+
for k, v in attrs.items():
|
21
|
+
setattr(col, k, v)
|
@@ -0,0 +1,62 @@
|
|
1
|
+
from visidata import VisiData, vd, Column, Sheet, Fanout
|
2
|
+
|
3
|
+
@Column.api
|
4
|
+
def setWidth(self, w):
|
5
|
+
if self.width != w:
|
6
|
+
if self.width == 0 or w == 0: # hide/unhide
|
7
|
+
vd.addUndo(setattr, self, '_width', self.width)
|
8
|
+
self._width = w
|
9
|
+
|
10
|
+
|
11
|
+
@Column.api
|
12
|
+
def toggleWidth(self, width):
|
13
|
+
'Change column width to either given `width` or default value.'
|
14
|
+
if self.width != width:
|
15
|
+
self.width = width
|
16
|
+
else:
|
17
|
+
self.width = int(self.sheet.options.default_width)
|
18
|
+
|
19
|
+
|
20
|
+
@Column.api
|
21
|
+
def toggleMultiline(self):
|
22
|
+
if self.height == 1:
|
23
|
+
self.height = self.sheet.options.default_height
|
24
|
+
else:
|
25
|
+
self.height = 1
|
26
|
+
|
27
|
+
@VisiData.api
|
28
|
+
def unhide_cols(vd, cols, rows):
|
29
|
+
'sets appropriate width if column was either hidden (0) or unseen (None)'
|
30
|
+
for c in cols:
|
31
|
+
c.setWidth(abs(c.width or 0) or c.getMaxWidth(rows))
|
32
|
+
|
33
|
+
@VisiData.api
|
34
|
+
def hide_col(vd, col):
|
35
|
+
if not col: vd.fail("no columns to hide")
|
36
|
+
col.hide()
|
37
|
+
|
38
|
+
Sheet.addCommand('_', 'resize-col-max', 'if cursorCol: cursorCol.toggleWidth(cursorCol.getMaxWidth(visibleRows))', 'toggle width of current column between full and default width')
|
39
|
+
Sheet.addCommand('z_', 'resize-col-input', 'width = int(input("set width= ", value=cursorCol.width)); cursorCol.setWidth(width)', 'adjust width of current column to N')
|
40
|
+
Sheet.addCommand('g_', 'resize-cols-max', 'for c in visibleCols: c.setWidth(c.getMaxWidth(visibleRows))', 'toggle widths of all visible columns between full and default width')
|
41
|
+
Sheet.addCommand('gz_', 'resize-cols-input', 'width = int(input("set width= ", value=cursorCol.width)); Fanout(visibleCols).setWidth(width)', 'adjust widths of all visible columns to N')
|
42
|
+
|
43
|
+
Sheet.addCommand('-', 'hide-col', 'hide_col(cursorCol)', 'Hide current column')
|
44
|
+
Sheet.addCommand('z-', 'resize-col-half', 'cursorCol.setWidth(cursorCol.width//2)', 'reduce width of current column by half')
|
45
|
+
|
46
|
+
Sheet.addCommand('gv', 'unhide-cols', 'unhide_cols(columns, visibleRows)', 'Unhide all hidden columns')
|
47
|
+
Sheet.addCommand('v', 'toggle-multiline', 'for c in visibleCols: c.toggleMultiline()', 'toggle multiline display')
|
48
|
+
Sheet.addCommand('zv', 'resize-height-input', 'Fanout(visibleCols).height=int(input("set height for all columns to: ", value=max(c.height for c in sheet.visibleCols)))', 'resize row height to N')
|
49
|
+
Sheet.addCommand('gzv', 'resize-height-max', 'h=calc_height(cursorRow, {}, maxheight=windowHeight-1); vd.status(f"set height for all columns to {h}"); Fanout(visibleCols).height=h', 'resize row height to max height needed to see this row')
|
50
|
+
|
51
|
+
vd.addMenuItems('''
|
52
|
+
Column > Hide > hide-col
|
53
|
+
Column > Unhide all > unhide-cols
|
54
|
+
Column > Resize > half width > resize-col-half
|
55
|
+
Column > Resize > current column width to max > resize-col-max
|
56
|
+
Column > Resize > current column width to N > resize-col-input
|
57
|
+
Column > Resize > all columns width to max > resize-cols-max
|
58
|
+
Column > Resize > all columns width to N > resize-cols-input
|
59
|
+
Row > Resize > height to N > resize-height-input
|
60
|
+
Row > Resize > height to max > resize-height-max
|
61
|
+
View > Toggle display > multiline > toggle-multiline
|
62
|
+
''')
|
@@ -1,34 +1,27 @@
|
|
1
1
|
import collections
|
2
2
|
import re
|
3
|
+
from copy import copy
|
3
4
|
|
4
|
-
from visidata import
|
5
|
-
|
5
|
+
from visidata import Sheet, SubColumnItem, ColumnItem, Column, Progress
|
6
|
+
from visidata import asyncthread, vd
|
6
7
|
|
7
8
|
melt_var_colname = 'Variable' # column name to use for the melted variable name
|
8
9
|
melt_value_colname = 'Value' # column name to use for the melted value
|
9
|
-
melt_null = False # whether to
|
10
|
+
melt_null = False # whether to include rows for null values during melt
|
10
11
|
|
11
12
|
|
12
13
|
# rowdef: {0:sourceRow, 1:Category1, ..., N:CategoryN, ColumnName:Column, ...}
|
13
14
|
class MeltedSheet(Sheet):
|
14
|
-
"Perform 'melt', the inverse of 'pivot', on input sheet."
|
15
|
+
"Perform 'melt', the inverse of 'pivot', on input `source` sheet. Pass `regex` to parse column names into additional columns"
|
15
16
|
|
16
17
|
rowtype = 'melted values'
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
isNull = self.isNullFunc()
|
22
|
-
|
23
|
-
sheet = self.source
|
24
|
-
for c in sheet.keyCols:
|
25
|
-
self.addColumn(SubColumnItem(0, c))
|
26
|
-
self.setKeys(self.columns)
|
27
|
-
|
28
|
-
colsToMelt = [copy(c) for c in sheet.nonKeyVisibleCols]
|
19
|
+
def getValueCols(self) -> dict:
|
20
|
+
'''Return dict of ('Category1', 'Category2') -> list of tuple('ColumnName', Column)'''
|
21
|
+
colsToMelt = [copy(c) for c in self.source.nonKeyVisibleCols]
|
29
22
|
|
30
23
|
# break down Category1_Category2_ColumnName as per regex
|
31
|
-
valcols = collections.OrderedDict()
|
24
|
+
valcols = collections.OrderedDict()
|
32
25
|
for c in colsToMelt:
|
33
26
|
c.aggregators = [vd.aggregators['max']]
|
34
27
|
m = re.match(self.regex, c.name)
|
@@ -44,11 +37,23 @@ class MeltedSheet(Sheet):
|
|
44
37
|
valcols[cats].append((valcolname, c))
|
45
38
|
ncats = len(varvals)
|
46
39
|
else:
|
47
|
-
vd.status('"
|
40
|
+
vd.status(f'"{c.name}" column does not match regex, skipping')
|
48
41
|
ncats = 0
|
49
42
|
|
43
|
+
return valcols
|
44
|
+
|
45
|
+
def resetCols(self):
|
46
|
+
self.columns = []
|
47
|
+
sheet = self.source
|
48
|
+
for c in sheet.keyCols:
|
49
|
+
self.addColumn(SubColumnItem(0, c))
|
50
|
+
self.setKeys(self.columns)
|
51
|
+
|
52
|
+
valcols = self.getValueCols()
|
50
53
|
othercols = set()
|
54
|
+
ncats = 0
|
51
55
|
for colnames, cols in valcols.items():
|
56
|
+
ncats = max(ncats, len(colnames))
|
52
57
|
for cname, _ in cols:
|
53
58
|
othercols.add(cname)
|
54
59
|
|
@@ -56,7 +61,7 @@ class MeltedSheet(Sheet):
|
|
56
61
|
self.addColumn(ColumnItem(melt_var_colname, 1))
|
57
62
|
else:
|
58
63
|
for i in range(ncats):
|
59
|
-
self.addColumn(ColumnItem('
|
64
|
+
self.addColumn(ColumnItem(f'{melt_var_colname}{i+1}', i+1))
|
60
65
|
|
61
66
|
for cname in othercols:
|
62
67
|
self.addColumn(Column(cname,
|
@@ -64,7 +69,10 @@ class MeltedSheet(Sheet):
|
|
64
69
|
setter=lambda col,row,val,cname=cname: row[cname].setValues([row[0]], val),
|
65
70
|
aggregators=[vd.aggregators['max']]))
|
66
71
|
|
67
|
-
|
72
|
+
def iterload(self):
|
73
|
+
isNull = self.isNullFunc()
|
74
|
+
|
75
|
+
valcols = self.getValueCols()
|
68
76
|
for r in Progress(self.source.rows, 'melting'):
|
69
77
|
for colnames, cols in valcols.items():
|
70
78
|
meltedrow = {}
|
@@ -80,7 +88,7 @@ class MeltedSheet(Sheet):
|
|
80
88
|
for i, colname in enumerate(colnames):
|
81
89
|
meltedrow[i+1] = colname
|
82
90
|
|
83
|
-
|
91
|
+
yield meltedrow
|
84
92
|
|
85
93
|
|
86
94
|
@Sheet.api
|
@@ -90,5 +98,9 @@ def openMelt(sheet, regex='(.*)'):
|
|
90
98
|
|
91
99
|
Sheet.addCommand('M', 'melt', 'vd.push(openMelt())', 'open Melted Sheet (unpivot), with key columns retained and all non-key columns reduced to Variable-Value rows')
|
92
100
|
|
101
|
+
Sheet.addCommand('gM', 'melt-regex', 'vd.push(openMelt(vd.inputRegex("regex to split colname: ", value="(.*)_(.*)", type="regex-capture")))', 'open Melted Sheet (unpivot), with key columns retained and regex capture groups determining how the non-key columns will be reduced to Variable-Value rows')
|
93
102
|
|
94
|
-
|
103
|
+
vd.addMenuItems('''
|
104
|
+
Data > Melt > nonkey columns > melt
|
105
|
+
Data > Melt > nonkey columns by regex > melt-regex
|
106
|
+
''')
|
@@ -0,0 +1,118 @@
|
|
1
|
+
"""
|
2
|
+
# Usage
|
3
|
+
|
4
|
+
This plugin normalizes column names in any given sheet, so that the names are:
|
5
|
+
|
6
|
+
- Composed only of lowercase letters, numbers, and underscores.
|
7
|
+
|
8
|
+
- Valid Python identifiers. This is mostly handled by the rule above, but also
|
9
|
+
prohibits names beginning with a digit; that is handled by prefixing those
|
10
|
+
names with an underscore.
|
11
|
+
|
12
|
+
- Unique within the sheet. Non-unique names are suffixed with "__" and an
|
13
|
+
integer.
|
14
|
+
|
15
|
+
Unnamed columns are left as such.
|
16
|
+
|
17
|
+
For instance, a sheet with the following column names:
|
18
|
+
|
19
|
+
- "Genus, Species"
|
20
|
+
- "Height"
|
21
|
+
- "5-score"
|
22
|
+
- "Height"
|
23
|
+
- ""
|
24
|
+
- ""
|
25
|
+
|
26
|
+
... would be converted to have the following column names:
|
27
|
+
|
28
|
+
- "genus_species"
|
29
|
+
- "height__0"
|
30
|
+
- "_5_score"
|
31
|
+
- "height__1"
|
32
|
+
- ""
|
33
|
+
- ""
|
34
|
+
|
35
|
+
## Commands
|
36
|
+
|
37
|
+
- `normalize-col-names` normalizes the names of all *non-hidden* columns in the
|
38
|
+
active sheet, per the approach described above.
|
39
|
+
|
40
|
+
"""
|
41
|
+
|
42
|
+
__author__ = "Jeremy Singer-Vine <jsvine@gmail.com>"
|
43
|
+
|
44
|
+
from visidata import vd, Sheet, asyncthread, Progress
|
45
|
+
from collections import Counter
|
46
|
+
import re
|
47
|
+
import string
|
48
|
+
|
49
|
+
# Matches each run of one or more characters other than lowercase ASCII
# letters and digits.
nonalphanum_pat = re.compile(r"[^a-z0-9]+")
|
50
|
+
|
51
|
+
|
52
|
+
def normalize_name(name):
    """
    Return *name* reduced to lowercase ASCII letters, digits and single
    underscores, with no underscore at either end.

    A result that would start with a digit is prefixed with "_".  A name
    containing no letters or digits yields the empty string.
    """
    # Lowercase first, collapse every run of other characters into a single
    # underscore, then trim the underscores left at the ends.
    cleaned = nonalphanum_pat.sub("_", name.lower()).strip("_")

    # A leading digit is prefixed with "_" (the empty string is left alone).
    if cleaned and cleaned[0] in string.digits:
        return "_" + cleaned

    return cleaned
|
68
|
+
|
69
|
+
|
70
|
+
def gen_normalize_names(names):
    """
    Yield one normalized name per entry of *names*, in the same order.

    Each entry is passed through normalize_name().  A normalized name that
    occurs more than once gets "__" plus a running index (starting at 0)
    appended to each occurrence; empty names are yielded unchanged however
    often they occur.
    """
    normalized = [normalize_name(raw) for raw in names]
    occurrences = Counter(normalized)

    # Next suffix index to hand out for each distinct normalized name.
    next_index = dict.fromkeys(occurrences, 0)

    for candidate in normalized:
        if candidate != "" and occurrences[candidate] != 1:
            index = next_index[candidate]
            next_index[candidate] = index + 1
            yield candidate + "__" + str(index)
        else:
            yield candidate
|
87
|
+
|
88
|
+
|
89
|
+
@Sheet.api
@asyncthread
def normalize_column_names(sheet):
    """
    Normalize the names of all non-hidden columns on the active sheet.

    The visible columns are captured once, and each generated name is
    assigned to the column it was derived from, so the pairing cannot drift
    if the sheet's visible columns change while this runs.  The previous
    names are recorded, and a function restoring them is registered through
    vd.addUndo().
    """
    # Snapshot: names are generated positionally from this exact list, so the
    # same list must be used when assigning them back.
    cols = list(sheet.visibleCols)
    init_names = {}

    gen = gen_normalize_names([c.name for c in cols])
    prog = Progress(gen, gerund="normalizing", total=len(cols))

    # prog is the first zip argument so that it is iterated to exhaustion,
    # exactly as a plain loop over it would do.
    for norm_name, col in zip(prog, cols):
        init_names[col] = col.name  # Store for undo
        col.name = norm_name

    @asyncthread
    def undo():
        # Put back every name recorded above.
        for c, oldname in init_names.items():
            c.name = oldname

    vd.addUndo(undo)
|
111
|
+
|
112
|
+
|
113
|
+
# Add longname-commands to VisiData to execute these methods
|
114
|
+
# First argument is None where other addCommand calls pass a key such as 'M';
# presumably this command has no key binding and is run by longname or menu.
Sheet.addCommand(
    None,
    "normalize-col-names",
    "vd.sheet.normalize_column_names()",
    "normalize the names of all non-hidden columns",
)

# Menu entry whose last path element is the command longname.
vd.addMenuItems('''
Column > Rename > normalize all > normalize-col-names
''')
|
@@ -0,0 +1,18 @@
|
|
1
|
+
'''
|
2
|
+
Load new table from system clipboard
|
3
|
+
'''
|
4
|
+
|
5
|
+
from visidata import vd, BaseSheet, Path
|
6
|
+
|
7
|
+
|
8
|
+
@BaseSheet.api
def open_syspaste(sheet, filetype='tsv'):
    '''Open the stripped system clipboard text as a source of *filetype*.

    Calls vd.fail('nothing to open') when the clipboard text is empty after
    stripping.  Otherwise returns the result of vd.openSource() on an
    in-memory path named "syspaste.<filetype>".
    '''
    from io import BytesIO

    clip_text = vd.sysclipValue().strip()
    if not clip_text:
        # Same effect as `value or vd.fail(...)`: fail() is expected to abort.
        clip_text = vd.fail('nothing to open')

    payload = BytesIO(clip_text.encode('utf-8'))
    pseudo_path = Path('syspaste' + '.' + filetype, fp=payload)
    return vd.openSource(pseudo_path, filetype=filetype)
|
16
|
+
|
17
|
+
|
18
|
+
# Prompts for a filetype (default "tsv"), then pushes the sheet returned by
# open_syspaste().
BaseSheet.addCommand(
    'gShift+P',
    'open-syspaste',
    'vd.push(open_syspaste(filetype=vd.input("paste as filetype: ", value="tsv")))',
    'open clipboard as filetype',
)
|