visidata 2.11.1__py3-none-any.whl → 3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- visidata/__init__.py +72 -91
- visidata/_input.py +259 -42
- visidata/_open.py +84 -29
- visidata/_types.py +21 -3
- visidata/_urlcache.py +17 -4
- visidata/aggregators.py +65 -25
- visidata/apps/__init__.py +0 -0
- visidata/apps/vdsql/__about__.py +8 -0
- visidata/apps/vdsql/__init__.py +5 -0
- visidata/apps/vdsql/__main__.py +27 -0
- visidata/apps/vdsql/_ibis.py +748 -0
- visidata/apps/vdsql/bigquery.py +61 -0
- visidata/apps/vdsql/clickhouse.py +53 -0
- visidata/apps/vdsql/setup.py +40 -0
- visidata/apps/vdsql/snowflake.py +67 -0
- visidata/apps/vgit/__init__.py +13 -0
- {vgit → visidata/apps/vgit}/blame.py +5 -2
- {vgit → visidata/apps/vgit}/branch.py +31 -16
- {vgit → visidata/apps/vgit}/config.py +3 -3
- visidata/apps/vgit/diff.py +169 -0
- visidata/apps/vgit/gitsheet.py +161 -0
- {vgit → visidata/apps/vgit}/grep.py +6 -5
- visidata/apps/vgit/log.py +81 -0
- {vgit → visidata/apps/vgit}/main.py +18 -5
- {vgit → visidata/apps/vgit}/remote.py +8 -4
- visidata/apps/vgit/repos.py +71 -0
- {vgit → visidata/apps/vgit}/setup.py +6 -4
- visidata/apps/vgit/stash.py +69 -0
- visidata/apps/vgit/status.py +204 -0
- {vgit → visidata/apps/vgit}/statusbar.py +2 -0
- visidata/basesheet.py +59 -50
- visidata/canvas.py +208 -93
- visidata/choose.py +6 -6
- visidata/clean_names.py +29 -0
- visidata/clipboard.py +73 -17
- visidata/cliptext.py +220 -46
- visidata/cmdlog.py +88 -114
- visidata/color.py +142 -56
- visidata/column.py +121 -129
- visidata/ddw/input.ddw +74 -79
- visidata/ddw/regex.ddw +57 -0
- visidata/ddwplay.py +33 -14
- visidata/deprecated.py +77 -3
- visidata/desktop/visidata.desktop +7 -0
- visidata/editor.py +12 -6
- visidata/errors.py +5 -1
- visidata/experimental/__init__.py +0 -0
- visidata/experimental/diff_sheet.py +29 -0
- visidata/experimental/digit_autoedit.py +6 -0
- visidata/experimental/gdrive.py +89 -0
- visidata/experimental/google.py +37 -0
- visidata/experimental/gsheets.py +79 -0
- visidata/experimental/live_search.py +37 -0
- visidata/experimental/liveupdate.py +45 -0
- visidata/experimental/mark.py +133 -0
- visidata/experimental/noahs_tapestry/__init__.py +1 -0
- visidata/experimental/noahs_tapestry/tapestry.py +147 -0
- visidata/experimental/rownum.py +73 -0
- visidata/experimental/slide_cells.py +26 -0
- visidata/expr.py +8 -4
- visidata/extensible.py +30 -5
- visidata/features/__init__.py +0 -0
- visidata/features/addcol_audiometadata.py +42 -0
- visidata/features/addcol_histogram.py +34 -0
- visidata/features/canvas_save_svg.py +69 -0
- visidata/features/change_precision.py +46 -0
- visidata/features/cmdpalette.py +163 -0
- visidata/features/colorbrewer.py +363 -0
- visidata/{colorsheet.py → features/colorsheet.py} +17 -16
- visidata/features/command_server.py +105 -0
- visidata/features/currency_to_usd.py +70 -0
- visidata/{customdate.py → features/customdate.py} +2 -0
- visidata/features/dedupe.py +132 -0
- visidata/{describe.py → features/describe.py} +17 -15
- visidata/features/errors_guide.py +26 -0
- visidata/features/expand_cols.py +202 -0
- visidata/{fill.py → features/fill.py} +3 -1
- visidata/{freeze.py → features/freeze.py} +11 -6
- visidata/features/graph_seaborn.py +79 -0
- visidata/features/helloworld.py +10 -0
- visidata/features/hint_types.py +17 -0
- visidata/{incr.py → features/incr.py} +5 -0
- visidata/{join.py → features/join.py} +107 -53
- visidata/features/known_cols.py +21 -0
- visidata/features/layout.py +62 -0
- visidata/{melt.py → features/melt.py} +32 -21
- visidata/features/normcol.py +118 -0
- visidata/features/open_config.py +7 -0
- visidata/features/open_syspaste.py +18 -0
- visidata/features/ping.py +157 -0
- visidata/features/procmgr.py +208 -0
- visidata/features/random_sample.py +6 -0
- visidata/{regex.py → features/regex.py} +47 -31
- visidata/features/reload_every.py +55 -0
- visidata/features/rename_col_cascade.py +30 -0
- visidata/features/scroll_context.py +60 -0
- visidata/features/select_equal_selected.py +11 -0
- visidata/features/setcol_fake.py +65 -0
- visidata/{slide.py → features/slide.py} +75 -21
- visidata/features/sparkline.py +48 -0
- visidata/features/status_source.py +20 -0
- visidata/{sysedit.py → features/sysedit.py} +2 -1
- visidata/features/sysopen_mailcap.py +46 -0
- visidata/features/term_extras.py +13 -0
- visidata/{transpose.py → features/transpose.py} +5 -4
- visidata/features/type_ipaddr.py +73 -0
- visidata/features/type_url.py +11 -0
- visidata/{unfurl.py → features/unfurl.py} +9 -9
- visidata/{window.py → features/window.py} +2 -2
- visidata/form.py +50 -21
- visidata/freqtbl.py +81 -33
- visidata/fuzzymatch.py +414 -0
- visidata/graph.py +105 -33
- visidata/guide.py +180 -0
- visidata/help.py +75 -44
- visidata/hint.py +39 -0
- visidata/indexsheet.py +109 -0
- visidata/input_history.py +55 -0
- visidata/interface.py +58 -0
- visidata/keys.py +17 -16
- visidata/loaders/__init__.py +9 -0
- visidata/loaders/_pandas.py +61 -21
- visidata/loaders/api_airtable.py +70 -0
- visidata/loaders/api_bitio.py +102 -0
- visidata/loaders/api_matrix.py +148 -0
- visidata/loaders/api_reddit.py +306 -0
- visidata/loaders/api_zulip.py +249 -0
- visidata/loaders/archive.py +41 -7
- visidata/loaders/arrow.py +7 -7
- visidata/loaders/conll.py +49 -0
- visidata/loaders/csv.py +25 -7
- visidata/loaders/eml.py +3 -4
- visidata/loaders/f5log.py +1204 -0
- visidata/loaders/fec.py +325 -0
- visidata/loaders/fixed_width.py +2 -4
- visidata/loaders/frictionless.py +3 -3
- visidata/loaders/geojson.py +8 -5
- visidata/loaders/google.py +48 -0
- visidata/loaders/graphviz.py +4 -4
- visidata/loaders/hdf5.py +4 -4
- visidata/loaders/html.py +48 -10
- visidata/loaders/http.py +84 -30
- visidata/loaders/imap.py +20 -10
- visidata/loaders/jrnl.py +52 -0
- visidata/loaders/json.py +83 -29
- visidata/loaders/jsonla.py +74 -0
- visidata/loaders/lsv.py +15 -11
- visidata/loaders/mailbox.py +40 -0
- visidata/loaders/markdown.py +1 -3
- visidata/loaders/mbtiles.py +4 -5
- visidata/loaders/mysql.py +11 -13
- visidata/loaders/npy.py +7 -7
- visidata/loaders/odf.py +4 -1
- visidata/loaders/orgmode.py +428 -0
- visidata/loaders/pandas_freqtbl.py +14 -20
- visidata/loaders/parquet.py +62 -6
- visidata/loaders/pcap.py +3 -3
- visidata/loaders/pdf.py +4 -3
- visidata/loaders/png.py +19 -13
- visidata/loaders/postgres.py +9 -8
- visidata/loaders/rec.py +7 -3
- visidata/loaders/s3.py +342 -0
- visidata/loaders/sas.py +5 -5
- visidata/loaders/scrape.py +186 -0
- visidata/loaders/shp.py +6 -5
- visidata/loaders/spss.py +5 -6
- visidata/loaders/sqlite.py +68 -28
- visidata/loaders/texttables.py +1 -1
- visidata/loaders/toml.py +60 -0
- visidata/loaders/tsv.py +61 -19
- visidata/loaders/ttf.py +19 -7
- visidata/loaders/unzip_http.py +6 -5
- visidata/loaders/usv.py +1 -1
- visidata/loaders/vcf.py +16 -16
- visidata/loaders/vds.py +10 -7
- visidata/loaders/vdx.py +30 -5
- visidata/loaders/xlsb.py +8 -1
- visidata/loaders/xlsx.py +145 -25
- visidata/loaders/xml.py +6 -3
- visidata/loaders/xword.py +4 -4
- visidata/loaders/yaml.py +15 -5
- visidata/macros.py +129 -42
- visidata/main.py +119 -94
- visidata/mainloop.py +101 -155
- visidata/man/parse_options.py +2 -2
- visidata/man/vd.1 +301 -148
- visidata/man/vd.txt +290 -153
- visidata/memory.py +3 -3
- visidata/menu.py +104 -423
- visidata/metasheets.py +59 -141
- visidata/modify.py +78 -23
- visidata/motd.py +3 -3
- visidata/mouse.py +137 -0
- visidata/movement.py +43 -35
- visidata/optionssheet.py +99 -0
- visidata/path.py +113 -32
- visidata/pivot.py +73 -47
- visidata/plugins.py +65 -192
- visidata/pyobj.py +50 -201
- visidata/rename_col.py +20 -0
- visidata/save.py +37 -20
- visidata/search.py +54 -10
- visidata/selection.py +84 -5
- visidata/settings.py +162 -25
- visidata/sheets.py +229 -257
- visidata/shell.py +51 -21
- visidata/sidebar.py +162 -0
- visidata/sort.py +11 -4
- visidata/statusbar.py +113 -104
- visidata/stored_list.py +43 -0
- visidata/stored_prop.py +38 -0
- visidata/tests/conftest.py +3 -3
- visidata/tests/test_cliptext.py +39 -0
- visidata/tests/test_commands.py +62 -7
- visidata/tests/test_edittext.py +2 -2
- visidata/tests/test_features.py +17 -0
- visidata/tests/test_menu.py +14 -0
- visidata/tests/test_path.py +13 -4
- visidata/text_source.py +53 -0
- visidata/textsheet.py +10 -3
- visidata/theme.py +44 -0
- visidata/themes/__init__.py +0 -0
- visidata/themes/ascii8.py +84 -0
- visidata/themes/asciimono.py +84 -0
- visidata/themes/light.py +17 -0
- visidata/threads.py +87 -39
- visidata/tuiwin.py +22 -0
- visidata/type_currency.py +22 -3
- visidata/type_date.py +31 -9
- visidata/type_floatsi.py +5 -1
- visidata/undo.py +17 -5
- visidata/utils.py +106 -23
- visidata/vdobj.py +28 -17
- visidata/windows.py +10 -0
- visidata/wrappers.py +9 -3
- visidata-3.0.data/data/share/applications/visidata.desktop +7 -0
- {visidata-2.11.1.data → visidata-3.0.data}/data/share/man/man1/vd.1 +301 -148
- {visidata-2.11.1.data → visidata-3.0.data}/data/share/man/man1/visidata.1 +301 -148
- visidata-3.0.data/scripts/vd2to3.vdx +9 -0
- {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/METADATA +12 -8
- visidata-3.0.dist-info/RECORD +257 -0
- {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/WHEEL +1 -1
- vgit/__init__.py +0 -1
- vgit/gitsheet.py +0 -164
- visidata/layout.py +0 -44
- visidata/misc.py +0 -5
- visidata-2.11.1.data/scripts/vgit +0 -9
- visidata-2.11.1.dist-info/RECORD +0 -155
- {vgit → visidata/apps/vgit}/__main__.py +0 -0
- {vgit → visidata/apps/vgit}/abort.py +0 -0
- /visidata/{repeat.py → features/repeat.py} +0 -0
- {visidata-2.11.1.data → visidata-3.0.data}/scripts/vd +0 -0
- {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/LICENSE.gpl3 +0 -0
- {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/entry_points.txt +0 -0
- {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,186 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
__all__=[ 'SelectorColumn', 'soupstr' ]
|
4
|
+
|
5
|
+
import os.path
|
6
|
+
from urllib.parse import urljoin
|
7
|
+
|
8
|
+
import concurrent.futures
|
9
|
+
import functools
|
10
|
+
|
11
|
+
from visidata import vd, VisiData, TableSheet, vdtype, Column, AttrColumn, Progress, date
|
12
|
+
|
13
|
+
|
14
|
+
@VisiData.api
def soup(vd, s):
    'Parse the HTML text *s* with the "html.parser" backend and return the BeautifulSoup document.'
    # go through vd.importExternal so the bs4 module is obtained the same way as other optional deps
    bs4 = vd.importExternal('bs4', 'beautifulsoup4')
    return bs4.BeautifulSoup(s, 'html.parser')
|
19
|
+
|
20
|
+
|
21
|
+
@VisiData.api
def open_scrape(vd, p):
    '''Open Path *p* for scraping.

    A URL becomes an HtmlDocsSheet with that single URL to fetch; anything
    else becomes an HtmlElementsSheet whose elements are parsed from *p* on load.
    '''
    vd.importExternal('bs4', 'beautifulsoup4')  # needed by both sheet types; import before building either

    vd.enable_requests_cache()
    if not p.is_url():
        return HtmlElementsSheet(p.base_stem, source=p, elements=None)
    return HtmlDocsSheet(p.base_stem, source=p, urls=[p.given])

VisiData.openhttp_scrape = VisiData.open_scrape
|
32
|
+
|
33
|
+
def node_name(node):
    'Return a short CSS-like label for *node*: its tag name, then ".<first class>" and "#<id>" when those attributes are set.'
    attrs = node.attrs
    suffix = ''

    classes = attrs.get("class")
    if classes:
        suffix = '.' + classes[0]   # only the first class is used

    ident = attrs.get("id")
    if ident:
        suffix = suffix + '#' + ident

    if not suffix:
        return node.name
    return node.name + suffix
|
42
|
+
|
43
|
+
@functools.lru_cache(maxsize=None)
def calc_selector(node):
    '''Return a CSS selector string for *node*, built from node_name() labels.

    A node without a parent yields ''.  Otherwise the parent's selector is
    computed first (recursively, memoized by lru_cache); the node's own label
    is returned alone when prefixing it with the parent's selector does not
    change how many elements the outermost ancestor selects.
    '''
    if not node.parent:
        return ''

    parent_selector = calc_selector(node.parent)
    own = node_name(node)
    if not parent_selector:
        return own

    *_, top = node.parents   # outermost ancestor is the last one yielded

    qualified = parent_selector + ' ' + own
    if len(top.select(qualified)) != len(top.select(own)):
        return qualified

    return own
|
60
|
+
|
61
|
+
|
62
|
+
class HtmlAttrColumn(Column):
    'Column whose value is the HTML attribute named by `self.expr`, looked up in the row\'s `attrs` (None if absent).'
    def calcValue(self, row):
        element_attrs = row.attrs
        return element_attrs.get(self.expr)
|
65
|
+
|
66
|
+
|
67
|
+
def prev_header(r):
    '''Return the closest preceding header element that outranks *r*.

    - If *r* is not itself a header (h1..h6), return the previous header of
      any level.
    - If *r* is a header at level N, return the previous header of a strictly
      higher level (h1..h(N-1)).
    - If *r* is an h1, nothing outranks it, so return None.

    The result is whatever `r.find_previous()` returns (None when nothing matches).
    '''
    hdrtags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']

    if r.name not in hdrtags:
        return r.find_previous(hdrtags)

    # slice up to (not including) our own level; the earlier `[:i-1]` skipped
    # the immediate parent level and produced an empty/negative slice for h2/h1
    outranking = hdrtags[:hdrtags.index(r.name)]
    if not outranking:
        return None   # never pass an empty name filter to find_previous

    return r.find_previous(outranking)
|
74
|
+
|
75
|
+
|
76
|
+
# one row per element
|
77
|
+
class HtmlElementsSheet(TableSheet):
    guide = '''# HTMLElements

This is a list of HTML elements from _{sheet.source}_ as parsed by `beautifulsoup4`.

Standard VisiData exploration techniques can be used to find relevant data, which will help determine the proper selector to use.

- `Enter` to dive into children of cursor element (or children of all selected rows with `g Enter`)
- `go` to batch open links in selected rows on new RequestsSheet, which will fetch each page
- `~` to use the `soupstr` type to join all the text elements
'''
    # source=[element, ...]
    rowtype='dom nodes'  # rowdef soup.element
    columns = [
        Column('name', getter=lambda c,r: node_name(r)),
        Column('selector', getter=lambda c,r: calc_selector(r), cache='async', width=0),
        AttrColumn('string'),
        # NOTE(review): despite the name, this yields the list of ancestors, not a number
        Column('depth', cache=True, getter=lambda c,r: list(c.sheet.html_parents(r))),
        Column('prev_header', getter=lambda c,r: prev_header(r), cache=True),
        HtmlAttrColumn('href', expr='href'),
    ]

    def iterload(self):
        'Yield every descendant element that has a single `.string`, from self.elements or (if none were given) from the parsed source text.'
        for el in self.elements or [vd.soup(self.source.read_text())]:
            for x in el.find_all():
                if x.string:
                    yield x

    def html_parents(self, row):
        'Yield the ancestors of *row*, nearest first, stopping at an element with no parent (or one that is its own parent).'
        while row.parent and row.parent is not row:
            yield row.parent
            row = row.parent

    @property
    def rootSource(self):
        'The source of the root sheet this sheet ultimately derives from.'
        # Other loaders call rootSheet() as a method; accept either a method
        # or a plain attribute so this works regardless of how it is exposed.
        root = self.rootSheet
        if callable(root):
            root = root()
        return root.source

    def openRows(self, rows):
        'Yield one HtmlDocsSheet that fetches the (absolute) href of each of *rows*; rows without an href are skipped.'
        base = self.rootSource.given
        hrefs = (r.attrs.get('href') for r in rows)
        # urljoin(base, None) would silently return the base page itself, so drop empty hrefs
        realurls = [urljoin(base, href) for href in hrefs if href]
        yield HtmlDocsSheet(self.name, 'scrape', source=self, urls=realurls)

    def openRow(self, row):
        'opening a single row'
        return HtmlElementsSheet('', source=self, elements=[row])
|
120
|
+
|
121
|
+
|
122
|
+
class DocsSelectorColumn(Column):
    'Column listing the elements in each row\'s parsed document (`row.soup`) that match the CSS selector in `self.expr`.'
    def calcValue(self, row):
        document = row.soup
        return list(document.select(self.expr))
|
125
|
+
|
126
|
+
class SelectorColumn(Column):
    'Column listing the elements under each row element that match the CSS selector in `self.expr`.'
    def calcValue(self, row):
        matches = row.select(self.expr)
        return list(matches)
|
129
|
+
|
130
|
+
|
131
|
+
# urls=list of urls to scrape
|
132
|
+
class HtmlDocsSheet(TableSheet):
    # Named `guide` to match the other sheets in this package.
    guide = '''# HtmlDocsSheet

- `Enter` to open the current request as list of HTMLElements
- `;` to add column of elements matching given css selector
- this is how to cross-tabulate data from multiple pages
'''
    help = guide  # original attribute name, kept as an alias for backward compatibility

    rowtype='requests'  # rowdef: requests.Response
    columns = [
        AttrColumn('url'),
        AttrColumn('status_code', type=int),
        AttrColumn('from_cache'),
        AttrColumn('fetched_at', 'created_at', type=date, width=0),
        AttrColumn('expires', type=date),
        AttrColumn('reason'),
        AttrColumn('soup.title.string'),
    ]

    def iterload(self):
        'Fetch each url in self.urls in order with requests.get() and yield the responses.'
        requests = vd.importExternal('requests')
        self.colnames = {}
        for url in Progress(self.urls):
            yield requests.get(url)

    def addRow(self, row, index=None):
        'Add response *row*, then attach its parsed HTML as `row.soup` (parsing goes through vd.callNoExceptions).'
        super().addRow(row, index=index)
        row.soup = vd.callNoExceptions(vd.soup, row.text)

    def openRow(self, row):
        'Open the parsed document of response *row* as an HtmlElementsSheet named after its url.'
        return HtmlElementsSheet(row.url, source=self, elements=[row.soup])
|
163
|
+
|
164
|
+
def soupstr(coll):
    '''Return the `.string` of every element in *coll*, joined by single spaces.

    Elements whose `.string` is None are skipped instead of making the
    join raise TypeError.
    '''
    strings = (v.string for v in coll)
    return ' '.join(s for s in strings if s is not None)
|
166
|
+
|
167
|
+
# register soupstr as a VisiData column type; 's' is vdtype's second argument (presumably the type's short marker -- TODO confirm)
vdtype(soupstr, 's')
|
168
|
+
|
169
|
+
@TableSheet.api
def scrape_urls(sheet, col, rows):
    'Return an HtmlDocsSheet whose urls are the typed values of *col* for each of *rows*.'
    sheet_name = sheet.name
    urls = [col.getTypedValue(row) for row in rows]
    return HtmlDocsSheet(sheet_name, "selected_urls", urls=urls)
|
172
|
+
|
173
|
+
# commands on the element/document sheets
HtmlElementsSheet.addCommand('~', 'type-soupstr', 'cursorCol.type=soupstr', 'set type of current column to list of html elements')
HtmlElementsSheet.addCommand('go', 'open-rows', 'for vs in openRows(selectedRows): vd.push(vs)', 'open sheet for each selected element')
TableSheet.addCommand('gzo', 'scrape-cells', 'vd.push(scrape_urls(cursorCol, selectedRows))', 'open HTML Documents sheet from selected URLs')
HtmlDocsSheet.addCommand(';', 'addcol-selector', 'sel=input("css selector: ", type="selector"); addColumn(DocsSelectorColumn(sel, expr=sel, cache="async"))', 'add column derived from css selector of current column')
HtmlElementsSheet.addCommand(';', 'addcol-selector', 'sel=input("css selector: ", type="selector"); addColumn(SelectorColumn(sel, expr=sel, cache="async"))', 'add column derived from css selector of current column')

# names made available to command strings
vd.addGlobals({
    'HtmlDocsSheet': HtmlDocsSheet,   # was mistakenly bound to SelectorColumn
    'SelectorColumn': SelectorColumn,
    'DocsSelectorColumn': DocsSelectorColumn,
    'soupstr': soupstr,
})

vd.addMenuItem('Data', '+Scrape', 'selected cells', 'scrape-cells')
|
visidata/loaders/shp.py
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
import json
|
2
|
+
from copy import copy
|
2
3
|
|
3
|
-
from visidata import VisiData, vd, Sheet, Column, Progress, date,
|
4
|
+
from visidata import VisiData, vd, Sheet, Column, Progress, date, InvertedCanvas, asyncthread
|
4
5
|
|
5
6
|
# requires pyshp
|
6
7
|
|
7
8
|
|
8
9
|
@VisiData.api
def open_shp(vd, p):
    'Return a ShapeSheet for Path *p*, named after its base stem.'
    sheet_name = p.base_stem
    return ShapeSheet(sheet_name, source=p)
|
11
12
|
|
12
13
|
VisiData.open_dbf = VisiData.open_shp
|
13
14
|
|
@@ -33,7 +34,7 @@ class ShapeSheet(Sheet):
|
|
33
34
|
Column('shapeType', width=0, getter=lambda col,row: row.shape.shapeType)
|
34
35
|
]
|
35
36
|
def iterload(self):
|
36
|
-
|
37
|
+
shapefile = vd.importExternal('shapefile', 'pyshp')
|
37
38
|
self.sf = shapefile.Reader(str(self.source))
|
38
39
|
self.reloadCols()
|
39
40
|
for shaperec in Progress(self.sf.iterShapeRecords(), total=self.sf.numRecords):
|
@@ -97,10 +98,10 @@ def save_geojson(vd, p, vs):
|
|
97
98
|
'type': 'FeatureCollection',
|
98
99
|
'features': features,
|
99
100
|
}
|
100
|
-
with p.
|
101
|
+
with p.open(mode='w', encoding=vs.options.save_encoding) as fp:
|
101
102
|
for chunk in json.JSONEncoder().iterencode(featcoll):
|
102
103
|
fp.write(chunk)
|
103
104
|
|
104
105
|
ShapeSheet.addCommand('.', 'plot-row', 'vd.push(ShapeMap(name+"_map", source=sheet, sourceRows=[cursorRow], textCol=cursorCol))', 'plot geospatial vector in current row')
|
105
106
|
ShapeSheet.addCommand('g.', 'plot-rows', 'vd.push(ShapeMap(name+"_map", source=sheet, sourceRows=rows, textCol=cursorCol))', 'plot all geospatial vectors in current sheet')
|
106
|
-
ShapeMap.addCommand('^S', 'save-sheet', 'vd.saveSheets(inputPath("save to: ", value=getDefaultSaveName(sheet)), sheet
|
107
|
+
ShapeMap.addCommand('^S', 'save-sheet', 'vd.saveSheets(inputPath("save to: ", value=getDefaultSaveName(sheet)), sheet)', 'save current sheet to filename in format determined by extension (default .geojson)')
|
visidata/loaders/spss.py
CHANGED
@@ -1,22 +1,21 @@
|
|
1
|
-
from visidata import VisiData, Sheet, Progress, asyncthread,
|
1
|
+
from visidata import VisiData, Sheet, Progress, asyncthread, ItemColumn, vd
|
2
2
|
|
3
3
|
|
4
4
|
@VisiData.api
def open_spss(vd, p):
    'Return a SpssSheet for Path *p*, named after its base stem.'
    sheet_name = p.base_stem
    return SpssSheet(sheet_name, source=p)
|
7
7
|
VisiData.open_sav = VisiData.open_spss
|
8
8
|
|
9
9
|
|
10
10
|
class SpssSheet(Sheet):
|
11
|
-
|
12
|
-
|
13
|
-
import savReaderWriter
|
11
|
+
def loader(self):
|
12
|
+
savReaderWriter = vd.importExternal('savReaderWriter')
|
14
13
|
self.rdr = savReaderWriter.SavReader(str(self.source))
|
15
14
|
with self.rdr as reader:
|
16
15
|
self.columns = []
|
17
16
|
for i, vname in enumerate(reader.varNames):
|
18
17
|
vtype = float if reader.varTypes[vname] == 0 else str
|
19
|
-
self.addColumn(
|
18
|
+
self.addColumn(ItemColumn(vname.decode('utf-8'), i, type=vtype))
|
20
19
|
|
21
20
|
self.rows = []
|
22
21
|
for r in Progress(reader, total=reader.shape.nrows):
|
visidata/loaders/sqlite.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
+
from copy import copy
|
1
2
|
import re
|
2
3
|
|
3
|
-
from visidata import VisiData, vd, Sheet, options, Column, Progress, anytype, ColumnItem, asyncthread, TypedExceptionWrapper, TypedWrapper, IndexSheet,
|
4
|
+
from visidata import VisiData, vd, Sheet, options, Column, Progress, anytype, ColumnItem, asyncthread, TypedExceptionWrapper, TypedWrapper, IndexSheet, vlen
|
4
5
|
from visidata.type_date import date
|
5
6
|
|
6
7
|
vd.option('sqlite_onconnect', '', 'sqlite statement to execute after opening a connection')
|
@@ -16,13 +17,21 @@ def requery(url, **kwargs):
|
|
16
17
|
return urlunparse(url_parts)
|
17
18
|
|
18
19
|
|
20
|
+
@VisiData.api
def guess_sqlite(vd, p):
    'Return {"filetype": "sqlite"} if the first 16 bytes of *p* start with the SQLite magic string; otherwise return None.'
    header = p.open_bytes().read(16)
    if not header.startswith(b'SQLite format'):
        return None
    return {'filetype': 'sqlite'}
|
24
|
+
|
25
|
+
|
19
26
|
@VisiData.api
def open_sqlite(vd, p):
    'Return a SqliteIndexSheet for Path *p*; calls vd.fail() first when p.is_local() is false.'
    is_local = p.is_local()
    if not is_local:
        vd.fail('sqlite requires an uncompressed, local file')

    sheet_name = p.base_stem
    return SqliteIndexSheet(sheet_name, source=p)
|
22
31
|
|
23
32
|
@VisiData.api
def openurl_sqlite(vd, p, filetype=None):
    'Return a SqliteIndexSheet for url Path *p*; *filetype* is accepted but not used here.'
    sheet_name = p.base_stem
    return SqliteIndexSheet(sheet_name, source=p)
|
26
35
|
|
27
36
|
VisiData.open_sqlite3 = VisiData.open_sqlite
|
28
37
|
VisiData.open_db = VisiData.open_sqlite
|
@@ -32,15 +41,14 @@ class SqliteSheet(Sheet):
|
|
32
41
|
'Provide functionality for importing SQLite databases.'
|
33
42
|
savesToSource = True
|
34
43
|
defer = True
|
35
|
-
|
36
|
-
|
37
|
-
'Resolve all the way back to the original source Path.'
|
38
|
-
return self.source.resolve()
|
44
|
+
query = ''
|
45
|
+
tableName = ''
|
39
46
|
|
40
47
|
def conn(self):
|
41
48
|
import sqlite3
|
42
|
-
|
43
|
-
|
49
|
+
localpath = self.rootSheet().source
|
50
|
+
|
51
|
+
url = localpath if localpath.is_url() else f'file:{localpath.resolve()}'
|
44
52
|
url = requery(url, **self.options.getall('sqlite_param_'))
|
45
53
|
|
46
54
|
con = sqlite3.connect(url, uri=True, **self.options.getall('sqlite_connect_'))
|
@@ -49,13 +57,25 @@ class SqliteSheet(Sheet):
|
|
49
57
|
con.execute(self.options.sqlite_onconnect)
|
50
58
|
return con
|
51
59
|
|
60
|
+
def rawSql(self, q:str) -> 'SqliteSheet':
    'Return a new SqliteSheet named "query" over the same source, with *q* as its SQL query.'
    query_sheet = SqliteSheet('query', source=self.source, query=q)
    return query_sheet
|
62
|
+
|
63
|
+
@property
def sidebar(self):
    'Sidebar text: the SQL query under a "# SQL" heading when this sheet has one, else the inherited sidebar.'
    if not self.query:
        return super().sidebar
    return '# SQL\n' + self.query
|
69
|
+
|
52
70
|
def execute(self, conn, sql, parms=None):
    'Log *sql* via vd.debug, run it on *conn* with *parms* (an empty list when none are given), and return what conn.execute() returns.'
    if not parms:
        parms = []
    vd.debug(sql)
    result = conn.execute(sql, parms)
    return result
|
56
74
|
|
57
|
-
def
|
58
|
-
|
75
|
+
def iterload_table(self, tblname:str):
|
76
|
+
'''Generate all rows from `tblname` in database at self.source,
|
77
|
+
including type information from table_xinfo(), and getting each rowid
|
78
|
+
if available (for simpler updates).'''
|
59
79
|
|
60
80
|
def parse_sqlite_type(t):
|
61
81
|
m = re.match(r'(\w+)(\((\d+)(,(\d+))?\))?', t.upper())
|
@@ -70,7 +90,6 @@ class SqliteSheet(Sheet):
|
|
70
90
|
|
71
91
|
self.rowidColumn = None
|
72
92
|
with self.conn() as conn:
|
73
|
-
tblname = self.tableName
|
74
93
|
if not isinstance(self, SqliteIndexSheet):
|
75
94
|
self.columns = []
|
76
95
|
for r in self.execute(conn, 'PRAGMA TABLE_XINFO("%s")' % tblname):
|
@@ -92,6 +111,32 @@ class SqliteSheet(Sheet):
|
|
92
111
|
r = self.execute(conn, 'SELECT NULL, * FROM "%s"' % tblname)
|
93
112
|
yield from Progress(r, total=r.rowcount-1)
|
94
113
|
|
114
|
+
def iterload_query(self, query:str):
    '''Generate the rows produced by running `query` on the database connection.

    Columns are rebuilt first: a copy of each class-level column, followed by
    one ColumnItem per entry in the result's `description` (named by its
    first field, indexed by position).  `self.parms`, when present, supplies
    the query parameters.
    '''
    with self.conn() as conn:
        self.columns = []
        for template in type(self).columns:
            self.addColumn(copy(template))

        query_parms = getattr(self, 'parms', [])
        self.result = self.execute(conn, query, parms=query_parms)

        for position, desc in enumerate(self.result.description):
            self.addColumn(ColumnItem(desc[0], position))

        yield from self.result
|
131
|
+
|
132
|
+
def iterload(self):
    'Yield rows from the table named by self.tableName if set, else from self.query if set; otherwise fail.'
    if self.tableName:
        rows = self.iterload_table(self.tableName)
    elif self.query:
        rows = self.iterload_query(self.query)
    else:
        vd.fail('no query or tablename to load')
        return

    yield from rows
|
139
|
+
|
95
140
|
@asyncthread
|
96
141
|
def putChanges(self):
|
97
142
|
adds, mods, dels = self.getDeferredChanges()
|
@@ -192,24 +237,14 @@ class SqliteIndexSheet(SqliteSheet, IndexSheet):
|
|
192
237
|
self.preloadHook()
|
193
238
|
self.reload()
|
194
239
|
|
195
|
-
class SqliteQuerySheet(SqliteSheet):
|
196
|
-
def iterload(self):
|
197
|
-
with self.conn() as conn:
|
198
|
-
self.columns = []
|
199
|
-
for c in type(self).columns:
|
200
|
-
self.addColumn(copy(c))
|
201
|
-
self.result = self.execute(conn, self.query, parms=getattr(self, 'parms', []))
|
202
|
-
for i, desc in enumerate(self.result.description):
|
203
|
-
self.addColumn(ColumnItem(desc[0], i))
|
204
|
-
|
205
|
-
for row in self.result:
|
206
|
-
yield row
|
207
|
-
|
208
240
|
|
209
241
|
|
210
242
|
@VisiData.api
|
211
243
|
def save_sqlite(vd, p, *vsheets):
|
212
244
|
import sqlite3
|
245
|
+
import json
|
246
|
+
jsonenc = json.JSONEncoder() #1589: list/dict values as json
|
247
|
+
|
213
248
|
conn = sqlite3.connect(str(p))
|
214
249
|
conn.text_factory = lambda s, enc=vsheets[0].options.encoding: s.decode(enc)
|
215
250
|
conn.row_factory = sqlite3.Row
|
@@ -231,7 +266,7 @@ def save_sqlite(vd, p, *vsheets):
|
|
231
266
|
vd.sync()
|
232
267
|
|
233
268
|
for vs in vsheets:
|
234
|
-
tblname =
|
269
|
+
tblname = vd.cleanName(vs.name)
|
235
270
|
sqlcols = []
|
236
271
|
for col in vs.visibleCols:
|
237
272
|
sqlcols.append('"%s" %s' % (col.name, sqltypes.get(col.type, 'TEXT')))
|
@@ -247,6 +282,8 @@ def save_sqlite(vd, p, *vsheets):
|
|
247
282
|
v = options.safe_error
|
248
283
|
else:
|
249
284
|
v = None
|
285
|
+
elif isinstance(v, (list, tuple, dict)):
|
286
|
+
v = jsonenc.encode(v)
|
250
287
|
elif not isinstance(v, (int, float, str)):
|
251
288
|
v = col.getDisplayValue(r)
|
252
289
|
sqlvals.append(v)
|
@@ -255,16 +292,19 @@ def save_sqlite(vd, p, *vsheets):
|
|
255
292
|
|
256
293
|
conn.commit()
|
257
294
|
|
258
|
-
vd.status("%s save finished" % p)
|
259
295
|
|
296
|
+
SqliteSheet.addCommand('', 'exec-sql', 'vd.push(rawSql(input("execute SQL: ", type="sql")))', 'execute raw SQL statement')
|
260
297
|
|
261
298
|
SqliteIndexSheet.addCommand('a', 'add-table', 'fail("create a new table by saving a sheet to this database file")', 'stub; add table by saving a sheet to the db file instead')
|
262
299
|
SqliteIndexSheet.bindkey('ga', 'add-table')
|
263
300
|
SqliteSheet.options.header = 0
|
264
301
|
VisiData.save_db = VisiData.save_sqlite
|
265
302
|
|
303
|
+
vd.addMenuItems('''
|
304
|
+
Data > execute SQL query > exec-sql
|
305
|
+
''')
|
306
|
+
|
266
307
|
vd.addGlobals({
|
267
308
|
'SqliteIndexSheet': SqliteIndexSheet,
|
268
309
|
'SqliteSheet': SqliteSheet,
|
269
|
-
'SqliteQuerySheet': SqliteQuerySheet
|
270
310
|
})
|
visidata/loaders/texttables.py
CHANGED
@@ -7,7 +7,7 @@ try:
|
|
7
7
|
def save_table(path, *sheets, fmt=fmt):
|
8
8
|
import tabulate
|
9
9
|
|
10
|
-
with path.
|
10
|
+
with path.open(mode='w', encoding=sheets[0].options.save_encoding) as fp:
|
11
11
|
for vs in sheets:
|
12
12
|
fp.write(tabulate.tabulate(
|
13
13
|
vs.itervals(*vs.visibleCols, format=True),
|
visidata/loaders/toml.py
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
from visidata import (
|
2
|
+
ColumnItem,
|
3
|
+
PythonSheet,
|
4
|
+
VisiData,
|
5
|
+
asyncthread,
|
6
|
+
deduceType,
|
7
|
+
vd,
|
8
|
+
)
|
9
|
+
|
10
|
+
|
11
|
+
@VisiData.api
def open_toml(vd, p):
    'Return a TomlSheet for Path *p*, named after its base stem.'
    sheet_name = p.base_stem
    return TomlSheet(sheet_name, source=p)
|
14
|
+
|
15
|
+
|
16
|
+
class TomlSheet(PythonSheet):
    """Sheet showing the top-level table of a TOML document as a single row.

    Deliberately minimal; modeled on VisiData's JSON and Python-object sheets.
    """
    guide = '''# Toml Sheet
This sheet represents the top level of {sheet.source.name}.{sheet.source.ext}.

Each cell within this sheet can contain dictionaries (representing TOML key:value pairs), lists (representing TOML arrays), or scalars.

Some helpful commands when working with cells of lists and dictionaries:

- `(` (`expand-col`) on a column with lists or dictionaries will "expand" the structures in the cells into new columns within the current sheet.
- `zEnter` on a cell with lists or dictionaries will "dive" into the current cell, expanding its structures into rows and columns in a separate sheet.
'''

    rowtype = "values"  # rowdef: dict values, possibly nested

    def loader(self):
        """Parse the source text as TOML into one dict, add a column per
        top-level key (typed via deduceType of its value), and add the dict
        itself as the only row.
        """
        self.columns = []
        self.rows = []

        try:
            import tomllib   # stdlib from Python 3.11
        except ModuleNotFoundError:
            tomllib = vd.importExternal("tomli")   # same API for older Pythons

        document = tomllib.loads(self.source.read_text())
        for key in document:
            self.addColumn(ColumnItem(key, type=deduceType(document[key])))
        self.addRow(document)
|
54
|
+
|
55
|
+
|
56
|
+
# expose the sheet class by name
vd.addGlobals({"TomlSheet": TomlSheet})
|
visidata/loaders/tsv.py
CHANGED
@@ -2,8 +2,10 @@ import os
|
|
2
2
|
import contextlib
|
3
3
|
import itertools
|
4
4
|
import collections
|
5
|
+
import math
|
6
|
+
import time
|
5
7
|
|
6
|
-
from visidata import vd, asyncthread, options, Progress, ColumnItem, SequenceSheet, Sheet,
|
8
|
+
from visidata import vd, asyncthread, options, Progress, ColumnItem, SequenceSheet, Sheet, VisiData
|
7
9
|
from visidata import namedlist, filesize
|
8
10
|
|
9
11
|
vd.option('delimiter', '\t', 'field delimiter to use for tsv/usv filetype', replay=True)
|
@@ -14,23 +16,55 @@ vd.option('tsv_safe_tab', '\u001f', 'replacement for tab character when saving t
|
|
14
16
|
|
15
17
|
@VisiData.api
def open_tsv(vd, p):
    'Return a TsvSheet for Path *p*, named after its base stem.'
    sheet_name = p.base_stem
    return TsvSheet(sheet_name, source=p)
|
20
|
+
|
21
|
+
|
22
|
+
def adaptive_bufferer(fp, max_buffer_size=65536):
    """Yield successive chunks read from *fp*, adapting the read size to the input speed.

    Large reads make loading fast inputs quicker, but on a slow input stream
    (e.g. 1 byte/second) a large read blocks for a long time before any data
    can be visualized.  So reads start small (8) and grow: the size doubles
    after every read while less than a second has passed since the last
    measurement; once a second or more has passed, the size is set to the
    measured throughput (amount read per second) and measurement restarts.
    The read size never exceeds *max_buffer_size*.
    """
    chunk_size = 8
    read_since_mark = 0
    mark = time.time()

    while True:
        chunk = fp.read(max(chunk_size, 1))
        if not chunk:
            return

        yield chunk

        read_since_mark += len(chunk)

        now = time.time()
        elapsed = now - mark

        if elapsed >= 1:
            # a full second (or more) has passed: size the next read to roughly
            # one second's worth of input, and start a new measurement window
            mark = now
            chunk_size = math.ceil(min(read_since_mark / elapsed, max_buffer_size))
            read_since_mark = 0
        else:
            # input is arriving quickly: keep doubling, up to the cap
            chunk_size = min(chunk_size * 2, max_buffer_size)
|
53
|
+
|
54
|
+
def splitter(stream, delim='\n'):
    '''Generate one line/row/record at a time from *stream*, separated by *delim*.

    *stream* is an iterable of chunks of the same type as *delim* (str or
    bytes).  A record or delimiter may span chunk boundaries.  The final
    record is yielded only if it is non-empty, so one trailing delimiter
    produces no extra empty record.
    '''
    buf = type(delim)()   # empty str or bytes, matching delim

    for chunk in stream:
        buf += chunk

        # everything before the last delimiter is complete; the tail may continue in the next chunk
        *rows, buf = buf.split(delim)
        yield from rows

    # buf is the tail of a split, so it holds no complete delimiter.  It must
    # not be passed through rstrip(delim): rstrip treats its argument as a set
    # of characters and would eat data that merely ends in delimiter characters.
    if buf:
        yield buf
|
34
68
|
|
35
69
|
|
36
70
|
# rowdef: list
|
@@ -42,8 +76,8 @@ class TsvSheet(SequenceSheet):
|
|
42
76
|
delim = self.delimiter or self.options.delimiter
|
43
77
|
rowdelim = self.row_delimiter or self.options.row_delimiter
|
44
78
|
|
45
|
-
with self.
|
46
|
-
for line in splitter(fp, rowdelim):
|
79
|
+
with self.open_text_source() as fp:
|
80
|
+
for line in splitter(adaptive_bufferer(fp), rowdelim):
|
47
81
|
if not line:
|
48
82
|
continue
|
49
83
|
|
@@ -63,7 +97,7 @@ def save_tsv(vd, p, vs, delimiter='', row_delimiter=''):
|
|
63
97
|
rowsep = row_delimiter or vs.options.row_delimiter
|
64
98
|
trdict = vs.safe_trdict()
|
65
99
|
|
66
|
-
with p.
|
100
|
+
with p.open(mode='w', encoding=vs.options.save_encoding) as fp:
|
67
101
|
colhdr = unitsep.join(col.name.translate(trdict) for col in vs.visibleCols) + rowsep
|
68
102
|
fp.write(colhdr)
|
69
103
|
|
@@ -71,8 +105,6 @@ def save_tsv(vd, p, vs, delimiter='', row_delimiter=''):
|
|
71
105
|
fp.write(unitsep.join(dispvals.values()))
|
72
106
|
fp.write(rowsep)
|
73
107
|
|
74
|
-
vd.status('%s save finished' % p)
|
75
|
-
|
76
108
|
|
77
109
|
@Sheet.api
|
78
110
|
def append_tsv_row(vs, row):
|
@@ -87,14 +119,24 @@ def append_tsv_row(vs, row):
|
|
87
119
|
trdict = vs.safe_trdict()
|
88
120
|
unitsep = options.delimiter
|
89
121
|
|
90
|
-
with vs.source.
|
122
|
+
with vs.source.open(mode='w') as fp:
|
91
123
|
colhdr = unitsep.join(col.name.translate(trdict) for col in vs.visibleCols) + vs.options.row_delimiter
|
92
124
|
if colhdr.strip(): # is anything but whitespace
|
93
125
|
fp.write(colhdr)
|
94
126
|
|
95
|
-
|
96
|
-
|
127
|
+
newrow = ''
|
128
|
+
|
129
|
+
contents = vs.source.open(mode='r').read()
|
130
|
+
if not contents.endswith('\n'): #1569
|
131
|
+
newrow += '\n'
|
132
|
+
|
133
|
+
newrow += '\t'.join(col.getDisplayValue(row) for col in vs.visibleCols) + '\n'
|
134
|
+
|
135
|
+
with vs.source.open(mode='a') as fp:
|
136
|
+
fp.write(newrow)
|
137
|
+
|
97
138
|
|
139
|
+
TsvSheet.options.regex_skip = '^#.*'
|
98
140
|
|
99
141
|
vd.addGlobals({
|
100
142
|
'TsvSheet': TsvSheet,
|