visidata 2.11.dev0__py3-none-any.whl → 3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253) hide show
  1. visidata/__init__.py +72 -91
  2. visidata/_input.py +263 -44
  3. visidata/_open.py +84 -29
  4. visidata/_types.py +22 -4
  5. visidata/_urlcache.py +17 -4
  6. visidata/aggregators.py +65 -25
  7. visidata/apps/__init__.py +0 -0
  8. visidata/apps/vdsql/__about__.py +8 -0
  9. visidata/apps/vdsql/__init__.py +5 -0
  10. visidata/apps/vdsql/__main__.py +27 -0
  11. visidata/apps/vdsql/_ibis.py +748 -0
  12. visidata/apps/vdsql/bigquery.py +61 -0
  13. visidata/apps/vdsql/clickhouse.py +53 -0
  14. visidata/apps/vdsql/setup.py +40 -0
  15. visidata/apps/vdsql/snowflake.py +67 -0
  16. visidata/apps/vgit/__init__.py +13 -0
  17. visidata/apps/vgit/__main__.py +3 -0
  18. visidata/apps/vgit/abort.py +23 -0
  19. visidata/apps/vgit/blame.py +76 -0
  20. visidata/apps/vgit/branch.py +153 -0
  21. visidata/apps/vgit/config.py +95 -0
  22. visidata/apps/vgit/diff.py +169 -0
  23. visidata/apps/vgit/gitsheet.py +161 -0
  24. visidata/apps/vgit/grep.py +37 -0
  25. visidata/apps/vgit/log.py +81 -0
  26. visidata/apps/vgit/main.py +55 -0
  27. visidata/apps/vgit/remote.py +57 -0
  28. visidata/apps/vgit/repos.py +71 -0
  29. visidata/apps/vgit/setup.py +37 -0
  30. visidata/apps/vgit/stash.py +69 -0
  31. visidata/apps/vgit/status.py +204 -0
  32. visidata/apps/vgit/statusbar.py +34 -0
  33. visidata/basesheet.py +59 -50
  34. visidata/canvas.py +251 -99
  35. visidata/choose.py +15 -11
  36. visidata/clean_names.py +29 -0
  37. visidata/clipboard.py +84 -18
  38. visidata/cliptext.py +220 -46
  39. visidata/cmdlog.py +89 -114
  40. visidata/color.py +142 -56
  41. visidata/column.py +134 -131
  42. visidata/ddw/input.ddw +74 -79
  43. visidata/ddw/regex.ddw +57 -0
  44. visidata/ddwplay.py +33 -14
  45. visidata/deprecated.py +77 -3
  46. visidata/desktop/visidata.desktop +7 -0
  47. visidata/editor.py +12 -6
  48. visidata/errors.py +5 -1
  49. visidata/experimental/__init__.py +0 -0
  50. visidata/experimental/diff_sheet.py +29 -0
  51. visidata/experimental/digit_autoedit.py +6 -0
  52. visidata/experimental/gdrive.py +89 -0
  53. visidata/experimental/google.py +37 -0
  54. visidata/experimental/gsheets.py +79 -0
  55. visidata/experimental/live_search.py +37 -0
  56. visidata/experimental/liveupdate.py +45 -0
  57. visidata/experimental/mark.py +133 -0
  58. visidata/experimental/noahs_tapestry/__init__.py +1 -0
  59. visidata/experimental/noahs_tapestry/tapestry.py +147 -0
  60. visidata/experimental/rownum.py +73 -0
  61. visidata/experimental/slide_cells.py +26 -0
  62. visidata/expr.py +8 -4
  63. visidata/extensible.py +32 -6
  64. visidata/features/__init__.py +0 -0
  65. visidata/features/addcol_audiometadata.py +42 -0
  66. visidata/features/addcol_histogram.py +34 -0
  67. visidata/features/canvas_save_svg.py +69 -0
  68. visidata/features/change_precision.py +46 -0
  69. visidata/features/cmdpalette.py +163 -0
  70. visidata/features/colorbrewer.py +363 -0
  71. visidata/{colorsheet.py → features/colorsheet.py} +17 -16
  72. visidata/features/command_server.py +105 -0
  73. visidata/features/currency_to_usd.py +70 -0
  74. visidata/{customdate.py → features/customdate.py} +2 -0
  75. visidata/features/dedupe.py +132 -0
  76. visidata/{describe.py → features/describe.py} +17 -15
  77. visidata/features/errors_guide.py +26 -0
  78. visidata/features/expand_cols.py +202 -0
  79. visidata/{fill.py → features/fill.py} +4 -2
  80. visidata/{freeze.py → features/freeze.py} +11 -6
  81. visidata/features/graph_seaborn.py +79 -0
  82. visidata/features/helloworld.py +10 -0
  83. visidata/features/hint_types.py +17 -0
  84. visidata/{incr.py → features/incr.py} +5 -0
  85. visidata/{join.py → features/join.py} +107 -53
  86. visidata/features/known_cols.py +21 -0
  87. visidata/features/layout.py +62 -0
  88. visidata/{melt.py → features/melt.py} +33 -21
  89. visidata/features/normcol.py +118 -0
  90. visidata/features/open_config.py +7 -0
  91. visidata/features/open_syspaste.py +18 -0
  92. visidata/features/ping.py +157 -0
  93. visidata/features/procmgr.py +208 -0
  94. visidata/features/random_sample.py +6 -0
  95. visidata/{regex.py → features/regex.py} +47 -31
  96. visidata/features/reload_every.py +55 -0
  97. visidata/features/rename_col_cascade.py +30 -0
  98. visidata/features/scroll_context.py +60 -0
  99. visidata/features/select_equal_selected.py +11 -0
  100. visidata/features/setcol_fake.py +65 -0
  101. visidata/{slide.py → features/slide.py} +75 -21
  102. visidata/features/sparkline.py +48 -0
  103. visidata/features/status_source.py +20 -0
  104. visidata/{sysedit.py → features/sysedit.py} +2 -1
  105. visidata/features/sysopen_mailcap.py +46 -0
  106. visidata/features/term_extras.py +13 -0
  107. visidata/{transpose.py → features/transpose.py} +5 -4
  108. visidata/features/type_ipaddr.py +73 -0
  109. visidata/features/type_url.py +11 -0
  110. visidata/{unfurl.py → features/unfurl.py} +9 -9
  111. visidata/{window.py → features/window.py} +2 -2
  112. visidata/form.py +50 -21
  113. visidata/freqtbl.py +81 -33
  114. visidata/fuzzymatch.py +414 -0
  115. visidata/graph.py +105 -33
  116. visidata/guide.py +180 -0
  117. visidata/help.py +75 -44
  118. visidata/hint.py +39 -0
  119. visidata/indexsheet.py +109 -0
  120. visidata/input_history.py +55 -0
  121. visidata/interface.py +58 -0
  122. visidata/keys.py +17 -16
  123. visidata/loaders/__init__.py +9 -0
  124. visidata/loaders/_pandas.py +61 -21
  125. visidata/loaders/api_airtable.py +70 -0
  126. visidata/loaders/api_bitio.py +102 -0
  127. visidata/loaders/api_matrix.py +148 -0
  128. visidata/loaders/api_reddit.py +306 -0
  129. visidata/loaders/api_zulip.py +249 -0
  130. visidata/loaders/archive.py +41 -7
  131. visidata/loaders/arrow.py +7 -7
  132. visidata/loaders/conll.py +49 -0
  133. visidata/loaders/csv.py +25 -7
  134. visidata/loaders/eml.py +3 -4
  135. visidata/loaders/f5log.py +1204 -0
  136. visidata/loaders/fec.py +325 -0
  137. visidata/loaders/fixed_width.py +3 -5
  138. visidata/loaders/frictionless.py +3 -3
  139. visidata/loaders/geojson.py +8 -5
  140. visidata/loaders/google.py +48 -0
  141. visidata/loaders/graphviz.py +4 -4
  142. visidata/loaders/hdf5.py +4 -4
  143. visidata/loaders/html.py +48 -10
  144. visidata/loaders/http.py +84 -30
  145. visidata/loaders/imap.py +20 -10
  146. visidata/loaders/jrnl.py +52 -0
  147. visidata/loaders/json.py +83 -29
  148. visidata/loaders/jsonla.py +74 -0
  149. visidata/loaders/lsv.py +15 -11
  150. visidata/loaders/mailbox.py +40 -0
  151. visidata/loaders/markdown.py +1 -3
  152. visidata/loaders/mbtiles.py +4 -5
  153. visidata/loaders/mysql.py +11 -13
  154. visidata/loaders/npy.py +7 -7
  155. visidata/loaders/odf.py +4 -1
  156. visidata/loaders/orgmode.py +428 -0
  157. visidata/loaders/pandas_freqtbl.py +14 -20
  158. visidata/loaders/parquet.py +62 -6
  159. visidata/loaders/pcap.py +3 -3
  160. visidata/loaders/pdf.py +4 -3
  161. visidata/loaders/png.py +19 -13
  162. visidata/loaders/postgres.py +9 -8
  163. visidata/loaders/rec.py +7 -3
  164. visidata/loaders/s3.py +342 -0
  165. visidata/loaders/sas.py +5 -5
  166. visidata/loaders/scrape.py +186 -0
  167. visidata/loaders/shp.py +6 -5
  168. visidata/loaders/spss.py +5 -6
  169. visidata/loaders/sqlite.py +68 -28
  170. visidata/loaders/texttables.py +1 -1
  171. visidata/loaders/toml.py +60 -0
  172. visidata/loaders/tsv.py +61 -19
  173. visidata/loaders/ttf.py +19 -7
  174. visidata/loaders/unzip_http.py +6 -5
  175. visidata/loaders/usv.py +1 -1
  176. visidata/loaders/vcf.py +16 -16
  177. visidata/loaders/vds.py +10 -7
  178. visidata/loaders/vdx.py +30 -5
  179. visidata/loaders/xlsb.py +8 -1
  180. visidata/loaders/xlsx.py +145 -25
  181. visidata/loaders/xml.py +6 -3
  182. visidata/loaders/xword.py +4 -4
  183. visidata/loaders/yaml.py +15 -5
  184. visidata/macos.py +1 -1
  185. visidata/macros.py +130 -41
  186. visidata/main.py +119 -94
  187. visidata/mainloop.py +101 -154
  188. visidata/man/parse_options.py +2 -2
  189. visidata/man/vd.1 +302 -147
  190. visidata/man/vd.txt +291 -151
  191. visidata/memory.py +3 -3
  192. visidata/menu.py +104 -423
  193. visidata/metasheets.py +59 -141
  194. visidata/modify.py +79 -23
  195. visidata/motd.py +3 -3
  196. visidata/mouse.py +137 -0
  197. visidata/movement.py +43 -35
  198. visidata/optionssheet.py +99 -0
  199. visidata/path.py +131 -43
  200. visidata/pivot.py +74 -47
  201. visidata/plugins.py +65 -192
  202. visidata/pyobj.py +50 -201
  203. visidata/rename_col.py +20 -0
  204. visidata/save.py +42 -20
  205. visidata/search.py +54 -10
  206. visidata/selection.py +84 -5
  207. visidata/settings.py +162 -24
  208. visidata/sheets.py +229 -257
  209. visidata/shell.py +51 -21
  210. visidata/sidebar.py +162 -0
  211. visidata/sort.py +11 -4
  212. visidata/statusbar.py +113 -104
  213. visidata/stored_list.py +43 -0
  214. visidata/stored_prop.py +38 -0
  215. visidata/tests/conftest.py +3 -3
  216. visidata/tests/test_cliptext.py +39 -0
  217. visidata/tests/test_commands.py +62 -7
  218. visidata/tests/test_edittext.py +2 -2
  219. visidata/tests/test_features.py +17 -0
  220. visidata/tests/test_menu.py +14 -0
  221. visidata/tests/test_path.py +13 -4
  222. visidata/text_source.py +53 -0
  223. visidata/textsheet.py +10 -3
  224. visidata/theme.py +44 -0
  225. visidata/themes/__init__.py +0 -0
  226. visidata/themes/ascii8.py +84 -0
  227. visidata/themes/asciimono.py +84 -0
  228. visidata/themes/light.py +17 -0
  229. visidata/threads.py +87 -39
  230. visidata/tuiwin.py +22 -0
  231. visidata/type_currency.py +22 -3
  232. visidata/type_date.py +31 -9
  233. visidata/type_floatsi.py +5 -1
  234. visidata/undo.py +18 -6
  235. visidata/utils.py +106 -23
  236. visidata/vdobj.py +28 -17
  237. visidata/windows.py +10 -0
  238. visidata/wrappers.py +9 -3
  239. visidata-3.0.data/data/share/applications/visidata.desktop +7 -0
  240. {visidata-2.11.dev0.data → visidata-3.0.data}/data/share/man/man1/vd.1 +302 -147
  241. {visidata-2.11.dev0.data → visidata-3.0.data}/data/share/man/man1/visidata.1 +302 -147
  242. visidata-3.0.data/scripts/vd2to3.vdx +9 -0
  243. {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/METADATA +13 -11
  244. visidata-3.0.dist-info/RECORD +257 -0
  245. {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/WHEEL +1 -1
  246. {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/entry_points.txt +0 -1
  247. visidata/layout.py +0 -44
  248. visidata/misc.py +0 -5
  249. visidata-2.11.dev0.dist-info/RECORD +0 -142
  250. /visidata/{repeat.py → features/repeat.py} +0 -0
  251. {visidata-2.11.dev0.data → visidata-3.0.data}/scripts/vd +0 -0
  252. {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/LICENSE.gpl3 +0 -0
  253. {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,186 @@
1
+ #!/usr/bin/env python3
2
+
3
+ __all__=[ 'SelectorColumn', 'soupstr' ]
4
+
5
+ import os.path
6
+ from urllib.parse import urljoin
7
+
8
+ import concurrent.futures
9
+ import functools
10
+
11
+ from visidata import vd, VisiData, TableSheet, vdtype, Column, AttrColumn, Progress, date
12
+
13
+
14
@VisiData.api
def soup(vd, s):
    'Parse the HTML text *s* with the `html.parser` backend and return the BeautifulSoup object.'
    # importExternal takes the module name and the package name to install it from
    bs4 = vd.importExternal('bs4', 'beautifulsoup4')
    return bs4.BeautifulSoup(s, 'html.parser')
19
+
20
+
21
@VisiData.api
def open_scrape(vd, p):
    '''Open *p* for scraping.

    A URL becomes an HtmlDocsSheet with that one URL to fetch; anything else
    becomes an HtmlElementsSheet with no preset elements.
    '''
    # return value is unused here; presumably this reports a missing bs4 up front -- TODO confirm
    vd.importExternal('bs4', 'beautifulsoup4')

    vd.enable_requests_cache()
    if not p.is_url():
        return HtmlElementsSheet(p.base_stem, source=p, elements=None)
    return HtmlDocsSheet(p.base_stem, source=p, urls=[p.given])

VisiData.openhttp_scrape = VisiData.open_scrape
32
+
33
def node_name(node):
    'Return the tag name of *node*, followed by `.firstclass` and `#id` when those attributes are set.'
    suffix = ''
    classes = node.attrs.get("class")
    if classes:
        suffix += '.' + classes[0]  # only the first class is used
    ident = node.attrs.get("id")
    if ident:
        suffix += '#' + ident
    return node.name + suffix if suffix else node.name
42
+
43
@functools.lru_cache(maxsize=None)
def calc_selector(node):
    '''Return a CSS selector string for *node*.

    The result is '' for a node without a parent.  Otherwise it is the
    node's own name (see node_name), prefixed by the parent's selector only
    when that prefix changes how many elements are matched from the root.
    '''
    if not node.parent:
        return ''

    parent_sel = calc_selector(node.parent)
    own_sel = node_name(node)
    if not parent_sel:
        return own_sel

    ancestors = list(node.parents)
    root = ancestors[-1]  # last ancestor is used as the root to select from

    qualified = parent_sel + ' ' + own_sel
    n_qualified = len(root.select(qualified))
    n_own = len(root.select(own_sel))
    # same match count either way: the parent prefix adds nothing
    return own_sel if n_qualified == n_own else qualified
60
+
61
+
62
class HtmlAttrColumn(Column):
    'Column whose value is the attribute named by `self.expr` in the row\'s `attrs`.'
    def calcValue(self, row):
        lookup = row.attrs.get
        return lookup(self.expr)
65
+
66
+
67
def prev_header(r):
    '''Return the header element that precedes *r*.

    If *r* is itself a header (h1..h6), only strictly higher-level headers
    are searched for, so an h3 reports the previous h1 or h2.  An h1 has no
    higher level and yields None.  For any other element, the previous
    header of any level is returned.
    '''
    hdrtags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    try:
        level = hdrtags.index(r.name)
    except ValueError:
        # not a header itself: any header level qualifies
        return r.find_previous(hdrtags)

    higher = hdrtags[:level]  # every level above this one
    if not higher:
        # h1: nothing to search for; do not call find_previous with an empty list
        return None
    return r.find_previous(higher)
74
+
75
+
76
+ # one row per element
77
class HtmlElementsSheet(TableSheet):
    guide = '''# HTMLElements

This is a list of HTML elements from _{sheet.source}_ as parsed by `beautifulsoup4`.

Standard VisiData exploration techniques can be used to find relevant data, which will help determine the proper selector to use.

- `Enter` to dive into children of cursor element (or children of all selected rows with `g Enter`)
- `go` to batch open links in selected rows on new RequestsSheet, which will fetch each page
- `~` to use the `soupstr` type to join all the text elements
'''
    # source=[element, ...]
    rowtype='dom nodes'  # rowdef soup.element
    columns = [
        Column('name', getter=lambda col, row: node_name(row)),
        Column('selector', getter=lambda col, row: calc_selector(row), cache='async', width=0),
        AttrColumn('string'),
        Column('depth', cache=True, getter=lambda col, row: list(col.sheet.html_parents(row))),
        Column('prev_header', getter=lambda col, row: prev_header(row), cache=True),
        HtmlAttrColumn('href', expr='href'),
    ]

    def iterload(self):
        'Yield every descendant element with a truthy `.string`, from the given elements or from the parsed source text.'
        roots = self.elements or [vd.soup(self.source.read_text())]
        for root in roots:
            yield from (el for el in root.find_all() if el.string)

    def html_parents(self, row):
        'Yield the ancestors of *row*, nearest first, stopping at a missing or self-referential parent.'
        node = row
        while True:
            parent = node.parent
            if not parent or parent is node:
                return
            yield parent
            node = parent

    @property
    def rootSource(self):
        'The source of this sheet\'s rootSheet.'
        root = self.rootSheet
        return root.source

    def openRows(self, rows):
        'Yield one HtmlDocsSheet for the hrefs of *rows*, each resolved against the root source\'s given URL.'
        realurls = []
        for r in rows:
            realurls.append(urljoin(self.rootSource.given, r.attrs.get('href')))
        yield HtmlDocsSheet(self.name, 'scrape', source=self, urls=realurls)

    def openRow(self, row):
        'Return a new HtmlElementsSheet holding just *row* as its element.'
        return HtmlElementsSheet('', source=self, elements=[row])
120
+
121
+
122
class DocsSelectorColumn(Column):
    'Column listing the elements of the row\'s `soup` that match the CSS selector in `self.expr`.'
    def calcValue(self, row):
        matches = row.soup.select(self.expr)
        return list(matches)
125
+
126
class SelectorColumn(Column):
    'Column listing the elements under the row that match the CSS selector in `self.expr`.'
    def calcValue(self, row):
        matches = row.select(self.expr)
        return list(matches)
129
+
130
+
131
+ # urls=list of urls to scrape
132
class HtmlDocsSheet(TableSheet):
    # HtmlElementsSheet in this file keeps its help text in `guide`; expose the
    # same attribute here, and keep `help` as an alias for anything that reads it.
    guide = help = '''# HtmlDocsSheet

- `Enter` to open the current request as list of HTMLElements
- `;` to add column of elements matching given css selector
- this is how to cross-tabulate data from multiple pages
'''
    rowtype='requests'  # rowdef: requests.Response
    columns = [
        AttrColumn('url'),
        AttrColumn('status_code', type=int),
        AttrColumn('from_cache'),
        AttrColumn('fetched_at', 'created_at', type=date, width=0),
        AttrColumn('expires', type=date),
        AttrColumn('reason'),
        AttrColumn('soup.title.string'),
    ]

    def iterload(self):
        'Fetch each url in `self.urls` in order and yield the response.'
        requests = vd.importExternal('requests')
        self.colnames = {}
        # NOTE: urls are fetched one at a time; a thread-pool variant was
        # sketched here earlier but never enabled.
        for url in Progress(self.urls):
            yield requests.get(url)

    def addRow(self, row, index=None):
        'Add *row*, then attach its parsed HTML as `row.soup`.'
        super().addRow(row, index=index)
        # parsing goes through callNoExceptions, so presumably a parse failure
        # does not propagate from here -- TODO confirm what row.soup is then
        row.soup = vd.callNoExceptions(vd.soup, row.text)

    def openRow(self, row):
        'Return an HtmlElementsSheet over the parsed document of *row*.'
        return HtmlElementsSheet(row.url, source=self, elements=[row.soup])
163
+
164
def soupstr(coll):
    '''Join the `.string` of every element in *coll* with single spaces.

    Elements whose `.string` is None are skipped; passing None to str.join
    would raise TypeError and lose the text of the other elements.
    '''
    strings = (v.string for v in coll)
    return ' '.join(s for s in strings if s is not None)
166
+
167
vdtype(soupstr, 's')

@TableSheet.api
def scrape_urls(sheet, col, rows):
    'Return an HtmlDocsSheet whose urls are the typed values of *col* for each of *rows*.'
    sheet_name = sheet.name
    urls = [col.getTypedValue(r) for r in rows]
    return HtmlDocsSheet(sheet_name, "selected_urls", urls=urls)
172
+
173
# commands for the scrape sheets
HtmlElementsSheet.addCommand('~', 'type-soupstr', 'cursorCol.type=soupstr', 'set type of current column to list of html elements')
HtmlElementsSheet.addCommand('go', 'open-rows', 'for vs in openRows(selectedRows): vd.push(vs)', 'open sheet for each selected element')
TableSheet.addCommand('gzo', 'scrape-cells', 'vd.push(scrape_urls(cursorCol, selectedRows))', 'open HTML Documents sheet from selected URLs')
HtmlDocsSheet.addCommand(';', 'addcol-selector', 'sel=input("css selector: ", type="selector"); addColumn(DocsSelectorColumn(sel, expr=sel, cache="async"))', 'add column derived from css selector of current column')
HtmlElementsSheet.addCommand(';', 'addcol-selector', 'sel=input("css selector: ", type="selector"); addColumn(SelectorColumn(sel, expr=sel, cache="async"))', 'add column derived from css selector of current column')

# names made available as globals; each key is bound to the class of the same name
vd.addGlobals({
    'HtmlDocsSheet':HtmlDocsSheet,
    'SelectorColumn':SelectorColumn,
    'DocsSelectorColumn':DocsSelectorColumn,
    'soupstr':soupstr
})

vd.addMenuItem('Data', '+Scrape', 'selected cells', 'scrape-cells')
visidata/loaders/shp.py CHANGED
@@ -1,13 +1,14 @@
1
1
  import json
2
+ from copy import copy
2
3
 
3
- from visidata import VisiData, vd, Sheet, Column, Progress, date, copy, InvertedCanvas, asyncthread
4
+ from visidata import VisiData, vd, Sheet, Column, Progress, date, InvertedCanvas, asyncthread
4
5
 
5
6
  # requires pyshp
6
7
 
7
8
 
8
9
  @VisiData.api
9
10
  def open_shp(vd, p):
10
- return ShapeSheet(p.name, source=p)
11
+ return ShapeSheet(p.base_stem, source=p)
11
12
 
12
13
  VisiData.open_dbf = VisiData.open_shp
13
14
 
@@ -33,7 +34,7 @@ class ShapeSheet(Sheet):
33
34
  Column('shapeType', width=0, getter=lambda col,row: row.shape.shapeType)
34
35
  ]
35
36
  def iterload(self):
36
- import shapefile
37
+ shapefile = vd.importExternal('shapefile', 'pyshp')
37
38
  self.sf = shapefile.Reader(str(self.source))
38
39
  self.reloadCols()
39
40
  for shaperec in Progress(self.sf.iterShapeRecords(), total=self.sf.numRecords):
@@ -97,10 +98,10 @@ def save_geojson(vd, p, vs):
97
98
  'type': 'FeatureCollection',
98
99
  'features': features,
99
100
  }
100
- with p.open_text(mode='w', encoding=vs.options.encoding) as fp:
101
+ with p.open(mode='w', encoding=vs.options.save_encoding) as fp:
101
102
  for chunk in json.JSONEncoder().iterencode(featcoll):
102
103
  fp.write(chunk)
103
104
 
104
105
  ShapeSheet.addCommand('.', 'plot-row', 'vd.push(ShapeMap(name+"_map", source=sheet, sourceRows=[cursorRow], textCol=cursorCol))', 'plot geospatial vector in current row')
105
106
  ShapeSheet.addCommand('g.', 'plot-rows', 'vd.push(ShapeMap(name+"_map", source=sheet, sourceRows=rows, textCol=cursorCol))', 'plot all geospatial vectors in current sheet')
106
- ShapeMap.addCommand('^S', 'save-sheet', 'vd.saveSheets(inputPath("save to: ", value=getDefaultSaveName(sheet)), sheet, confirm_overwrite=options.confirm_overwrite)', 'save current sheet to filename in format determined by extension (default .geojson)')
107
+ ShapeMap.addCommand('^S', 'save-sheet', 'vd.saveSheets(inputPath("save to: ", value=getDefaultSaveName(sheet)), sheet)', 'save current sheet to filename in format determined by extension (default .geojson)')
visidata/loaders/spss.py CHANGED
@@ -1,22 +1,21 @@
1
- from visidata import VisiData, Sheet, Progress, asyncthread, ColumnItem
1
+ from visidata import VisiData, Sheet, Progress, asyncthread, ItemColumn, vd
2
2
 
3
3
 
4
4
  @VisiData.api
5
5
  def open_spss(vd, p):
6
- return SpssSheet(p.name, source=p)
6
+ return SpssSheet(p.base_stem, source=p)
7
7
  VisiData.open_sav = VisiData.open_spss
8
8
 
9
9
 
10
10
  class SpssSheet(Sheet):
11
- @asyncthread
12
- def reload(self):
13
- import savReaderWriter
11
+ def loader(self):
12
+ savReaderWriter = vd.importExternal('savReaderWriter')
14
13
  self.rdr = savReaderWriter.SavReader(str(self.source))
15
14
  with self.rdr as reader:
16
15
  self.columns = []
17
16
  for i, vname in enumerate(reader.varNames):
18
17
  vtype = float if reader.varTypes[vname] == 0 else str
19
- self.addColumn(ColumnItem(vname.decode('utf-8'), i, type=vtype))
18
+ self.addColumn(ItemColumn(vname.decode('utf-8'), i, type=vtype))
20
19
 
21
20
  self.rows = []
22
21
  for r in Progress(reader, total=reader.shape.nrows):
@@ -1,6 +1,7 @@
1
+ from copy import copy
1
2
  import re
2
3
 
3
- from visidata import VisiData, vd, Sheet, options, Column, Progress, anytype, ColumnItem, asyncthread, TypedExceptionWrapper, TypedWrapper, IndexSheet, copy, clean_to_id, vlen
4
+ from visidata import VisiData, vd, Sheet, options, Column, Progress, anytype, ColumnItem, asyncthread, TypedExceptionWrapper, TypedWrapper, IndexSheet, vlen
4
5
  from visidata.type_date import date
5
6
 
6
7
  vd.option('sqlite_onconnect', '', 'sqlite statement to execute after opening a connection')
@@ -16,13 +17,21 @@ def requery(url, **kwargs):
16
17
  return urlunparse(url_parts)
17
18
 
18
19
 
20
@VisiData.api
def guess_sqlite(vd, p):
    'Return `dict(filetype="sqlite")` when the first bytes of *p* begin with b"SQLite format"; otherwise None.'
    header = p.open_bytes().read(16)
    if not header.startswith(b'SQLite format'):
        return None
    return dict(filetype='sqlite')
24
+
25
+
19
26
  @VisiData.api
20
27
  def open_sqlite(vd, p):
21
- return SqliteIndexSheet(p.name, source=p)
28
+ if not p.is_local():
29
+ vd.fail('sqlite requires an uncompressed, local file')
30
+ return SqliteIndexSheet(p.base_stem, source=p)
22
31
 
23
32
  @VisiData.api
24
33
  def openurl_sqlite(vd, p, filetype=None):
25
- return SqliteIndexSheet(p.name, source=p)
34
+ return SqliteIndexSheet(p.base_stem, source=p)
26
35
 
27
36
  VisiData.open_sqlite3 = VisiData.open_sqlite
28
37
  VisiData.open_db = VisiData.open_sqlite
@@ -32,15 +41,14 @@ class SqliteSheet(Sheet):
32
41
  'Provide functionality for importing SQLite databases.'
33
42
  savesToSource = True
34
43
  defer = True
35
-
36
- def resolve(self):
37
- 'Resolve all the way back to the original source Path.'
38
- return self.source.resolve()
44
+ query = ''
45
+ tableName = ''
39
46
 
40
47
  def conn(self):
41
48
  import sqlite3
42
- pathname = str(self.resolve())
43
- url = pathname if '://' in pathname else f'file:{pathname}'
49
+ localpath = self.rootSheet().source
50
+
51
+ url = localpath if localpath.is_url() else f'file:{localpath.resolve()}'
44
52
  url = requery(url, **self.options.getall('sqlite_param_'))
45
53
 
46
54
  con = sqlite3.connect(url, uri=True, **self.options.getall('sqlite_connect_'))
@@ -49,13 +57,25 @@ class SqliteSheet(Sheet):
49
57
  con.execute(self.options.sqlite_onconnect)
50
58
  return con
51
59
 
60
def rawSql(self, q:str) -> 'SqliteSheet':
    'Return a new SqliteSheet named "query" on this sheet\'s source, with *q* as its query.'
    querysheet = SqliteSheet('query', source=self.source, query=q)
    return querysheet
62
+
63
@property
def sidebar(self):
    'The query text under a "# SQL" heading when this sheet has a query; otherwise the inherited sidebar.'
    if not self.query:
        return super().sidebar
    return '# SQL\n' + self.query
69
+
52
70
  def execute(self, conn, sql, parms=None):
53
71
  parms = parms or []
54
72
  vd.debug(sql)
55
73
  return conn.execute(sql, parms)
56
74
 
57
- def iterload(self):
58
- import sqlite3
75
+ def iterload_table(self, tblname:str):
76
+ '''Generate all rows from `tblname` in database at self.source,
77
+ including type information from table_xinfo(), and getting each rowid
78
+ if available (for simpler updates).'''
59
79
 
60
80
  def parse_sqlite_type(t):
61
81
  m = re.match(r'(\w+)(\((\d+)(,(\d+))?\))?', t.upper())
@@ -70,7 +90,6 @@ class SqliteSheet(Sheet):
70
90
 
71
91
  self.rowidColumn = None
72
92
  with self.conn() as conn:
73
- tblname = self.tableName
74
93
  if not isinstance(self, SqliteIndexSheet):
75
94
  self.columns = []
76
95
  for r in self.execute(conn, 'PRAGMA TABLE_XINFO("%s")' % tblname):
@@ -92,6 +111,32 @@ class SqliteSheet(Sheet):
92
111
  r = self.execute(conn, 'SELECT NULL, * FROM "%s"' % tblname)
93
112
  yield from Progress(r, total=r.rowcount-1)
94
113
 
114
def iterload_query(self, query:str):
    '''Generate the result rows of `query` run against the connection from self.conn().

    Columns are rebuilt first: copies of the class-level columns, followed
    by one ColumnItem per entry in the cursor description, indexed by
    position.  Query parameters are taken from `self.parms` if it is set.
    The cursor is kept on `self.result`.
    '''
    with self.conn() as conn:
        self.columns = []
        for c in type(self).columns:
            self.addColumn(copy(c))

        self.result = self.execute(conn, query, parms=getattr(self, 'parms', []))

        # desc[0] is the column name in each description entry
        for i, desc in enumerate(self.result.description):
            self.addColumn(ColumnItem(desc[0], i))

        yield from self.result
131
+
132
def iterload(self):
    'Yield rows from the table named by `self.tableName` if set, else from `self.query`; fail when neither is set.'
    if self.tableName:
        rowsrc = self.iterload_table(self.tableName)
    elif self.query:
        rowsrc = self.iterload_query(self.query)
    else:
        vd.fail('no query or tablename to load')
        return
    yield from rowsrc
139
+
95
140
  @asyncthread
96
141
  def putChanges(self):
97
142
  adds, mods, dels = self.getDeferredChanges()
@@ -192,24 +237,14 @@ class SqliteIndexSheet(SqliteSheet, IndexSheet):
192
237
  self.preloadHook()
193
238
  self.reload()
194
239
 
195
- class SqliteQuerySheet(SqliteSheet):
196
- def iterload(self):
197
- with self.conn() as conn:
198
- self.columns = []
199
- for c in type(self).columns:
200
- self.addColumn(copy(c))
201
- self.result = self.execute(conn, self.query, parms=getattr(self, 'parms', []))
202
- for i, desc in enumerate(self.result.description):
203
- self.addColumn(ColumnItem(desc[0], i))
204
-
205
- for row in self.result:
206
- yield row
207
-
208
240
 
209
241
 
210
242
  @VisiData.api
211
243
  def save_sqlite(vd, p, *vsheets):
212
244
  import sqlite3
245
+ import json
246
+ jsonenc = json.JSONEncoder() #1589: list/dict values as json
247
+
213
248
  conn = sqlite3.connect(str(p))
214
249
  conn.text_factory = lambda s, enc=vsheets[0].options.encoding: s.decode(enc)
215
250
  conn.row_factory = sqlite3.Row
@@ -231,7 +266,7 @@ def save_sqlite(vd, p, *vsheets):
231
266
  vd.sync()
232
267
 
233
268
  for vs in vsheets:
234
- tblname = clean_to_id(vs.name)
269
+ tblname = vd.cleanName(vs.name)
235
270
  sqlcols = []
236
271
  for col in vs.visibleCols:
237
272
  sqlcols.append('"%s" %s' % (col.name, sqltypes.get(col.type, 'TEXT')))
@@ -247,6 +282,8 @@ def save_sqlite(vd, p, *vsheets):
247
282
  v = options.safe_error
248
283
  else:
249
284
  v = None
285
+ elif isinstance(v, (list, tuple, dict)):
286
+ v = jsonenc.encode(v)
250
287
  elif not isinstance(v, (int, float, str)):
251
288
  v = col.getDisplayValue(r)
252
289
  sqlvals.append(v)
@@ -255,16 +292,19 @@ def save_sqlite(vd, p, *vsheets):
255
292
 
256
293
  conn.commit()
257
294
 
258
- vd.status("%s save finished" % p)
259
295
 
296
+ SqliteSheet.addCommand('', 'exec-sql', 'vd.push(rawSql(input("execute SQL: ", type="sql")))', 'execute raw SQL statement')
260
297
 
261
298
  SqliteIndexSheet.addCommand('a', 'add-table', 'fail("create a new table by saving a sheet to this database file")', 'stub; add table by saving a sheet to the db file instead')
262
299
  SqliteIndexSheet.bindkey('ga', 'add-table')
263
300
  SqliteSheet.options.header = 0
264
301
  VisiData.save_db = VisiData.save_sqlite
265
302
 
303
+ vd.addMenuItems('''
304
+ Data > execute SQL query > exec-sql
305
+ ''')
306
+
266
307
  vd.addGlobals({
267
308
  'SqliteIndexSheet': SqliteIndexSheet,
268
309
  'SqliteSheet': SqliteSheet,
269
- 'SqliteQuerySheet': SqliteQuerySheet
270
310
  })
@@ -7,7 +7,7 @@ try:
7
7
  def save_table(path, *sheets, fmt=fmt):
8
8
  import tabulate
9
9
 
10
- with path.open_text(mode='w', encoding=sheets[0].options.encoding) as fp:
10
+ with path.open(mode='w', encoding=sheets[0].options.save_encoding) as fp:
11
11
  for vs in sheets:
12
12
  fp.write(tabulate.tabulate(
13
13
  vs.itervals(*vs.visibleCols, format=True),
@@ -0,0 +1,60 @@
1
+ from visidata import (
2
+ ColumnItem,
3
+ PythonSheet,
4
+ VisiData,
5
+ asyncthread,
6
+ deduceType,
7
+ vd,
8
+ )
9
+
10
+
11
@VisiData.api
def open_toml(vd, p):
    'Return a TomlSheet for the path *p*, named by its base stem.'
    sheet = TomlSheet(p.base_stem, source=p)
    return sheet
14
+
15
+
16
class TomlSheet(PythonSheet):
    """Sheet for the top level of a loaded TOML file.

    A deliberately small loader, modelled on VisiData's built-in JSON and
    Python object sheets.
    """
    guide = '''# Toml Sheet
This sheet represents the top level of {sheet.source.name}.{sheet.source.ext}.

Each cell within this sheet can contain dictionaries (representing TOML key:value pairs), lists (representing TOML arrays), or scalars.

Some helpful commands when working with cells of lists and dictionaries:

- `(` (`expand-col`) on a column with lists or dictionaries will "expand" the structures in the cells into new columns within the current sheet.
- `zEnter` on a cell with lists or dictionaries will "dive" into the current cell, expanding its structures into rows and columns in a separate sheet.
'''

    rowtype = "values"  # rowdef: dict values, possibly nested

    def loader(self):
        """Parse the source text as TOML into one dict.

        Each top-level key becomes a column, typed from its value, and the
        dict itself is added as the only row.
        """
        self.columns = []
        self.rows = []

        try:
            import tomllib  # Python 3.11+
        except ModuleNotFoundError:
            tomllib = vd.importExternal("tomli")  # Python 3.10 and below

        text = self.source.read_text()
        data = tomllib.loads(text)
        for key in data:
            self.addColumn(ColumnItem(key, type=deduceType(data[key])))
        self.addRow(data)
55
+
56
# make TomlSheet available as a global
vd.addGlobals({"TomlSheet": TomlSheet})
visidata/loaders/tsv.py CHANGED
@@ -2,8 +2,10 @@ import os
2
2
  import contextlib
3
3
  import itertools
4
4
  import collections
5
+ import math
6
+ import time
5
7
 
6
- from visidata import vd, asyncthread, options, Progress, ColumnItem, SequenceSheet, Sheet, FileExistsError, getType, VisiData
8
+ from visidata import vd, asyncthread, options, Progress, ColumnItem, SequenceSheet, Sheet, VisiData
7
9
  from visidata import namedlist, filesize
8
10
 
9
11
  vd.option('delimiter', '\t', 'field delimiter to use for tsv/usv filetype', replay=True)
@@ -14,23 +16,55 @@ vd.option('tsv_safe_tab', '\u001f', 'replacement for tab character when saving t
14
16
 
15
17
  @VisiData.api
16
18
  def open_tsv(vd, p):
17
- return TsvSheet(p.name, source=p)
19
+ return TsvSheet(p.base_stem, source=p)
20
+
21
+
22
def adaptive_bufferer(fp, max_buffer_size=65536):
    """Yield chunks read from `fp`, sizing each read to the observed input rate.

    Loading e.g. tsv files goes faster with a large buffer.  But when the
    input stream is slow (e.g. 1 byte/second) and the buffer is large, it
    can take a long time until the buffer is filled, and the data can only
    be visualized once a read returns (or the stream is finished).  So the
    read size starts at 8 and adapts: it grows up to `max_buffer_size` for
    fast input streams and stays small for slow ones.
    """
    buffer_size = 8
    processed_buffer_size = 0
    # monotonic clock: measured intervals are unaffected by wall-clock adjustments
    previous_start_time = time.monotonic()
    while True:
        next_chunk = fp.read(max(buffer_size, 1))
        if not next_chunk:
            break

        yield next_chunk

        processed_buffer_size += len(next_chunk)

        current_time = time.monotonic()
        current_delta = current_time - previous_start_time

        if current_delta < 1:
            # less than one second since this measurement window started:
            # the stream is keeping up, so double the read size
            buffer_size = min(buffer_size * 2, max_buffer_size)
        else:
            # a second or more has elapsed: start a new window and size the
            # next read to about one second of input at the rate just seen
            previous_start_time = current_time
            buffer_size = math.ceil(min(processed_buffer_size / current_delta, max_buffer_size))
            processed_buffer_size = 0
53
+
54
def splitter(stream, delim='\n'):
    'Generates one line/row/record at a time from stream, separated by delim'
    pending = type(delim)()  # empty str or bytes, same type as delim

    for chunk in stream:
        pieces = (pending + chunk).split(delim)
        pending = pieces.pop()  # the last piece may be an unfinished record
        yield from pieces

    tail = pending.rstrip(delim)  # trim empty trailing lines
    if tail:
        yield from tail.split(delim)
34
68
 
35
69
 
36
70
  # rowdef: list
@@ -42,8 +76,8 @@ class TsvSheet(SequenceSheet):
42
76
  delim = self.delimiter or self.options.delimiter
43
77
  rowdelim = self.row_delimiter or self.options.row_delimiter
44
78
 
45
- with self.source.open_text(encoding=self.options.encoding) as fp:
46
- for line in splitter(fp, rowdelim):
79
+ with self.open_text_source() as fp:
80
+ for line in splitter(adaptive_bufferer(fp), rowdelim):
47
81
  if not line:
48
82
  continue
49
83
 
@@ -63,7 +97,7 @@ def save_tsv(vd, p, vs, delimiter='', row_delimiter=''):
63
97
  rowsep = row_delimiter or vs.options.row_delimiter
64
98
  trdict = vs.safe_trdict()
65
99
 
66
- with p.open_text(mode='w', encoding=vs.options.encoding) as fp:
100
+ with p.open(mode='w', encoding=vs.options.save_encoding) as fp:
67
101
  colhdr = unitsep.join(col.name.translate(trdict) for col in vs.visibleCols) + rowsep
68
102
  fp.write(colhdr)
69
103
 
@@ -71,8 +105,6 @@ def save_tsv(vd, p, vs, delimiter='', row_delimiter=''):
71
105
  fp.write(unitsep.join(dispvals.values()))
72
106
  fp.write(rowsep)
73
107
 
74
- vd.status('%s save finished' % p)
75
-
76
108
 
77
109
  @Sheet.api
78
110
  def append_tsv_row(vs, row):
@@ -87,14 +119,24 @@ def append_tsv_row(vs, row):
87
119
  trdict = vs.safe_trdict()
88
120
  unitsep = options.delimiter
89
121
 
90
- with vs.source.open_text(mode='w', encoding=vs.options.encoding) as fp:
122
+ with vs.source.open(mode='w') as fp:
91
123
  colhdr = unitsep.join(col.name.translate(trdict) for col in vs.visibleCols) + vs.options.row_delimiter
92
124
  if colhdr.strip(): # is anything but whitespace
93
125
  fp.write(colhdr)
94
126
 
95
- with vs.source.open_text(mode='a', encoding=vs.options.encoding) as fp:
96
- fp.write('\t'.join(col.getDisplayValue(row) for col in vs.visibleCols) + '\n')
127
+ newrow = ''
128
+
129
+ contents = vs.source.open(mode='r').read()
130
+ if not contents.endswith('\n'): #1569
131
+ newrow += '\n'
132
+
133
+ newrow += '\t'.join(col.getDisplayValue(row) for col in vs.visibleCols) + '\n'
134
+
135
+ with vs.source.open(mode='a') as fp:
136
+ fp.write(newrow)
137
+
97
138
 
139
+ TsvSheet.options.regex_skip = '^#.*'
98
140
 
99
141
  vd.addGlobals({
100
142
  'TsvSheet': TsvSheet,