visidata 2.11.dev0__py3-none-any.whl → 3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253) hide show
  1. visidata/__init__.py +72 -91
  2. visidata/_input.py +263 -44
  3. visidata/_open.py +84 -29
  4. visidata/_types.py +22 -4
  5. visidata/_urlcache.py +17 -4
  6. visidata/aggregators.py +65 -25
  7. visidata/apps/__init__.py +0 -0
  8. visidata/apps/vdsql/__about__.py +8 -0
  9. visidata/apps/vdsql/__init__.py +5 -0
  10. visidata/apps/vdsql/__main__.py +27 -0
  11. visidata/apps/vdsql/_ibis.py +748 -0
  12. visidata/apps/vdsql/bigquery.py +61 -0
  13. visidata/apps/vdsql/clickhouse.py +53 -0
  14. visidata/apps/vdsql/setup.py +40 -0
  15. visidata/apps/vdsql/snowflake.py +67 -0
  16. visidata/apps/vgit/__init__.py +13 -0
  17. visidata/apps/vgit/__main__.py +3 -0
  18. visidata/apps/vgit/abort.py +23 -0
  19. visidata/apps/vgit/blame.py +76 -0
  20. visidata/apps/vgit/branch.py +153 -0
  21. visidata/apps/vgit/config.py +95 -0
  22. visidata/apps/vgit/diff.py +169 -0
  23. visidata/apps/vgit/gitsheet.py +161 -0
  24. visidata/apps/vgit/grep.py +37 -0
  25. visidata/apps/vgit/log.py +81 -0
  26. visidata/apps/vgit/main.py +55 -0
  27. visidata/apps/vgit/remote.py +57 -0
  28. visidata/apps/vgit/repos.py +71 -0
  29. visidata/apps/vgit/setup.py +37 -0
  30. visidata/apps/vgit/stash.py +69 -0
  31. visidata/apps/vgit/status.py +204 -0
  32. visidata/apps/vgit/statusbar.py +34 -0
  33. visidata/basesheet.py +59 -50
  34. visidata/canvas.py +251 -99
  35. visidata/choose.py +15 -11
  36. visidata/clean_names.py +29 -0
  37. visidata/clipboard.py +84 -18
  38. visidata/cliptext.py +220 -46
  39. visidata/cmdlog.py +89 -114
  40. visidata/color.py +142 -56
  41. visidata/column.py +134 -131
  42. visidata/ddw/input.ddw +74 -79
  43. visidata/ddw/regex.ddw +57 -0
  44. visidata/ddwplay.py +33 -14
  45. visidata/deprecated.py +77 -3
  46. visidata/desktop/visidata.desktop +7 -0
  47. visidata/editor.py +12 -6
  48. visidata/errors.py +5 -1
  49. visidata/experimental/__init__.py +0 -0
  50. visidata/experimental/diff_sheet.py +29 -0
  51. visidata/experimental/digit_autoedit.py +6 -0
  52. visidata/experimental/gdrive.py +89 -0
  53. visidata/experimental/google.py +37 -0
  54. visidata/experimental/gsheets.py +79 -0
  55. visidata/experimental/live_search.py +37 -0
  56. visidata/experimental/liveupdate.py +45 -0
  57. visidata/experimental/mark.py +133 -0
  58. visidata/experimental/noahs_tapestry/__init__.py +1 -0
  59. visidata/experimental/noahs_tapestry/tapestry.py +147 -0
  60. visidata/experimental/rownum.py +73 -0
  61. visidata/experimental/slide_cells.py +26 -0
  62. visidata/expr.py +8 -4
  63. visidata/extensible.py +32 -6
  64. visidata/features/__init__.py +0 -0
  65. visidata/features/addcol_audiometadata.py +42 -0
  66. visidata/features/addcol_histogram.py +34 -0
  67. visidata/features/canvas_save_svg.py +69 -0
  68. visidata/features/change_precision.py +46 -0
  69. visidata/features/cmdpalette.py +163 -0
  70. visidata/features/colorbrewer.py +363 -0
  71. visidata/{colorsheet.py → features/colorsheet.py} +17 -16
  72. visidata/features/command_server.py +105 -0
  73. visidata/features/currency_to_usd.py +70 -0
  74. visidata/{customdate.py → features/customdate.py} +2 -0
  75. visidata/features/dedupe.py +132 -0
  76. visidata/{describe.py → features/describe.py} +17 -15
  77. visidata/features/errors_guide.py +26 -0
  78. visidata/features/expand_cols.py +202 -0
  79. visidata/{fill.py → features/fill.py} +4 -2
  80. visidata/{freeze.py → features/freeze.py} +11 -6
  81. visidata/features/graph_seaborn.py +79 -0
  82. visidata/features/helloworld.py +10 -0
  83. visidata/features/hint_types.py +17 -0
  84. visidata/{incr.py → features/incr.py} +5 -0
  85. visidata/{join.py → features/join.py} +107 -53
  86. visidata/features/known_cols.py +21 -0
  87. visidata/features/layout.py +62 -0
  88. visidata/{melt.py → features/melt.py} +33 -21
  89. visidata/features/normcol.py +118 -0
  90. visidata/features/open_config.py +7 -0
  91. visidata/features/open_syspaste.py +18 -0
  92. visidata/features/ping.py +157 -0
  93. visidata/features/procmgr.py +208 -0
  94. visidata/features/random_sample.py +6 -0
  95. visidata/{regex.py → features/regex.py} +47 -31
  96. visidata/features/reload_every.py +55 -0
  97. visidata/features/rename_col_cascade.py +30 -0
  98. visidata/features/scroll_context.py +60 -0
  99. visidata/features/select_equal_selected.py +11 -0
  100. visidata/features/setcol_fake.py +65 -0
  101. visidata/{slide.py → features/slide.py} +75 -21
  102. visidata/features/sparkline.py +48 -0
  103. visidata/features/status_source.py +20 -0
  104. visidata/{sysedit.py → features/sysedit.py} +2 -1
  105. visidata/features/sysopen_mailcap.py +46 -0
  106. visidata/features/term_extras.py +13 -0
  107. visidata/{transpose.py → features/transpose.py} +5 -4
  108. visidata/features/type_ipaddr.py +73 -0
  109. visidata/features/type_url.py +11 -0
  110. visidata/{unfurl.py → features/unfurl.py} +9 -9
  111. visidata/{window.py → features/window.py} +2 -2
  112. visidata/form.py +50 -21
  113. visidata/freqtbl.py +81 -33
  114. visidata/fuzzymatch.py +414 -0
  115. visidata/graph.py +105 -33
  116. visidata/guide.py +180 -0
  117. visidata/help.py +75 -44
  118. visidata/hint.py +39 -0
  119. visidata/indexsheet.py +109 -0
  120. visidata/input_history.py +55 -0
  121. visidata/interface.py +58 -0
  122. visidata/keys.py +17 -16
  123. visidata/loaders/__init__.py +9 -0
  124. visidata/loaders/_pandas.py +61 -21
  125. visidata/loaders/api_airtable.py +70 -0
  126. visidata/loaders/api_bitio.py +102 -0
  127. visidata/loaders/api_matrix.py +148 -0
  128. visidata/loaders/api_reddit.py +306 -0
  129. visidata/loaders/api_zulip.py +249 -0
  130. visidata/loaders/archive.py +41 -7
  131. visidata/loaders/arrow.py +7 -7
  132. visidata/loaders/conll.py +49 -0
  133. visidata/loaders/csv.py +25 -7
  134. visidata/loaders/eml.py +3 -4
  135. visidata/loaders/f5log.py +1204 -0
  136. visidata/loaders/fec.py +325 -0
  137. visidata/loaders/fixed_width.py +3 -5
  138. visidata/loaders/frictionless.py +3 -3
  139. visidata/loaders/geojson.py +8 -5
  140. visidata/loaders/google.py +48 -0
  141. visidata/loaders/graphviz.py +4 -4
  142. visidata/loaders/hdf5.py +4 -4
  143. visidata/loaders/html.py +48 -10
  144. visidata/loaders/http.py +84 -30
  145. visidata/loaders/imap.py +20 -10
  146. visidata/loaders/jrnl.py +52 -0
  147. visidata/loaders/json.py +83 -29
  148. visidata/loaders/jsonla.py +74 -0
  149. visidata/loaders/lsv.py +15 -11
  150. visidata/loaders/mailbox.py +40 -0
  151. visidata/loaders/markdown.py +1 -3
  152. visidata/loaders/mbtiles.py +4 -5
  153. visidata/loaders/mysql.py +11 -13
  154. visidata/loaders/npy.py +7 -7
  155. visidata/loaders/odf.py +4 -1
  156. visidata/loaders/orgmode.py +428 -0
  157. visidata/loaders/pandas_freqtbl.py +14 -20
  158. visidata/loaders/parquet.py +62 -6
  159. visidata/loaders/pcap.py +3 -3
  160. visidata/loaders/pdf.py +4 -3
  161. visidata/loaders/png.py +19 -13
  162. visidata/loaders/postgres.py +9 -8
  163. visidata/loaders/rec.py +7 -3
  164. visidata/loaders/s3.py +342 -0
  165. visidata/loaders/sas.py +5 -5
  166. visidata/loaders/scrape.py +186 -0
  167. visidata/loaders/shp.py +6 -5
  168. visidata/loaders/spss.py +5 -6
  169. visidata/loaders/sqlite.py +68 -28
  170. visidata/loaders/texttables.py +1 -1
  171. visidata/loaders/toml.py +60 -0
  172. visidata/loaders/tsv.py +61 -19
  173. visidata/loaders/ttf.py +19 -7
  174. visidata/loaders/unzip_http.py +6 -5
  175. visidata/loaders/usv.py +1 -1
  176. visidata/loaders/vcf.py +16 -16
  177. visidata/loaders/vds.py +10 -7
  178. visidata/loaders/vdx.py +30 -5
  179. visidata/loaders/xlsb.py +8 -1
  180. visidata/loaders/xlsx.py +145 -25
  181. visidata/loaders/xml.py +6 -3
  182. visidata/loaders/xword.py +4 -4
  183. visidata/loaders/yaml.py +15 -5
  184. visidata/macos.py +1 -1
  185. visidata/macros.py +130 -41
  186. visidata/main.py +119 -94
  187. visidata/mainloop.py +101 -154
  188. visidata/man/parse_options.py +2 -2
  189. visidata/man/vd.1 +302 -147
  190. visidata/man/vd.txt +291 -151
  191. visidata/memory.py +3 -3
  192. visidata/menu.py +104 -423
  193. visidata/metasheets.py +59 -141
  194. visidata/modify.py +79 -23
  195. visidata/motd.py +3 -3
  196. visidata/mouse.py +137 -0
  197. visidata/movement.py +43 -35
  198. visidata/optionssheet.py +99 -0
  199. visidata/path.py +131 -43
  200. visidata/pivot.py +74 -47
  201. visidata/plugins.py +65 -192
  202. visidata/pyobj.py +50 -201
  203. visidata/rename_col.py +20 -0
  204. visidata/save.py +42 -20
  205. visidata/search.py +54 -10
  206. visidata/selection.py +84 -5
  207. visidata/settings.py +162 -24
  208. visidata/sheets.py +229 -257
  209. visidata/shell.py +51 -21
  210. visidata/sidebar.py +162 -0
  211. visidata/sort.py +11 -4
  212. visidata/statusbar.py +113 -104
  213. visidata/stored_list.py +43 -0
  214. visidata/stored_prop.py +38 -0
  215. visidata/tests/conftest.py +3 -3
  216. visidata/tests/test_cliptext.py +39 -0
  217. visidata/tests/test_commands.py +62 -7
  218. visidata/tests/test_edittext.py +2 -2
  219. visidata/tests/test_features.py +17 -0
  220. visidata/tests/test_menu.py +14 -0
  221. visidata/tests/test_path.py +13 -4
  222. visidata/text_source.py +53 -0
  223. visidata/textsheet.py +10 -3
  224. visidata/theme.py +44 -0
  225. visidata/themes/__init__.py +0 -0
  226. visidata/themes/ascii8.py +84 -0
  227. visidata/themes/asciimono.py +84 -0
  228. visidata/themes/light.py +17 -0
  229. visidata/threads.py +87 -39
  230. visidata/tuiwin.py +22 -0
  231. visidata/type_currency.py +22 -3
  232. visidata/type_date.py +31 -9
  233. visidata/type_floatsi.py +5 -1
  234. visidata/undo.py +18 -6
  235. visidata/utils.py +106 -23
  236. visidata/vdobj.py +28 -17
  237. visidata/windows.py +10 -0
  238. visidata/wrappers.py +9 -3
  239. visidata-3.0.data/data/share/applications/visidata.desktop +7 -0
  240. {visidata-2.11.dev0.data → visidata-3.0.data}/data/share/man/man1/vd.1 +302 -147
  241. {visidata-2.11.dev0.data → visidata-3.0.data}/data/share/man/man1/visidata.1 +302 -147
  242. visidata-3.0.data/scripts/vd2to3.vdx +9 -0
  243. {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/METADATA +13 -11
  244. visidata-3.0.dist-info/RECORD +257 -0
  245. {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/WHEEL +1 -1
  246. {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/entry_points.txt +0 -1
  247. visidata/layout.py +0 -44
  248. visidata/misc.py +0 -5
  249. visidata-2.11.dev0.dist-info/RECORD +0 -142
  250. /visidata/{repeat.py → features/repeat.py} +0 -0
  251. {visidata-2.11.dev0.data → visidata-3.0.data}/scripts/vd +0 -0
  252. {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/LICENSE.gpl3 +0 -0
  253. {visidata-2.11.dev0.dist-info → visidata-3.0.dist-info}/top_level.txt +0 -0
@@ -1,28 +1,84 @@
1
- from visidata import VisiData, Sheet, Column
1
+ from visidata import Sheet, VisiData, TypedWrapper, anytype, date, vlen, Column, vd
2
+ from collections import defaultdict
2
3
 
3
4
 
4
5
  @VisiData.api
5
6
  def open_parquet(vd, p):
6
- return ParquetSheet(p.name, source=p)
7
+ return ParquetSheet(p.base_stem, source=p)
8
+
7
9
 
8
10
  class ParquetColumn(Column):
9
11
  def calcValue(self, row):
10
- return self.source[row['__rownum__']].as_py()
12
+ val = self.source[row["__rownum__"]]
13
+ if val.type == 'large_string':
14
+ return memoryview(val.as_buffer())[:2**20].tobytes().decode('utf-8')
15
+ else:
16
+ return val.as_py()
11
17
 
12
18
 
13
19
  class ParquetSheet(Sheet):
14
20
  # rowdef: {'__rownum__':int, parquet_col:overridden_value, ...}
15
21
  def iterload(self):
16
- import pyarrow.parquet as pq
22
+ pa = vd.importExternal("pyarrow", "pyarrow")
23
+ pq = vd.importExternal("pyarrow.parquet", "pyarrow")
17
24
  from visidata.loaders.arrow import arrow_to_vdtype
18
25
 
19
- self.tbl = pq.read_table(self.source)
26
+ if self.source.is_dir():
27
+ self.tbl = pq.read_table(str(self.source))
28
+ else:
29
+ with self.source.open('rb') as f:
30
+ self.tbl = pq.read_table(f)
31
+
20
32
  self.columns = []
21
33
  for colname, col in zip(self.tbl.column_names, self.tbl.columns):
22
34
  c = ParquetColumn(colname,
23
35
  type=arrow_to_vdtype(col.type),
24
- source=col)
36
+ source=col,
37
+ cache=(col.type.id == pa.lib.Type_LARGE_STRING))
25
38
  self.addColumn(c)
26
39
 
27
40
  for i in range(self.tbl.num_rows):
28
41
  yield dict(__rownum__=i)
42
+
43
+
44
+ @VisiData.api
45
+ def save_parquet(vd, p, sheet):
46
+ pa = vd.importExternal("pyarrow")
47
+ pq = vd.importExternal("pyarrow.parquet", "pyarrow")
48
+
49
+ typemap = {
50
+ anytype: pa.string(),
51
+ int: pa.int64(),
52
+ vlen: pa.int64(),
53
+ float: pa.float64(),
54
+ str: pa.string(),
55
+ date: pa.date64(),
56
+ # list: pa.array(),
57
+ }
58
+
59
+ for t in vd.numericTypes:
60
+ if t not in typemap:
61
+ typemap[t] = pa.float64()
62
+
63
+ databycol = defaultdict(list) # col -> [values]
64
+
65
+ for typedvals in sheet.iterdispvals(format=False):
66
+ for col, val in typedvals.items():
67
+ if isinstance(val, TypedWrapper):
68
+ val = None
69
+
70
+ databycol[col].append(val)
71
+
72
+ data = [
73
+ pa.array(vals, type=typemap.get(col.type, pa.string()))
74
+ for col, vals in databycol.items()
75
+ ]
76
+
77
+ schema = pa.schema(
78
+ [(c.name, typemap.get(c.type, pa.string())) for c in sheet.visibleCols]
79
+ )
80
+ with p.open_bytes(mode="w") as outf:
81
+ with pq.ParquetWriter(outf, schema) as writer:
82
+ writer.write_batch(
83
+ pa.record_batch(data, names=[c.name for c in sheet.visibleCols])
84
+ )
visidata/loaders/pcap.py CHANGED
@@ -17,7 +17,7 @@ services = {} # [('tcp', 25)] -> 'smtp'
17
17
 
18
18
  @VisiData.api
19
19
  def open_pcap(vd, p):
20
- return PcapSheet(p.name, source=p)
20
+ return PcapSheet(p.base_stem, source=p)
21
21
 
22
22
  open_cap = open_pcap
23
23
  open_pcapng = open_pcap
@@ -83,8 +83,8 @@ def init_pcap():
83
83
  return
84
84
 
85
85
  global dpkt, dnslib
86
- import dpkt
87
- import dnslib
86
+ dpkt = vd.importExternal('dpkt')
87
+ dnslib = vd.importExternal('dnslib')
88
88
 
89
89
  load_consts(protocols['ethernet'], dpkt.ethernet, 'ETH_TYPE_')
90
90
  load_consts(protocols['ip'], dpkt.ip, 'IP_PROTO_')
visidata/loaders/pdf.py CHANGED
@@ -8,8 +8,8 @@ vd.option('pdf_tables', False, 'parse PDF for tables instead of pages of text',
8
8
  @VisiData.api
9
9
  def open_pdf(vd, p):
10
10
  if vd.options.pdf_tables:
11
- return TabulaSheet(p.name, source=p)
12
- return PdfMinerSheet(p.name, source=p)
11
+ return TabulaSheet(p.base_stem, source=p)
12
+ return PdfMinerSheet(p.base_stem, source=p)
13
13
 
14
14
 
15
15
  class PdfMinerSheet(TableSheet):
@@ -20,6 +20,7 @@ class PdfMinerSheet(TableSheet):
20
20
  ColumnItem('contents', 2),
21
21
  ]
22
22
  def iterload(self):
23
+ vd.importExternal('pdfminer.high_level', 'pdfminer.six')
23
24
  import pdfminer.high_level
24
25
  from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
25
26
  from pdfminer.converter import TextConverter, PDFPageAggregator
@@ -38,6 +39,6 @@ class PdfMinerSheet(TableSheet):
38
39
 
39
40
  class TabulaSheet(IndexSheet):
40
41
  def iterload(self):
41
- import tabula
42
+ tabula = vd.importExternal('tabula')
42
43
  for i, t in enumerate(tabula.read_pdf(self.source, pages='all', multiple_tables=True)):
43
44
  yield PandasSheet(self.source.name, i, source=t)
visidata/loaders/png.py CHANGED
@@ -1,32 +1,24 @@
1
1
  import functools
2
2
 
3
- from visidata import VisiData, Sheet, Column, Progress, colors, ColumnItem, Canvas, asyncthread
3
+ from visidata import VisiData, Sheet, Column, Progress, colors, ColumnItem, Canvas, asyncthread, vd, rgb_to_attr
4
4
 
5
5
 
6
6
  @VisiData.api
7
7
  def open_png(vd, p):
8
- return PNGSheet(p.name, source=p)
8
+ return PNGSheet(p.base_stem, source=p)
9
9
 
10
- @functools.lru_cache(256)
11
- def rgb_to_attr(r,g,b,a):
12
- if a == 0: return 0
13
- if r > g and r > b: return colors['red']
14
- if g > r and g > b: return colors['green']
15
- if b > r and b > g: return colors['blue']
16
- if a == 255: return colors['white']
17
- return 0
18
10
 
19
11
  class PNGSheet(Sheet):
20
12
  rowtype = 'pixels' # rowdef: list(x, y, r, g, b, a)
21
13
  columns = [ColumnItem(name, i, type=int) for i, name in enumerate('x y R G B A'.split())] + [
22
- Column('attr', type=int, getter=lambda col,row: rgb_to_attr(*row[2:]))
14
+ Column('attr', getter=lambda col,row: rgb_to_attr(*row[2:]))
23
15
  ]
24
16
  nKeys = 2
25
17
  def newRow(self):
26
18
  return list((None, None, 0, 0, 0, 0))
27
19
 
28
20
  def iterload(self):
29
- import png
21
+ png = vd.importExternal('png', 'pypng')
30
22
  self.png = png.Reader(bytes=self.source.read_bytes())
31
23
  self.width, self.height, pixels, md = self.png.asRGBA()
32
24
  for y, row in enumerate(pixels):
@@ -78,7 +70,7 @@ def save_png(vd, p, vs):
78
70
 
79
71
  vd.status('saving %sx%s' % (vs.width, vs.height))
80
72
 
81
- import png
73
+ vd.importExternal('png', 'pypng')
82
74
  img = png.from_array(pixels, mode='RGBA')
83
75
  with open(p, 'wb') as fp:
84
76
  img.write(fp)
@@ -86,4 +78,18 @@ def save_png(vd, p, vs):
86
78
  vd.status('saved')
87
79
 
88
80
 
81
+ def blockchar(i:int):
82
+ '''1 8 into 1 2
83
+ 2 16
84
+ 4 32 4 8
85
+ 64 128
86
+ '''
87
+ UL = bool(i & 1 or i & 2)
88
+ UR = bool(i & 8 or i & 16)
89
+ LL = bool(i & 4 or i & 64)
90
+ LR = bool(i & 32 or i & 128)
91
+ return ' ▘▝▀▖▌▞▛▗▚▐▜▄▙▟█'[UL*1+UR*2+LL*4+LR*8]
92
+
93
+
94
+ PNGDrawing.options.disp_canvas_charset = ''.join(blockchar(i) for i in range(256))
89
95
  PNGSheet.addCommand('.', 'plot-sheet', 'vd.push(PNGDrawing(name+"_plot", source=sheet, sourceRows=rows))', 'plot this png')
@@ -1,13 +1,14 @@
1
1
  import random
2
+ from urllib.parse import urlparse
2
3
 
3
- from visidata import VisiData, vd, Sheet, options, anytype, urlparse, asyncthread, ColumnItem
4
+ from visidata import VisiData, vd, Sheet, options, anytype, asyncthread, ColumnItem
4
5
 
5
6
  __all__ = ['openurl_postgres', 'openurl_postgresql', 'openurl_rds', 'PgTable', 'PgTablesSheet']
6
7
 
7
8
  vd.option('postgres_schema', 'public', 'The desired schema for the Postgres database')
8
9
 
9
10
  def codeToType(type_code, colname):
10
- import psycopg2
11
+ psycopg2 = vd.importExternal('psycopg2', 'psycopg2-binary')
11
12
  try:
12
13
  tname = psycopg2._psycopg.string_types[type_code].name
13
14
  if 'INTEGER' in tname:
@@ -21,8 +22,8 @@ def codeToType(type_code, colname):
21
22
 
22
23
  @VisiData.api
23
24
  def openurl_rds(vd, url, filetype=None):
24
- import boto3
25
- import psycopg2
25
+ boto3 = vd.importExternal('boto3')
26
+ psycopg2 = vd.importExternal('psycopg2', 'psycopg2-binary')
26
27
 
27
28
  rds = boto3.client('rds')
28
29
  url = urlparse(url.given)
@@ -42,7 +43,7 @@ def openurl_rds(vd, url, filetype=None):
42
43
 
43
44
  @VisiData.api
44
45
  def openurl_postgres(vd, url, filetype=None):
45
- import psycopg2
46
+ psycopg2 = vd.importExternal('psycopg2', 'psycopg2-binary')
46
47
 
47
48
  url = urlparse(url.given)
48
49
  dbname = url.path[1:]
@@ -87,7 +88,7 @@ def postgresGetColumns(vd, cur):
87
88
  class PgTablesSheet(Sheet):
88
89
  rowtype = 'tables'
89
90
 
90
- def reload(self):
91
+ def loader(self):
91
92
  schema = options.postgres_schema
92
93
  qstr = f'''
93
94
  SELECT relname table_name, column_count.ncols, reltuples::bigint est_nrows
@@ -122,9 +123,9 @@ class PgTable(Sheet):
122
123
  @asyncthread
123
124
  def reload(self):
124
125
  if self.options.postgres_schema:
125
- source = f"{self.options.postgres_schema}.{self.source}"
126
+ source = f'"{self.options.postgres_schema}"."{self.source}"'
126
127
  else:
127
- source = self.source
128
+ source = f'"{self.source}"'
128
129
  with self.sql.cur(f"SELECT * FROM {source}") as cur:
129
130
  self.rows = []
130
131
  r = cur.fetchone()
visidata/loaders/rec.py CHANGED
@@ -5,7 +5,7 @@ from visidata import VisiData, vd, Progress, TableSheet, IndexSheet, ItemColumn,
5
5
 
6
6
  @VisiData.api
7
7
  def open_rec(vd, p):
8
- return RecIndexSheet(p.name, source=p)
8
+ return RecIndexSheet(p.base_stem, source=p)
9
9
 
10
10
  def decode_multiline(line, fp):
11
11
  'Parse *line* and lookahead into *fp* as iterator for continuing lines. Return (multiline, next_line) where *multiline* can contain newlines and *next_line is the line after the combined *multiline*. Handle "\\" at end and "+" at beginning of lines. *next_line* will be None iff iterator is exhausted.'
@@ -49,6 +49,7 @@ class RecIndexSheet(IndexSheet):
49
49
 
50
50
  fp = iter(self.source)
51
51
  while next_line is not None:
52
+ try:
52
53
  line, next_line = decode_multiline(next_line, fp)
53
54
  line = line.lstrip()
54
55
 
@@ -62,6 +63,7 @@ class RecIndexSheet(IndexSheet):
62
63
 
63
64
  if not sheet or (newRecord and line[0] == '%'):
64
65
  sheet = RecSheet('', columns=[], rows=[], source=self, comments=comments)
66
+ sheet.columns = []
65
67
  comments = []
66
68
  yield sheet
67
69
  newRecord = False
@@ -81,7 +83,7 @@ class RecIndexSheet(IndexSheet):
81
83
  if colname not in sheet.colnames:
82
84
  sheet.addColumn(ItemColumn(colname, keycol=i+1))
83
85
  elif desc in ['sort']:
84
- sheet.orderBy([sheet.column(colname) for colname in rest.split()])
86
+ sheet._ordering = [(colname, False) for colname in rest.split()]
85
87
  elif desc in ['type', 'typedef']:
86
88
  pass
87
89
  elif desc in ['auto']: # autoincrement columns should be present already
@@ -112,6 +114,8 @@ class RecIndexSheet(IndexSheet):
112
114
  row[name].append(rest)
113
115
  else:
114
116
  row[name] = rest
117
+ except Exception as e:
118
+ vd.exceptionCaught(e)
115
119
 
116
120
  for sheet in Progress(self.rows):
117
121
  sheet.sort()
@@ -119,7 +123,7 @@ class RecIndexSheet(IndexSheet):
119
123
 
120
124
  @VisiData.api
121
125
  def save_rec(vd, p, *vsheets):
122
- with p.open_text(mode='w') as fp:
126
+ with p.open(mode='w') as fp:
123
127
  for vs in vsheets:
124
128
  comments = getattr(vs, 'comments', [])
125
129
  if comments:
visidata/loaders/s3.py ADDED
@@ -0,0 +1,342 @@
1
+ """Allow VisiData to work directly with Amazon S3 paths.
2
+
3
+ Functionality is more limited than local paths, but supports:
4
+
5
+ * Navigating among directories (S3 prefixes)
6
+ * Opening supported filetypes, including compressed files
7
+ * Versioned buckets
8
+ """
9
+
10
+ import textwrap
11
+ from visidata import (
12
+ ENTER,
13
+ Column,
14
+ ItemColumn,
15
+ Path,
16
+ Sheet,
17
+ VisiData,
18
+ asyncthread,
19
+ date,
20
+ vd,
21
+ )
22
+
23
+ vd.option(
24
+ "s3_endpoint",
25
+ "",
26
+ "alternate S3 endpoint, used for local testing or alternative S3-compatible services",
27
+ replay=True,
28
+ )
29
+ vd.option("s3_glob", True, "enable glob-matching for S3 paths", replay=True)
30
+ vd.option(
31
+ "s3_version_aware",
32
+ False,
33
+ "show all object versions in a versioned bucket",
34
+ replay=True,
35
+ )
36
+
37
+
38
+ class S3Path(Path):
39
+ """A Path-like object representing an S3 file (object) or directory (prefix)."""
40
+
41
+ _fs = None
42
+
43
+ def __init__(self, path, version_aware=None, version_id=None):
44
+ super().__init__(path)
45
+ self.given = path
46
+ self.version_aware = version_aware or vd.options.s3_version_aware
47
+ self.version_id = self.version_aware and version_id or None
48
+
49
+ @property
50
+ def fs(self):
51
+ if self._fs is None:
52
+ s3fs_core = vd.importExternal("s3fs.core", "s3fs")
53
+ self._fs = s3fs_core.S3FileSystem(
54
+ client_kwargs={"endpoint_url": vd.options.s3_endpoint or None},
55
+ version_aware=self.version_aware,
56
+ )
57
+
58
+ return self._fs
59
+
60
+ @fs.setter
61
+ def fs(self, val):
62
+ self._fs = val
63
+
64
+ def open(self, mode='r', **kwargs):
65
+ """Open the current S3 path, decompressing along the way if needed."""
66
+
67
+ fp = self.fs.open(self.given, mode="rb" if self.compression else mode, version_id=self.version_id)
68
+
69
+ # Workaround for https://github.com/ajkerrigan/visidata-plugins/issues/12
70
+ if hasattr(fp, "cache") and fp.cache.size != fp.size:
71
+ vd.debug(
72
+ f"updating cache size from {fp.cache.size} to {fp.size} to match object size"
73
+ )
74
+ fp.cache.size = fp.size
75
+
76
+ if self.compression == "gz":
77
+ import gzip
78
+
79
+ return gzip.open(fp, mode, **kwargs)
80
+
81
+ if self.compression == "bz2":
82
+ import bz2
83
+
84
+ return bz2.open(fp, mode, **kwargs)
85
+
86
+ if self.compression == "xz":
87
+ import lzma
88
+
89
+ return lzma.open(fp, mode, **kwargs)
90
+
91
+ return fp
92
+
93
+
94
+ class S3DirSheet(Sheet):
95
+ """Display a listing of files and directories (objects and prefixes) in an S3 path.
96
+
97
+ Allow single or multiple entries to be opened in separate sheets.
98
+ """
99
+
100
+ columns = [
101
+ Column("name", getter=lambda col, row: col.sheet.object_display_name(row)),
102
+ ItemColumn("type"),
103
+ ItemColumn("size", type=int),
104
+ ItemColumn("modtime", "LastModified", type=date),
105
+ ItemColumn("latest", "IsLatest", type=bool),
106
+ ItemColumn("version_id", "VersionId", type=str, width=0),
107
+ ]
108
+
109
+ def __init__(self, name, source, version_aware=None):
110
+ import re
111
+
112
+ super().__init__(name=name, source=source)
113
+ self.rowtype = "files"
114
+ self.nKeys = 1
115
+ self.use_glob_matching = vd.options.s3_glob and re.search(
116
+ r"[*?\[\]]", self.source.given
117
+ )
118
+ self.version_aware = (
119
+ vd.options.s3_version_aware if version_aware is None else version_aware
120
+ )
121
+ self.fs = source.fs
122
+
123
+ def object_display_name(self, row):
124
+ """Provide a friendly display name for an S3 path.
125
+
126
+ When listing the contents of a single S3 prefix, the name can chop off
127
+ prefix bits to imitate a directory browser. When glob matching,
128
+ include the full key name for each entry.
129
+ """
130
+ return (
131
+ row.get("name")
132
+ if self.use_glob_matching
133
+ else row.get("name").rpartition("/")[2]
134
+ )
135
+
136
+ def iterload(self):
137
+ """Delegate to the underlying filesystem to fetch S3 entries."""
138
+ list_func = self.fs.glob if self.use_glob_matching else self.fs.ls
139
+
140
+ if not (
141
+ self.use_glob_matching
142
+ or self.fs.exists(self.source.given)
143
+ or self.fs.isdir(self.source.given)
144
+ ):
145
+ vd.fail(f"unable to open S3 path: {self.source.given}")
146
+
147
+ if self.version_aware:
148
+ self.column("latest").hide(False)
149
+ else:
150
+ self.column("latest").hide(True)
151
+
152
+ for key in list_func(str(self.source)):
153
+ if self.version_aware and self.fs.isfile(key):
154
+ yield from (
155
+ {**obj_version, "name": key, "type": "file"}
156
+ for obj_version in self.fs.object_version_info(key)
157
+ if key.partition("/")[2] == obj_version["Key"]
158
+ )
159
+ else:
160
+ yield self.fs.stat(key)
161
+
162
+ @asyncthread
163
+ def download(self, rows, savepath):
164
+ """Download files and directories to a local path.
165
+
166
+ Recurse through through subdirectories.
167
+ """
168
+ remote_files = [row["name"] for row in rows]
169
+ self.fs.download(remote_files, str(savepath), recursive=True)
170
+
171
+ def open_rows(self, rows):
172
+ """Open new sheets for the target rows."""
173
+ return (
174
+ vd.openSource(
175
+ S3Path(
176
+ "s3://{}".format(row["name"]),
177
+ version_aware=self.version_aware,
178
+ version_id=row.get("VersionId"),
179
+ )
180
+ )
181
+ for row in rows
182
+ )
183
+
184
+ def join_rows(self, rows):
185
+ """Open new sheets for the target rows and concatenate their contents."""
186
+ sheets = list(self.open_rows(rows))
187
+ for sheet in vd.Progress(sheets):
188
+ sheet.reload()
189
+
190
+ # Wait for all sheets to fully load before joining them.
191
+ # 'append' is the only join type that makes sense here,
192
+ # since we're joining freshly opened sheets with no key
193
+ # columns.
194
+ vd.sync()
195
+ return sheets[0].openJoin(sheets[1:], jointype="append")
196
+
197
+ def refresh_path(self, path=None):
198
+ """Clear the s3fs cache for the given path and reload.
199
+
200
+ By default, clear the entire cache.
201
+ """
202
+ self.fs.invalidate_cache(path)
203
+ self.reload()
204
+
205
+ def toggle_versioning(self):
206
+ """Enable or disable support for S3 versioning."""
207
+ self.version_aware = not self.version_aware
208
+ self.fs.version_aware = self.version_aware
209
+ vd.status(f's3 versioning {"enabled" if self.version_aware else "disabled"}')
210
+ if self.currentThreads:
211
+ vd.debug("cancelling threads before reloading")
212
+ vd.cancelThread(*self.currentThreads)
213
+ self.reload()
214
+
215
+
216
+ @VisiData.api
217
+ def openurl_s3(vd, p, filetype):
218
+ """Open a sheet for an S3 path.
219
+
220
+ S3 directories (prefixes) require special handling, but files (objects)
221
+ can use standard VisiData "open" functions.
222
+ """
223
+
224
+ # Non-obvious behavior here: For the default case, we don't want to send
225
+ # a custom endpoint to s3fs. However, using None as a default trips up
226
+ # VisiData's type detection for the endpoint option. So we use an empty
227
+ # string as the default instead, and convert back to None here.
228
+ endpoint = vd.options.s3_endpoint or None
229
+
230
+ p = S3Path(
231
+ str(p.given),
232
+ version_aware=getattr(p, "version_aware", vd.options.s3_version_aware),
233
+ version_id=getattr(p, "version_id", None),
234
+ )
235
+
236
+ p.fs.version_aware = p.version_aware
237
+ if p.fs.client_kwargs.get("endpoint_url", "") != endpoint:
238
+ p.fs.client_kwargs = {"endpoint_url": endpoint}
239
+ p.fs.connect()
240
+
241
+ if not p.fs.isfile(str(p.given)):
242
+ return S3DirSheet(p.base_stem, source=p, version_aware=p.version_aware)
243
+
244
+ if not filetype:
245
+ filetype = p.ext or "txt"
246
+
247
+ openfunc = getattr(vd, f"open_{filetype.lower()}")
248
+ if not openfunc:
249
+ vd.warning(f"no loader found for {filetype} files, falling back to txt")
250
+ filetype = "txt"
251
+ openfunc = vd.open_txt
252
+
253
+ assert callable(openfunc), f"no function/method available to open {p.given}"
254
+ vs = openfunc(p)
255
+ vd.status(
256
+ f'opening {p.given} as {filetype} (version id: {p.version_id or "latest"})'
257
+ )
258
+ return vs
259
+
260
+
261
+ S3DirSheet.addCommand(
262
+ ENTER,
263
+ "s3-open-row",
264
+ "vd.push(next(sheet.open_rows([cursorRow])))",
265
+ "open the current S3 entry",
266
+ )
267
+ S3DirSheet.addCommand(
268
+ "g" + ENTER,
269
+ "s3-open-rows",
270
+ "for vs in sheet.open_rows(selectedRows): vd.push(vs)",
271
+ "open all selected S3 entries",
272
+ )
273
+ S3DirSheet.addCommand(
274
+ "z^R",
275
+ "s3-refresh-sheet",
276
+ "sheet.refresh_path(str(sheet.source))",
277
+ "clear the s3fs cache for this path, then reload",
278
+ )
279
+ S3DirSheet.addCommand(
280
+ "gz^R",
281
+ "s3-refresh-sheet-all",
282
+ "sheet.refresh_path()",
283
+ "clear the entire s3fs cache, then reload",
284
+ )
285
+ S3DirSheet.addCommand(
286
+ "^V",
287
+ "s3-toggle-versioning",
288
+ "sheet.toggle_versioning()",
289
+ "enable/disable support for S3 versioning",
290
+ )
291
+ S3DirSheet.addCommand(
292
+ "&",
293
+ "s3-join-rows",
294
+ "vd.push(sheet.join_rows(selectedRows))",
295
+ "open and join sheets for selected S3 entries",
296
+ )
297
+ S3DirSheet.addCommand(
298
+ "gx",
299
+ "s3-download-rows",
300
+ textwrap.dedent(
301
+ """
302
+ savepath = inputPath("download selected rows to: ", value=".")
303
+ sheet.download(selectedRows, savepath)
304
+ """
305
+ ),
306
+ "download selected files and directories",
307
+ )
308
+
309
+ S3DirSheet.addCommand(
310
+ "x",
311
+ "s3-download-row",
312
+ # Note about the use of `_path.name` here. Given a `visidata.Path`
313
+ # object `path`, `path._path` is a `pathlib.Path` object.
314
+ #
315
+ # `visidata.Path` objects do some fun parsing to pick out
316
+ # file types and extensions, handle compression transparently,
317
+ # etc. That parsing leaves the `name` attribute without a file
318
+ # extension, and makes it a little tricky to tack back on.
319
+ #
320
+ # `pathlib.Path` objects have a `name` with the extension intact.
321
+ # That makes `path._path.name` a convenient default output path.
322
+ textwrap.dedent(
323
+ """
324
+ savepath = inputPath("download to: ", value=Path(cursorRow["name"])._path.name)
325
+ sheet.download([cursorRow], savepath)
326
+ """
327
+ ),
328
+ "download the file or directory in the cursor row",
329
+ )
330
+
331
+ vd.addMenuItems(
332
+ """
333
+ File > Toggle versioning > s3-toggle-versioning
334
+ File > Refresh > Current path > s3-refresh-sheet
335
+ File > Refresh > All > s3-refresh-sheet-all
336
+ Row > Download > Current row > s3-download-row
337
+ Row > Download > Selected rows > s3-download-rows
338
+ Data > Join > Selected rows > s3-join-rows
339
+ """
340
+ )
341
+
342
+ vd.addGlobals(S3DirSheet=S3DirSheet)
visidata/loaders/sas.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
 
3
- from visidata import VisiData, Sheet, Progress, ColumnItem, anytype
3
+ from visidata import VisiData, Sheet, Progress, ColumnItem, anytype, vd
4
4
 
5
5
  SASTypes = {
6
6
  'string': str,
@@ -9,15 +9,15 @@ SASTypes = {
9
9
 
10
10
  @VisiData.api
11
11
  def open_xpt(vd, p):
12
- return XptSheet(p.name, source=p)
12
+ return XptSheet(p.base_stem, source=p)
13
13
 
14
14
  @VisiData.api
15
15
  def open_sas7bdat(vd, p):
16
- return SasSheet(p.name, source=p)
16
+ return SasSheet(p.base_stem, source=p)
17
17
 
18
18
  class XptSheet(Sheet):
19
19
  def iterload(self):
20
- import xport
20
+ xport = vd.importExternal('xport')
21
21
  with open(self.source, 'rb') as fp:
22
22
  self.rdr = xport.Reader(fp)
23
23
 
@@ -30,7 +30,7 @@ class XptSheet(Sheet):
30
30
 
31
31
  class SasSheet(Sheet):
32
32
  def iterload(self):
33
- import sas7bdat
33
+ sas7bdat = vd.importExternal('sas7bdat')
34
34
  self.dat = sas7bdat.SAS7BDAT(str(self.source), skip_header=True, log_level=logging.CRITICAL)
35
35
  self.columns = []
36
36
  for col in self.dat.columns: