visidata 2.11.1__py3-none-any.whl → 3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. visidata/__init__.py +72 -91
  2. visidata/_input.py +259 -42
  3. visidata/_open.py +84 -29
  4. visidata/_types.py +21 -3
  5. visidata/_urlcache.py +17 -4
  6. visidata/aggregators.py +65 -25
  7. visidata/apps/__init__.py +0 -0
  8. visidata/apps/vdsql/__about__.py +8 -0
  9. visidata/apps/vdsql/__init__.py +5 -0
  10. visidata/apps/vdsql/__main__.py +27 -0
  11. visidata/apps/vdsql/_ibis.py +748 -0
  12. visidata/apps/vdsql/bigquery.py +61 -0
  13. visidata/apps/vdsql/clickhouse.py +53 -0
  14. visidata/apps/vdsql/setup.py +40 -0
  15. visidata/apps/vdsql/snowflake.py +67 -0
  16. visidata/apps/vgit/__init__.py +13 -0
  17. {vgit → visidata/apps/vgit}/blame.py +5 -2
  18. {vgit → visidata/apps/vgit}/branch.py +31 -16
  19. {vgit → visidata/apps/vgit}/config.py +3 -3
  20. visidata/apps/vgit/diff.py +169 -0
  21. visidata/apps/vgit/gitsheet.py +161 -0
  22. {vgit → visidata/apps/vgit}/grep.py +6 -5
  23. visidata/apps/vgit/log.py +81 -0
  24. {vgit → visidata/apps/vgit}/main.py +18 -5
  25. {vgit → visidata/apps/vgit}/remote.py +8 -4
  26. visidata/apps/vgit/repos.py +71 -0
  27. {vgit → visidata/apps/vgit}/setup.py +6 -4
  28. visidata/apps/vgit/stash.py +69 -0
  29. visidata/apps/vgit/status.py +204 -0
  30. {vgit → visidata/apps/vgit}/statusbar.py +2 -0
  31. visidata/basesheet.py +59 -50
  32. visidata/canvas.py +208 -93
  33. visidata/choose.py +6 -6
  34. visidata/clean_names.py +29 -0
  35. visidata/clipboard.py +73 -17
  36. visidata/cliptext.py +220 -46
  37. visidata/cmdlog.py +88 -114
  38. visidata/color.py +142 -56
  39. visidata/column.py +121 -129
  40. visidata/ddw/input.ddw +74 -79
  41. visidata/ddw/regex.ddw +57 -0
  42. visidata/ddwplay.py +33 -14
  43. visidata/deprecated.py +77 -3
  44. visidata/desktop/visidata.desktop +7 -0
  45. visidata/editor.py +12 -6
  46. visidata/errors.py +5 -1
  47. visidata/experimental/__init__.py +0 -0
  48. visidata/experimental/diff_sheet.py +29 -0
  49. visidata/experimental/digit_autoedit.py +6 -0
  50. visidata/experimental/gdrive.py +89 -0
  51. visidata/experimental/google.py +37 -0
  52. visidata/experimental/gsheets.py +79 -0
  53. visidata/experimental/live_search.py +37 -0
  54. visidata/experimental/liveupdate.py +45 -0
  55. visidata/experimental/mark.py +133 -0
  56. visidata/experimental/noahs_tapestry/__init__.py +1 -0
  57. visidata/experimental/noahs_tapestry/tapestry.py +147 -0
  58. visidata/experimental/rownum.py +73 -0
  59. visidata/experimental/slide_cells.py +26 -0
  60. visidata/expr.py +8 -4
  61. visidata/extensible.py +30 -5
  62. visidata/features/__init__.py +0 -0
  63. visidata/features/addcol_audiometadata.py +42 -0
  64. visidata/features/addcol_histogram.py +34 -0
  65. visidata/features/canvas_save_svg.py +69 -0
  66. visidata/features/change_precision.py +46 -0
  67. visidata/features/cmdpalette.py +163 -0
  68. visidata/features/colorbrewer.py +363 -0
  69. visidata/{colorsheet.py → features/colorsheet.py} +17 -16
  70. visidata/features/command_server.py +105 -0
  71. visidata/features/currency_to_usd.py +70 -0
  72. visidata/{customdate.py → features/customdate.py} +2 -0
  73. visidata/features/dedupe.py +132 -0
  74. visidata/{describe.py → features/describe.py} +17 -15
  75. visidata/features/errors_guide.py +26 -0
  76. visidata/features/expand_cols.py +202 -0
  77. visidata/{fill.py → features/fill.py} +3 -1
  78. visidata/{freeze.py → features/freeze.py} +11 -6
  79. visidata/features/graph_seaborn.py +79 -0
  80. visidata/features/helloworld.py +10 -0
  81. visidata/features/hint_types.py +17 -0
  82. visidata/{incr.py → features/incr.py} +5 -0
  83. visidata/{join.py → features/join.py} +107 -53
  84. visidata/features/known_cols.py +21 -0
  85. visidata/features/layout.py +62 -0
  86. visidata/{melt.py → features/melt.py} +32 -21
  87. visidata/features/normcol.py +118 -0
  88. visidata/features/open_config.py +7 -0
  89. visidata/features/open_syspaste.py +18 -0
  90. visidata/features/ping.py +157 -0
  91. visidata/features/procmgr.py +208 -0
  92. visidata/features/random_sample.py +6 -0
  93. visidata/{regex.py → features/regex.py} +47 -31
  94. visidata/features/reload_every.py +55 -0
  95. visidata/features/rename_col_cascade.py +30 -0
  96. visidata/features/scroll_context.py +60 -0
  97. visidata/features/select_equal_selected.py +11 -0
  98. visidata/features/setcol_fake.py +65 -0
  99. visidata/{slide.py → features/slide.py} +75 -21
  100. visidata/features/sparkline.py +48 -0
  101. visidata/features/status_source.py +20 -0
  102. visidata/{sysedit.py → features/sysedit.py} +2 -1
  103. visidata/features/sysopen_mailcap.py +46 -0
  104. visidata/features/term_extras.py +13 -0
  105. visidata/{transpose.py → features/transpose.py} +5 -4
  106. visidata/features/type_ipaddr.py +73 -0
  107. visidata/features/type_url.py +11 -0
  108. visidata/{unfurl.py → features/unfurl.py} +9 -9
  109. visidata/{window.py → features/window.py} +2 -2
  110. visidata/form.py +50 -21
  111. visidata/freqtbl.py +81 -33
  112. visidata/fuzzymatch.py +414 -0
  113. visidata/graph.py +105 -33
  114. visidata/guide.py +180 -0
  115. visidata/help.py +75 -44
  116. visidata/hint.py +39 -0
  117. visidata/indexsheet.py +109 -0
  118. visidata/input_history.py +55 -0
  119. visidata/interface.py +58 -0
  120. visidata/keys.py +17 -16
  121. visidata/loaders/__init__.py +9 -0
  122. visidata/loaders/_pandas.py +61 -21
  123. visidata/loaders/api_airtable.py +70 -0
  124. visidata/loaders/api_bitio.py +102 -0
  125. visidata/loaders/api_matrix.py +148 -0
  126. visidata/loaders/api_reddit.py +306 -0
  127. visidata/loaders/api_zulip.py +249 -0
  128. visidata/loaders/archive.py +41 -7
  129. visidata/loaders/arrow.py +7 -7
  130. visidata/loaders/conll.py +49 -0
  131. visidata/loaders/csv.py +25 -7
  132. visidata/loaders/eml.py +3 -4
  133. visidata/loaders/f5log.py +1204 -0
  134. visidata/loaders/fec.py +325 -0
  135. visidata/loaders/fixed_width.py +2 -4
  136. visidata/loaders/frictionless.py +3 -3
  137. visidata/loaders/geojson.py +8 -5
  138. visidata/loaders/google.py +48 -0
  139. visidata/loaders/graphviz.py +4 -4
  140. visidata/loaders/hdf5.py +4 -4
  141. visidata/loaders/html.py +48 -10
  142. visidata/loaders/http.py +84 -30
  143. visidata/loaders/imap.py +20 -10
  144. visidata/loaders/jrnl.py +52 -0
  145. visidata/loaders/json.py +83 -29
  146. visidata/loaders/jsonla.py +74 -0
  147. visidata/loaders/lsv.py +15 -11
  148. visidata/loaders/mailbox.py +40 -0
  149. visidata/loaders/markdown.py +1 -3
  150. visidata/loaders/mbtiles.py +4 -5
  151. visidata/loaders/mysql.py +11 -13
  152. visidata/loaders/npy.py +7 -7
  153. visidata/loaders/odf.py +4 -1
  154. visidata/loaders/orgmode.py +428 -0
  155. visidata/loaders/pandas_freqtbl.py +14 -20
  156. visidata/loaders/parquet.py +62 -6
  157. visidata/loaders/pcap.py +3 -3
  158. visidata/loaders/pdf.py +4 -3
  159. visidata/loaders/png.py +19 -13
  160. visidata/loaders/postgres.py +9 -8
  161. visidata/loaders/rec.py +7 -3
  162. visidata/loaders/s3.py +342 -0
  163. visidata/loaders/sas.py +5 -5
  164. visidata/loaders/scrape.py +186 -0
  165. visidata/loaders/shp.py +6 -5
  166. visidata/loaders/spss.py +5 -6
  167. visidata/loaders/sqlite.py +68 -28
  168. visidata/loaders/texttables.py +1 -1
  169. visidata/loaders/toml.py +60 -0
  170. visidata/loaders/tsv.py +61 -19
  171. visidata/loaders/ttf.py +19 -7
  172. visidata/loaders/unzip_http.py +6 -5
  173. visidata/loaders/usv.py +1 -1
  174. visidata/loaders/vcf.py +16 -16
  175. visidata/loaders/vds.py +10 -7
  176. visidata/loaders/vdx.py +30 -5
  177. visidata/loaders/xlsb.py +8 -1
  178. visidata/loaders/xlsx.py +145 -25
  179. visidata/loaders/xml.py +6 -3
  180. visidata/loaders/xword.py +4 -4
  181. visidata/loaders/yaml.py +15 -5
  182. visidata/macros.py +129 -42
  183. visidata/main.py +119 -94
  184. visidata/mainloop.py +101 -155
  185. visidata/man/parse_options.py +2 -2
  186. visidata/man/vd.1 +301 -148
  187. visidata/man/vd.txt +290 -153
  188. visidata/memory.py +3 -3
  189. visidata/menu.py +104 -423
  190. visidata/metasheets.py +59 -141
  191. visidata/modify.py +78 -23
  192. visidata/motd.py +3 -3
  193. visidata/mouse.py +137 -0
  194. visidata/movement.py +43 -35
  195. visidata/optionssheet.py +99 -0
  196. visidata/path.py +113 -32
  197. visidata/pivot.py +73 -47
  198. visidata/plugins.py +65 -192
  199. visidata/pyobj.py +50 -201
  200. visidata/rename_col.py +20 -0
  201. visidata/save.py +37 -20
  202. visidata/search.py +54 -10
  203. visidata/selection.py +84 -5
  204. visidata/settings.py +162 -25
  205. visidata/sheets.py +229 -257
  206. visidata/shell.py +51 -21
  207. visidata/sidebar.py +162 -0
  208. visidata/sort.py +11 -4
  209. visidata/statusbar.py +113 -104
  210. visidata/stored_list.py +43 -0
  211. visidata/stored_prop.py +38 -0
  212. visidata/tests/conftest.py +3 -3
  213. visidata/tests/test_cliptext.py +39 -0
  214. visidata/tests/test_commands.py +62 -7
  215. visidata/tests/test_edittext.py +2 -2
  216. visidata/tests/test_features.py +17 -0
  217. visidata/tests/test_menu.py +14 -0
  218. visidata/tests/test_path.py +13 -4
  219. visidata/text_source.py +53 -0
  220. visidata/textsheet.py +10 -3
  221. visidata/theme.py +44 -0
  222. visidata/themes/__init__.py +0 -0
  223. visidata/themes/ascii8.py +84 -0
  224. visidata/themes/asciimono.py +84 -0
  225. visidata/themes/light.py +17 -0
  226. visidata/threads.py +87 -39
  227. visidata/tuiwin.py +22 -0
  228. visidata/type_currency.py +22 -3
  229. visidata/type_date.py +31 -9
  230. visidata/type_floatsi.py +5 -1
  231. visidata/undo.py +17 -5
  232. visidata/utils.py +106 -23
  233. visidata/vdobj.py +28 -17
  234. visidata/windows.py +10 -0
  235. visidata/wrappers.py +9 -3
  236. visidata-3.0.data/data/share/applications/visidata.desktop +7 -0
  237. {visidata-2.11.1.data → visidata-3.0.data}/data/share/man/man1/vd.1 +301 -148
  238. {visidata-2.11.1.data → visidata-3.0.data}/data/share/man/man1/visidata.1 +301 -148
  239. visidata-3.0.data/scripts/vd2to3.vdx +9 -0
  240. {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/METADATA +12 -8
  241. visidata-3.0.dist-info/RECORD +257 -0
  242. {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/WHEEL +1 -1
  243. vgit/__init__.py +0 -1
  244. vgit/gitsheet.py +0 -164
  245. visidata/layout.py +0 -44
  246. visidata/misc.py +0 -5
  247. visidata-2.11.1.data/scripts/vgit +0 -9
  248. visidata-2.11.1.dist-info/RECORD +0 -155
  249. {vgit → visidata/apps/vgit}/__main__.py +0 -0
  250. {vgit → visidata/apps/vgit}/abort.py +0 -0
  251. /visidata/{repeat.py → features/repeat.py} +0 -0
  252. {visidata-2.11.1.data → visidata-3.0.data}/scripts/vd +0 -0
  253. {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/LICENSE.gpl3 +0 -0
  254. {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/entry_points.txt +0 -0
  255. {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/top_level.txt +0 -0
visidata/loaders/http.py CHANGED
@@ -1,11 +1,29 @@
1
- from visidata import Path, RepeatFile, options, vd, VisiData
1
+ import re
2
2
 
3
- content_filetypes = {
4
- 'tab-separated-values': 'tsv'
5
- }
3
+ from visidata import Path, RepeatFile, vd, VisiData
4
+ from visidata.loaders.tsv import splitter
6
5
 
7
6
  vd.option('http_max_next', 0, 'max next.url pages to follow in http response') #848
8
7
  vd.option('http_req_headers', {}, 'http headers to send to requests')
8
+ vd.option('http_ssl_verify', True, 'verify host and certificates for https')
9
+
10
+
11
+ @VisiData.api
12
+ def guessurl_mimetype(vd, path, response):
13
+ content_filetypes = {
14
+ 'tab-separated-values': 'tsv'
15
+ }
16
+
17
+ for k in dir(vd):
18
+ if k.startswith('open_'):
19
+ ft = k[5:]
20
+ content_filetypes[ft] = ft
21
+
22
+ contenttype = response.getheader('content-type')
23
+ subtype = contenttype.split(';')[0].split('/')[-1]
24
+ if subtype in content_filetypes:
25
+ return dict(filetype=content_filetypes.get(subtype), _likelihood=10)
26
+
9
27
 
10
28
 
11
29
  @VisiData.api
@@ -18,54 +36,90 @@ def openurl_http(vd, path, filetype=None):
18
36
  vd.fail(f'no vd.openhttp_{sch}')
19
37
  return openfunc(Path(schemes[-1]+'://'+path.given.split('://')[1]))
20
38
 
21
- import requests
39
+ import urllib.request
40
+ import urllib.error
41
+ import mimetypes
22
42
 
23
- response = requests.get(path.given, stream=True, **vd.options.getall('http_req_'))
24
- response.raise_for_status()
43
+ ctx = None
44
+ if not vd.options.http_ssl_verify:
45
+ import ssl
25
46
 
26
- if not filetype:
27
- # try auto-detect from extension
28
- ext = path.suffix[1:].lower()
29
- openfunc = getattr(vd, f'open_{ext}', vd.getGlobals().get(f'open_{ext}'))
47
+ ctx = ssl.create_default_context()
48
+ ctx.check_hostname = False
49
+ ctx.verify_mode = ssl.CERT_NONE
30
50
 
31
- if openfunc:
32
- filetype = ext
33
- else:
34
- # if extension unknown, fallback to mime-type
35
- contenttype = response.headers['content-type']
36
- subtype = contenttype.split(';')[0].split('/')[-1]
37
- filetype = content_filetypes.get(subtype, subtype)
38
-
39
- # If no charset is provided by response headers, use the user-specified
40
- # encoding option (which defaults to UTF-8) and hope for the best. The
41
- # alternative is an error because iter_lines() will produce bytes. We're
42
- # streaming so can't use response.apparent_encoding.
43
- if not response.encoding:
44
- response.encoding = options.encoding
51
+ req = urllib.request.Request(path.given, **vd.options.getall('http_req_'))
52
+ response = urllib.request.urlopen(req, context=ctx)
53
+
54
+ filetype = filetype or vd.guessFiletype(path, response, funcprefix='guessurl_').get('filetype') # try guessing by url
55
+ filetype = filetype or vd.guessFiletype(path, funcprefix='guess_').get('filetype') # try guessing by contents
45
56
 
46
57
  # Automatically paginate if a 'next' URL is given
47
- def _iter_lines(path=path, response=response, max_next=options.http_max_next):
58
+ def _iter_lines(path=path, response=response, max_next=vd.options.http_max_next):
48
59
  path.responses = []
49
60
  n = 0
50
61
  while response:
51
62
  path.responses.append(response)
52
- yield from response.iter_lines(decode_unicode=True)
63
+ with response as fp:
64
+ for line in splitter(response, delim=b'\n'):
65
+ yield line.decode(vd.options.encoding)
66
+
67
+ linkhdr = response.getheader('Link')
68
+ src = None
69
+ if linkhdr:
70
+ links = parse_header_links(linkhdr)
71
+ link_data = {}
72
+ for link in links:
73
+ key = link.get('rel') or link.get('url')
74
+ link_data[key] = link
75
+ src = link_data.get('next', {}).get('url', None)
53
76
 
54
- src = response.links.get('next', {}).get('url', None)
55
77
  if not src:
56
78
  break
57
79
 
58
80
  n += 1
59
81
  if n > max_next:
60
- vd.warning(f'stopping at max {max_next} pages')
82
+ vd.warning(f'stopping at max next pages: {max_next} pages')
61
83
  break
62
84
 
63
85
  vd.status(f'fetching next page from {src}')
64
- response = requests.get(src, stream=True, **vd.options.getall('http_req_'))
86
+ req = urllib.request.Request(src, **vd.options.getall('http_req_'))
87
+ response = urllib.request.urlopen(req)
65
88
 
66
89
  # add resettable iterator over contents as an already-open fp
67
90
  path.fptext = RepeatFile(_iter_lines())
68
91
 
69
92
  return vd.openSource(path, filetype=filetype)
70
93
 
94
+ def parse_header_links(link_header):
95
+ '''Return a list of dictionaries:
96
+ [{'url': 'https://example.com/content?page=1', 'rel': 'prev'},
97
+ {'url': 'https://example.com/content?page=3', 'rel': 'next'}]
98
+ Takes a link header string, of the form
99
+ '<https://example.com/content?page=1>; rel="prev", <https://example.com/content?page=3>; rel="next"'
100
+ See https://datatracker.ietf.org/doc/html/rfc8288#section-3
101
+ '''
102
+
103
+ links = []
104
+ quote_space = ' \'"'
105
+ link_header = link_header.strip(quote_space)
106
+ if not link_header: return []
107
+ for link_value in re.split(', *<', link_header):
108
+ if ';' in link_value:
109
+ url, params = link_value.split(';', maxsplit=1)
110
+ else:
111
+ url, params = link_value, ''
112
+ link = {'url': url.strip('<>' + quote_space)}
113
+
114
+ for param in params.split(';'):
115
+ if '=' in param:
116
+ key, value = param.split('=')
117
+ key = key.strip(quote_space)
118
+ value = value.strip(quote_space)
119
+ link[key] = value
120
+ else:
121
+ break
122
+ links.append(link)
123
+ return links
124
+
71
125
  VisiData.openurl_https = VisiData.openurl_http
visidata/loaders/imap.py CHANGED
@@ -1,11 +1,12 @@
1
- from visidata import vd, TableSheet, asyncthread, ColumnItem, Column, ColumnAttr, Progress
1
+ from visidata import VisiData, vd, TableSheet, asyncthread, ColumnItem, Column, ColumnAttr, Progress
2
+ import visidata.loaders.google
2
3
  from urllib.parse import urlparse
3
4
 
4
5
 
5
- def openurl_imap(p, **kwargs):
6
- url = urlparse(p.given)
7
- password = url.password or vd.error('no password given in url') # vd.input("imap password for %s" % user, display=False))
8
- return ImapSheet(url.hostname, source=url, password=password)
6
+ @VisiData.api
7
+ def openurl_imap(vd, url, **kwargs):
8
+ url_parsed = urlparse(str(url))
9
+ return ImapSheet(url_parsed.hostname, source=url_parsed, password=url_parsed.password)
9
10
 
10
11
 
11
12
  class ImapSheet(TableSheet):
@@ -22,17 +23,26 @@ class ImapSheet(TableSheet):
22
23
  ]
23
24
  nKeys = 1
24
25
 
25
- @asyncthread
26
- def reload(self):
26
+ def iterload(self):
27
27
  import imaplib
28
28
  import email.parser
29
29
 
30
30
  m = imaplib.IMAP4_SSL(host=self.source.hostname)
31
+ # m.debug=4
31
32
  user = self.source.username
32
- m.login(user, self.password)
33
+
34
+ if self.source.hostname == 'imap.gmail.com':
35
+ credentials=vd.google_auth(scopes='https://mail.google.com/')
36
+ header_template = 'user=%s\1auth=Bearer %s\1\1'
37
+ m.authenticate('XOAUTH2', lambda x: header_template % (user, credentials.token))
38
+ else:
39
+ if self.password is None:
40
+ vd.error('no password given in url') # vd.input("imap password for %s" % user, display=False))
41
+ m.login(user, self.source.password)
42
+
33
43
  typ, folders = m.list()
34
44
  for r in Progress(folders, gerund="downloading"):
35
- fname = r.decode('utf-8').split()[-1][1:-1]
45
+ fname = r.decode('utf-8').split()[-1]
36
46
  try:
37
47
  m.select(fname)
38
48
  typ, data = m.search(None, 'ALL')
@@ -44,7 +54,7 @@ class ImapSheet(TableSheet):
44
54
 
45
55
  msg = email.message_from_bytes(msgbytes[0][1])
46
56
  msg['folder'] = fname
47
- self.addRow(msg)
57
+ yield msg
48
58
 
49
59
  m.close()
50
60
  except Exception:
@@ -0,0 +1,52 @@
1
+ '''Loader for the jrnl.sh CLI journal file format'''
2
+
3
+ import re
4
+
5
+
6
+ from visidata import VisiData, TableSheet, ItemColumn, AttrDict
7
+
8
+
9
+ @VisiData.api
10
+ def open_jrnl(vd, p):
11
+ return JrnlSheet(p.base_stem, source=p)
12
+
13
+
14
+ class JrnlSheet(TableSheet):
15
+ # rowdef: AttrDict
16
+ columns = [
17
+ ItemColumn('date'),
18
+ ItemColumn('time'),
19
+ ItemColumn('title'),
20
+ ItemColumn('body'),
21
+ ItemColumn('tags'),
22
+ ]
23
+ def iterload(self):
24
+ re_title = re.compile(r'\[(.*?)\s(.*?)\] (.*)')
25
+ prevline = ''
26
+ for line in self.source:
27
+ tags = re.findall(r'(?<!\S)(@[-+*#/\w]+)', line)
28
+ if not prevline:
29
+ m = re_title.match(line)
30
+ if m:
31
+ row = AttrDict()
32
+ row.date, row.time, row.title = m.groups()
33
+ row.body = ''
34
+ row.tags = ' '.join(tags)
35
+ yield row
36
+ continue
37
+
38
+ row.body += line + '\n'
39
+ row.tags = ' '.join([row.tags]+tags)
40
+ prevline = line.strip()
41
+
42
+
43
+ @VisiData.api
44
+ def save_jrnl(vd, p, *vsheets):
45
+ with p.open(mode='w', encoding=vsheets[0].options.save_encoding) as fp:
46
+ for vs in vsheets:
47
+ for r in vs.iterrows():
48
+ fp.write(f'[{r.date} {r.time}] {r.title}\n')
49
+ body = r.body.strip()
50
+ if body:
51
+ fp.write(body + '\n')
52
+ fp.write('\n')
visidata/loaders/json.py CHANGED
@@ -1,30 +1,49 @@
1
1
  import json
2
2
 
3
- from visidata import vd, date, VisiData, PyobjSheet, deepcopy, AttrDict, stacktrace, TypedExceptionWrapper, options, visidata, ColumnItem, deduceType, wrapply, TypedWrapper, Progress, Sheet, InferColumnsSheet
3
+ from visidata import vd, date, anytype, VisiData, PyobjSheet, AttrDict, stacktrace, TypedExceptionWrapper, AlwaysDict, ItemColumn, wrapply, TypedWrapper, Progress, Sheet
4
4
 
5
5
  vd.option('json_indent', None, 'indent to use when saving json')
6
6
  vd.option('json_sort_keys', False, 'sort object keys when saving to json')
7
+ vd.option('json_ensure_ascii', True, 'ensure ascii encode when saving json')
7
8
  vd.option('default_colname', '', 'column name to use for non-dict rows')
8
9
 
10
+ @VisiData.api
11
+ def guess_json(vd, p):
12
+ with p.open(encoding=vd.options.encoding) as fp:
13
+ line = next(fp)
14
+
15
+ line = line.strip()
16
+
17
+ if line.startswith('{') and line.endswith('}'):
18
+ return dict(filetype='jsonl')
19
+
20
+ if line.startswith(tuple('[{')):
21
+ return dict(filetype='json')
22
+
23
+
9
24
  @VisiData.api
10
25
  def open_jsonobj(vd, p):
11
- return JsonSheet(p.name, source=p)
26
+ return JsonSheet(p.base_stem, source=p)
12
27
 
13
28
  @VisiData.api
14
29
  def open_jsonl(vd, p):
15
- return JsonSheet(p.name, source=p)
30
+ return JsonSheet(p.base_stem, source=p)
16
31
 
17
32
  VisiData.open_ndjson = VisiData.open_ldjson = VisiData.open_json = VisiData.open_jsonl
18
33
 
19
34
 
20
- class JsonSheet(InferColumnsSheet):
35
+ class JsonSheet(Sheet):
36
+ _rowtype = AttrDict
37
+ def resetCols(self):
38
+ self._knownKeys = set()
39
+ super().resetCols()
40
+
21
41
  def iterload(self):
22
- with self.source.open_text(encoding=self.options.encoding) as fp:
42
+ with self.open_text_source() as fp:
23
43
  for L in fp:
44
+ L = L.strip()
24
45
  try:
25
- if L.startswith('#'): # skip commented lines
26
- continue
27
- elif not L.strip(): # skip blank lines
46
+ if not L: # skip blank lines
28
47
  continue
29
48
  ret = json.loads(L, object_hook=AttrDict)
30
49
  if isinstance(ret, list):
@@ -37,7 +56,7 @@ class JsonSheet(InferColumnsSheet):
37
56
  e.stacktrace = stacktrace()
38
57
  yield TypedExceptionWrapper(json.loads, L, exception=e) # an error on one line
39
58
  else:
40
- with self.source.open_text(encoding=self.options.encoding) as fp:
59
+ with self.open_text_source() as fp:
41
60
  ret = json.load(fp)
42
61
  if isinstance(ret, list):
43
62
  yield from ret
@@ -45,22 +64,36 @@ class JsonSheet(InferColumnsSheet):
45
64
  yield ret
46
65
  break
47
66
 
67
+ def addColumn(self, *cols, index=None):
68
+ for c in cols:
69
+ self._knownKeys.add(c.expr or c.name)
70
+ return super().addColumn(*cols, index=index)
71
+
48
72
  def addRow(self, row, index=None):
49
73
  # Wrap non-dict rows in a dummy object with a predictable key name.
50
74
  # This allows for more consistent handling of rows containing scalars
51
75
  # or lists.
52
76
  if not isinstance(row, dict):
53
- v = {options.default_colname: row}
54
- row = visidata.AlwaysDict(row, **v)
77
+ v = {self.options.default_colname: row}
78
+ row = AlwaysDict(row, **v)
55
79
 
56
- return super().addRow(row, index=index)
80
+ ret = super().addRow(row, index=index)
81
+
82
+ for k in row:
83
+ if k not in self._knownKeys:
84
+ c = ItemColumn(k, type=float if isinstance(row[k], (float, int)) else anytype)
85
+ self.addColumn(c)
86
+
87
+ return ret
57
88
 
58
89
  def newRow(self, **fields):
59
- return fields
90
+ return AttrDict(fields)
60
91
 
61
92
  def openRow(self, row):
62
93
  return PyobjSheet("%s[%s]" % (self.name, self.keystr(row)), source=row)
63
94
 
95
+ JsonSheet.init('_knownKeys', set, copy=True) # set of row keys already seen
96
+
64
97
  ## saving json and jsonl
65
98
 
66
99
  class _vjsonEncoder(json.JSONEncoder):
@@ -68,17 +101,23 @@ class _vjsonEncoder(json.JSONEncoder):
68
101
  return str(obj)
69
102
 
70
103
 
71
- def _rowdict(cols, row):
104
+ @VisiData.api
105
+ def get_json_value(vd, col, row):
106
+ o = wrapply(col.getTypedValue, row)
107
+ if isinstance(o, TypedExceptionWrapper):
108
+ o = col.sheet.options.safe_error or str(o.exception)
109
+ elif isinstance(o, TypedWrapper):
110
+ o = o.val
111
+ elif isinstance(o, date):
112
+ o = col.getDisplayValue(row)
113
+ return o
114
+
115
+
116
+ def _rowdict(cols, row, keep_nulls=False):
72
117
  ret = {}
73
118
  for col in cols:
74
- o = wrapply(col.getTypedValue, row)
75
- if isinstance(o, TypedExceptionWrapper):
76
- o = col.sheet.options.safe_error or str(o.exception)
77
- elif isinstance(o, TypedWrapper):
78
- o = o.val
79
- elif isinstance(o, date):
80
- o = col.getDisplayValue(row)
81
- if o is not None:
119
+ o = vd.get_json_value(col, row)
120
+ if keep_nulls or o is not None:
82
121
  ret[col.name] = o
83
122
  return ret
84
123
 
@@ -92,13 +131,13 @@ def encode_json(vd, row, cols, enc=_vjsonEncoder(sort_keys=False)):
92
131
  @VisiData.api
93
132
  def save_json(vd, p, *vsheets):
94
133
  vs = vsheets[0]
95
- with p.open_text(mode='w', encoding=vs.options.encoding) as fp:
134
+ with p.open(mode='w', encoding=vs.options.save_encoding) as fp:
96
135
  try:
97
136
  indent = int(vs.options.json_indent)
98
137
  except Exception:
99
138
  indent = vs.options.json_indent
100
139
 
101
- jsonenc = _vjsonEncoder(indent=indent, sort_keys=vs.options.json_sort_keys)
140
+ jsonenc = _vjsonEncoder(indent=indent, sort_keys=vs.options.json_sort_keys, ensure_ascii=vs.options.json_ensure_ascii)
102
141
 
103
142
  if len(vsheets) == 1:
104
143
  fp.write('[\n')
@@ -107,11 +146,11 @@ def save_json(vd, p, *vsheets):
107
146
  for i, row in enumerate(vs.iterrows()):
108
147
  if i > 0:
109
148
  fp.write(',\n')
110
- rd = _rowdict(vs.visibleCols, row)
149
+ rd = _rowdict(vs.visibleCols, row, keep_nulls=(i==0))
111
150
  fp.write(jsonenc.encode(rd))
112
151
  fp.write('\n]\n')
113
152
  else:
114
- it = {vs.name: [_rowdict(vs.visibleCols, row) for row in vs.iterrows()] for vs in vsheets}
153
+ it = {vs.name: [_rowdict(vs.visibleCols, row, keep_nulls=(i==0)) for i, row in enumerate(vs.iterrows())] for vs in vsheets}
115
154
 
116
155
  with Progress(gerund='saving'):
117
156
  for chunk in jsonenc.iterencode(it):
@@ -123,19 +162,34 @@ def write_jsonl(vs, fp):
123
162
  vcols = vs.visibleCols
124
163
  jsonenc = _vjsonEncoder()
125
164
  with Progress(gerund='saving'):
126
- for row in vs.iterrows():
127
- rowdict = _rowdict(vcols, row)
165
+ for i, row in enumerate(vs.iterrows()):
166
+ rowdict = _rowdict(vcols, row, keep_nulls=(i==0))
128
167
  fp.write(jsonenc.encode(rowdict) + '\n')
129
168
 
169
+ if len(vs) == 0:
170
+ vd.warning(
171
+ "Output file is empty - cannot save headers without data for jsonl.\n"
172
+ "Use `.jsonla` filetype to save as JSONL arrays format "
173
+ "rather than JSONL dict format to preserve the headers."
174
+ )
175
+
130
176
 
131
177
  @VisiData.api
132
178
  def save_jsonl(vd, p, *vsheets):
133
- with p.open_text(mode='w', encoding=vsheets[0].options.encoding) as fp:
179
+ with p.open(mode='w', encoding=vsheets[0].options.save_encoding) as fp:
134
180
  for vs in vsheets:
135
181
  vs.write_jsonl(fp)
136
182
 
137
183
 
184
+ @VisiData.api
185
+ def JSON(vd, s:str):
186
+ 'Parse `s` as JSON.'
187
+ return json.loads(s)
188
+
189
+
138
190
  JsonSheet.options.encoding = 'utf-8'
191
+ JsonSheet.options.regex_skip = r'^(//|#).*'
192
+
139
193
  VisiData.save_ndjson = VisiData.save_jsonl
140
194
  VisiData.save_ldjson = VisiData.save_jsonl
141
195
 
@@ -0,0 +1,74 @@
1
+ import json
2
+
3
+ from visidata import VisiData, vd, SequenceSheet, deduceType, Progress
4
+
5
+
6
+ @VisiData.api
7
+ def guess_jsonla(vd, p):
8
+ '''A JSONLA file is a JSONL file with rows of arrays, where the first row
9
+ is a header array:
10
+
11
+ ["A", "B", "C"]
12
+ [1, "blue", true]
13
+ [2, "yellow", false]
14
+
15
+ The header array must be a flat array of strings
16
+
17
+ If no suitable header is found, fall back to generic JSON load.
18
+ '''
19
+
20
+ with p.open(encoding=vd.options.encoding) as fp:
21
+ first_line = next(fp)
22
+
23
+ if first_line.strip().startswith('['):
24
+ ret = json.loads(first_line)
25
+ if isinstance(ret, list) and all(isinstance(v, str) for v in ret):
26
+ return dict(filetype='jsonla')
27
+
28
+
29
+ @VisiData.api
30
+ def open_jsonla(vd, p):
31
+ return JsonlArraySheet(p.base_stem, source=p)
32
+
33
+
34
+ class JsonlArraySheet(SequenceSheet):
35
+ rowtype = 'rows' # rowdef: list of Python objects decoded from JSON
36
+ def iterload(self):
37
+ with self.open_text_source() as fp:
38
+ for L in fp:
39
+ yield json.loads(L)
40
+
41
+ # set column types from first row
42
+ for i, c in enumerate(self.columns):
43
+ c.type = deduceType(self.rows[0][i])
44
+
45
+
46
+ def get_jsonla_rows(sheet, cols):
47
+ for row in Progress(sheet.rows):
48
+ yield [vd.get_json_value(col, row) for col in cols]
49
+
50
+
51
+ class _vjsonEncoder(json.JSONEncoder):
52
+ def default(self, obj):
53
+ return str(obj)
54
+
55
+
56
+ def write_jsonla(vs, fp):
57
+ vcols = vs.visibleCols
58
+ jsonenc = _vjsonEncoder()
59
+ with Progress(gerund='saving'):
60
+ header = [col.name for col in vcols]
61
+ fp.write(jsonenc.encode(header) + '\n')
62
+ rows = get_jsonla_rows(vs, vcols)
63
+ for row in rows:
64
+ fp.write(jsonenc.encode(row) + '\n')
65
+
66
+
67
+ @VisiData.api
68
+ def save_jsonla(vd, p, *vsheets):
69
+ with p.open(mode='w', encoding=vsheets[0].options.save_encoding) as fp:
70
+ for vs in vsheets:
71
+ write_jsonla(vs, fp)
72
+
73
+
74
+ JsonlArraySheet.options.regex_skip = r'^(//|#).*'
visidata/loaders/lsv.py CHANGED
@@ -1,17 +1,19 @@
1
1
  import collections
2
2
 
3
+ #1179 Line Separated Values for e.g. awk
4
+
3
5
  from visidata import VisiData, Sheet, ItemColumn
4
6
 
5
7
 
6
8
  @VisiData.api
7
9
  def open_lsv(vd, p):
8
- return LsvSheet(p.name, source=p)
10
+ return LsvSheet(p.base_stem, source=p)
9
11
 
10
12
 
11
13
  @VisiData.api
12
14
  def save_lsv(vd, p, *vsheets):
13
15
  vs = vsheets[0]
14
- with p.open_text(mode='w', encoding=vs.options.encoding) as fp:
16
+ with p.open(mode='w', encoding=vs.options.save_encoding) as fp:
15
17
  for row in vs.iterrows():
16
18
  for col in vs.visibleCols:
17
19
  fp.write('%s: %s\n' % (col.name, col.getDisplayValue(row)))
@@ -33,17 +35,19 @@ class LsvSheet(Sheet):
33
35
  self._knownCols = set()
34
36
  row = collections.defaultdict(str)
35
37
  k = ''
36
- for line in self.source.open_text():
37
- line = line.strip()
38
- if not line:
39
- yield row
40
- row = collections.defaultdict(str)
41
38
 
42
- if ':' in line:
43
- k, line = line.split(':', maxsplit=1)
44
- # else append to previous k
39
+ with self.open_text_source() as fp:
40
+ for line in fp:
41
+ line = line.strip()
42
+ if not line:
43
+ yield row
44
+ row = collections.defaultdict(str)
45
+
46
+ if ':' in line:
47
+ k, line = line.split(':', maxsplit=1)
48
+ # else append to previous k
45
49
 
46
- row[k.strip()] += line.strip()
50
+ row[k.strip()] += line.strip()
47
51
 
48
52
  if row:
49
53
  yield row
@@ -0,0 +1,40 @@
1
+ from visidata import VisiData, Sheet, ItemColumn, date, Column
2
+
3
+
4
+ @VisiData.api
5
+ def open_mbox(vd, p):
6
+ return MboxSheet(p.base_stem, source=p, format='mbox')
7
+
8
+ @VisiData.api
9
+ def open_maildir(vd, p):
10
+ return MboxSheet(p.base_stem, source=p, format='Maildir')
11
+
12
+ @VisiData.api
13
+ def open_mmdf(vd, p):
14
+ return MboxSheet(p.base_stem, source=p, format='MMDF')
15
+
16
+ @VisiData.api
17
+ def open_babyl(vd, p):
18
+ return MboxSheet(p.base_stem, source=p, format='Babyl')
19
+
20
+ @VisiData.api
21
+ def open_mh(vd, p):
22
+ return MboxSheet(p.base_stem, source=p, format='MH')
23
+
24
+
25
+ class MboxSheet(Sheet):
26
+ columns = [
27
+ ItemColumn('Date', type=date),
28
+ ItemColumn('From'),
29
+ ItemColumn('To'),
30
+ ItemColumn('Cc'),
31
+ ItemColumn('Subject'),
32
+ Column('Payload', getter=lambda c,r: r.get_payload(decode=True),
33
+ setter=lambda c,r,v: r.set_payload(v)),
34
+ ]
35
+ def iterload(self):
36
+ import mailbox
37
+ cls = getattr(mailbox, self.format)
38
+ self.mailbox = cls(str(self.source), create=False)
39
+ for r in self.mailbox.itervalues():
40
+ yield r
@@ -26,7 +26,7 @@ def write_md(p, *vsheets, md_style='orgmode'):
26
26
  else:
27
27
  delim = '|'
28
28
 
29
- with p.open_text(mode='w', encoding=vsheets[0].options.encoding) as fp:
29
+ with p.open(mode='w', encoding=vsheets[0].options.save_encoding) as fp:
30
30
  for vs in vsheets:
31
31
  if len(vsheets) > 1:
32
32
  fp.write('# %s\n\n' % vs.name)
@@ -44,8 +44,6 @@ def write_md(p, *vsheets, md_style='orgmode'):
44
44
  fp.write(s)
45
45
  fp.write('\n')
46
46
 
47
- vd.status('%s save finished' % p)
48
-
49
47
 
50
48
  @VisiData.api
51
49
  def save_md(vd, p, *sheets):