visidata 2.11.1__py3-none-any.whl → 3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- visidata/__init__.py +72 -91
- visidata/_input.py +259 -42
- visidata/_open.py +84 -29
- visidata/_types.py +21 -3
- visidata/_urlcache.py +17 -4
- visidata/aggregators.py +65 -25
- visidata/apps/__init__.py +0 -0
- visidata/apps/vdsql/__about__.py +8 -0
- visidata/apps/vdsql/__init__.py +5 -0
- visidata/apps/vdsql/__main__.py +27 -0
- visidata/apps/vdsql/_ibis.py +748 -0
- visidata/apps/vdsql/bigquery.py +61 -0
- visidata/apps/vdsql/clickhouse.py +53 -0
- visidata/apps/vdsql/setup.py +40 -0
- visidata/apps/vdsql/snowflake.py +67 -0
- visidata/apps/vgit/__init__.py +13 -0
- {vgit → visidata/apps/vgit}/blame.py +5 -2
- {vgit → visidata/apps/vgit}/branch.py +31 -16
- {vgit → visidata/apps/vgit}/config.py +3 -3
- visidata/apps/vgit/diff.py +169 -0
- visidata/apps/vgit/gitsheet.py +161 -0
- {vgit → visidata/apps/vgit}/grep.py +6 -5
- visidata/apps/vgit/log.py +81 -0
- {vgit → visidata/apps/vgit}/main.py +18 -5
- {vgit → visidata/apps/vgit}/remote.py +8 -4
- visidata/apps/vgit/repos.py +71 -0
- {vgit → visidata/apps/vgit}/setup.py +6 -4
- visidata/apps/vgit/stash.py +69 -0
- visidata/apps/vgit/status.py +204 -0
- {vgit → visidata/apps/vgit}/statusbar.py +2 -0
- visidata/basesheet.py +59 -50
- visidata/canvas.py +208 -93
- visidata/choose.py +6 -6
- visidata/clean_names.py +29 -0
- visidata/clipboard.py +73 -17
- visidata/cliptext.py +220 -46
- visidata/cmdlog.py +88 -114
- visidata/color.py +142 -56
- visidata/column.py +121 -129
- visidata/ddw/input.ddw +74 -79
- visidata/ddw/regex.ddw +57 -0
- visidata/ddwplay.py +33 -14
- visidata/deprecated.py +77 -3
- visidata/desktop/visidata.desktop +7 -0
- visidata/editor.py +12 -6
- visidata/errors.py +5 -1
- visidata/experimental/__init__.py +0 -0
- visidata/experimental/diff_sheet.py +29 -0
- visidata/experimental/digit_autoedit.py +6 -0
- visidata/experimental/gdrive.py +89 -0
- visidata/experimental/google.py +37 -0
- visidata/experimental/gsheets.py +79 -0
- visidata/experimental/live_search.py +37 -0
- visidata/experimental/liveupdate.py +45 -0
- visidata/experimental/mark.py +133 -0
- visidata/experimental/noahs_tapestry/__init__.py +1 -0
- visidata/experimental/noahs_tapestry/tapestry.py +147 -0
- visidata/experimental/rownum.py +73 -0
- visidata/experimental/slide_cells.py +26 -0
- visidata/expr.py +8 -4
- visidata/extensible.py +30 -5
- visidata/features/__init__.py +0 -0
- visidata/features/addcol_audiometadata.py +42 -0
- visidata/features/addcol_histogram.py +34 -0
- visidata/features/canvas_save_svg.py +69 -0
- visidata/features/change_precision.py +46 -0
- visidata/features/cmdpalette.py +163 -0
- visidata/features/colorbrewer.py +363 -0
- visidata/{colorsheet.py → features/colorsheet.py} +17 -16
- visidata/features/command_server.py +105 -0
- visidata/features/currency_to_usd.py +70 -0
- visidata/{customdate.py → features/customdate.py} +2 -0
- visidata/features/dedupe.py +132 -0
- visidata/{describe.py → features/describe.py} +17 -15
- visidata/features/errors_guide.py +26 -0
- visidata/features/expand_cols.py +202 -0
- visidata/{fill.py → features/fill.py} +3 -1
- visidata/{freeze.py → features/freeze.py} +11 -6
- visidata/features/graph_seaborn.py +79 -0
- visidata/features/helloworld.py +10 -0
- visidata/features/hint_types.py +17 -0
- visidata/{incr.py → features/incr.py} +5 -0
- visidata/{join.py → features/join.py} +107 -53
- visidata/features/known_cols.py +21 -0
- visidata/features/layout.py +62 -0
- visidata/{melt.py → features/melt.py} +32 -21
- visidata/features/normcol.py +118 -0
- visidata/features/open_config.py +7 -0
- visidata/features/open_syspaste.py +18 -0
- visidata/features/ping.py +157 -0
- visidata/features/procmgr.py +208 -0
- visidata/features/random_sample.py +6 -0
- visidata/{regex.py → features/regex.py} +47 -31
- visidata/features/reload_every.py +55 -0
- visidata/features/rename_col_cascade.py +30 -0
- visidata/features/scroll_context.py +60 -0
- visidata/features/select_equal_selected.py +11 -0
- visidata/features/setcol_fake.py +65 -0
- visidata/{slide.py → features/slide.py} +75 -21
- visidata/features/sparkline.py +48 -0
- visidata/features/status_source.py +20 -0
- visidata/{sysedit.py → features/sysedit.py} +2 -1
- visidata/features/sysopen_mailcap.py +46 -0
- visidata/features/term_extras.py +13 -0
- visidata/{transpose.py → features/transpose.py} +5 -4
- visidata/features/type_ipaddr.py +73 -0
- visidata/features/type_url.py +11 -0
- visidata/{unfurl.py → features/unfurl.py} +9 -9
- visidata/{window.py → features/window.py} +2 -2
- visidata/form.py +50 -21
- visidata/freqtbl.py +81 -33
- visidata/fuzzymatch.py +414 -0
- visidata/graph.py +105 -33
- visidata/guide.py +180 -0
- visidata/help.py +75 -44
- visidata/hint.py +39 -0
- visidata/indexsheet.py +109 -0
- visidata/input_history.py +55 -0
- visidata/interface.py +58 -0
- visidata/keys.py +17 -16
- visidata/loaders/__init__.py +9 -0
- visidata/loaders/_pandas.py +61 -21
- visidata/loaders/api_airtable.py +70 -0
- visidata/loaders/api_bitio.py +102 -0
- visidata/loaders/api_matrix.py +148 -0
- visidata/loaders/api_reddit.py +306 -0
- visidata/loaders/api_zulip.py +249 -0
- visidata/loaders/archive.py +41 -7
- visidata/loaders/arrow.py +7 -7
- visidata/loaders/conll.py +49 -0
- visidata/loaders/csv.py +25 -7
- visidata/loaders/eml.py +3 -4
- visidata/loaders/f5log.py +1204 -0
- visidata/loaders/fec.py +325 -0
- visidata/loaders/fixed_width.py +2 -4
- visidata/loaders/frictionless.py +3 -3
- visidata/loaders/geojson.py +8 -5
- visidata/loaders/google.py +48 -0
- visidata/loaders/graphviz.py +4 -4
- visidata/loaders/hdf5.py +4 -4
- visidata/loaders/html.py +48 -10
- visidata/loaders/http.py +84 -30
- visidata/loaders/imap.py +20 -10
- visidata/loaders/jrnl.py +52 -0
- visidata/loaders/json.py +83 -29
- visidata/loaders/jsonla.py +74 -0
- visidata/loaders/lsv.py +15 -11
- visidata/loaders/mailbox.py +40 -0
- visidata/loaders/markdown.py +1 -3
- visidata/loaders/mbtiles.py +4 -5
- visidata/loaders/mysql.py +11 -13
- visidata/loaders/npy.py +7 -7
- visidata/loaders/odf.py +4 -1
- visidata/loaders/orgmode.py +428 -0
- visidata/loaders/pandas_freqtbl.py +14 -20
- visidata/loaders/parquet.py +62 -6
- visidata/loaders/pcap.py +3 -3
- visidata/loaders/pdf.py +4 -3
- visidata/loaders/png.py +19 -13
- visidata/loaders/postgres.py +9 -8
- visidata/loaders/rec.py +7 -3
- visidata/loaders/s3.py +342 -0
- visidata/loaders/sas.py +5 -5
- visidata/loaders/scrape.py +186 -0
- visidata/loaders/shp.py +6 -5
- visidata/loaders/spss.py +5 -6
- visidata/loaders/sqlite.py +68 -28
- visidata/loaders/texttables.py +1 -1
- visidata/loaders/toml.py +60 -0
- visidata/loaders/tsv.py +61 -19
- visidata/loaders/ttf.py +19 -7
- visidata/loaders/unzip_http.py +6 -5
- visidata/loaders/usv.py +1 -1
- visidata/loaders/vcf.py +16 -16
- visidata/loaders/vds.py +10 -7
- visidata/loaders/vdx.py +30 -5
- visidata/loaders/xlsb.py +8 -1
- visidata/loaders/xlsx.py +145 -25
- visidata/loaders/xml.py +6 -3
- visidata/loaders/xword.py +4 -4
- visidata/loaders/yaml.py +15 -5
- visidata/macros.py +129 -42
- visidata/main.py +119 -94
- visidata/mainloop.py +101 -155
- visidata/man/parse_options.py +2 -2
- visidata/man/vd.1 +301 -148
- visidata/man/vd.txt +290 -153
- visidata/memory.py +3 -3
- visidata/menu.py +104 -423
- visidata/metasheets.py +59 -141
- visidata/modify.py +78 -23
- visidata/motd.py +3 -3
- visidata/mouse.py +137 -0
- visidata/movement.py +43 -35
- visidata/optionssheet.py +99 -0
- visidata/path.py +113 -32
- visidata/pivot.py +73 -47
- visidata/plugins.py +65 -192
- visidata/pyobj.py +50 -201
- visidata/rename_col.py +20 -0
- visidata/save.py +37 -20
- visidata/search.py +54 -10
- visidata/selection.py +84 -5
- visidata/settings.py +162 -25
- visidata/sheets.py +229 -257
- visidata/shell.py +51 -21
- visidata/sidebar.py +162 -0
- visidata/sort.py +11 -4
- visidata/statusbar.py +113 -104
- visidata/stored_list.py +43 -0
- visidata/stored_prop.py +38 -0
- visidata/tests/conftest.py +3 -3
- visidata/tests/test_cliptext.py +39 -0
- visidata/tests/test_commands.py +62 -7
- visidata/tests/test_edittext.py +2 -2
- visidata/tests/test_features.py +17 -0
- visidata/tests/test_menu.py +14 -0
- visidata/tests/test_path.py +13 -4
- visidata/text_source.py +53 -0
- visidata/textsheet.py +10 -3
- visidata/theme.py +44 -0
- visidata/themes/__init__.py +0 -0
- visidata/themes/ascii8.py +84 -0
- visidata/themes/asciimono.py +84 -0
- visidata/themes/light.py +17 -0
- visidata/threads.py +87 -39
- visidata/tuiwin.py +22 -0
- visidata/type_currency.py +22 -3
- visidata/type_date.py +31 -9
- visidata/type_floatsi.py +5 -1
- visidata/undo.py +17 -5
- visidata/utils.py +106 -23
- visidata/vdobj.py +28 -17
- visidata/windows.py +10 -0
- visidata/wrappers.py +9 -3
- visidata-3.0.data/data/share/applications/visidata.desktop +7 -0
- {visidata-2.11.1.data → visidata-3.0.data}/data/share/man/man1/vd.1 +301 -148
- {visidata-2.11.1.data → visidata-3.0.data}/data/share/man/man1/visidata.1 +301 -148
- visidata-3.0.data/scripts/vd2to3.vdx +9 -0
- {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/METADATA +12 -8
- visidata-3.0.dist-info/RECORD +257 -0
- {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/WHEEL +1 -1
- vgit/__init__.py +0 -1
- vgit/gitsheet.py +0 -164
- visidata/layout.py +0 -44
- visidata/misc.py +0 -5
- visidata-2.11.1.data/scripts/vgit +0 -9
- visidata-2.11.1.dist-info/RECORD +0 -155
- {vgit → visidata/apps/vgit}/__main__.py +0 -0
- {vgit → visidata/apps/vgit}/abort.py +0 -0
- /visidata/{repeat.py → features/repeat.py} +0 -0
- {visidata-2.11.1.data → visidata-3.0.data}/scripts/vd +0 -0
- {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/LICENSE.gpl3 +0 -0
- {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/entry_points.txt +0 -0
- {visidata-2.11.1.dist-info → visidata-3.0.dist-info}/top_level.txt +0 -0
visidata/fuzzymatch.py
ADDED
@@ -0,0 +1,414 @@
|
|
1
|
+
''' Fuzzy String Matching.
|
2
|
+
|
3
|
+
This module is a pretty verbatim Python port of fzf's FuzzyMatchV2
|
4
|
+
trimmed down to a basic usecase of matching ASCII strings case sensitively.
|
5
|
+
|
6
|
+
For more information check out the source, I have not bothered to copy
|
7
|
+
the introductory comment/documentation:
|
8
|
+
|
9
|
+
https://github.com/junegunn/fzf/blob/b1a0ab8086/src/algo/algo.go
|
10
|
+
|
11
|
+
'''
|
12
|
+
import collections
|
13
|
+
from dataclasses import dataclass
|
14
|
+
from enum import Enum
|
15
|
+
from visidata import VisiData, vd
|
16
|
+
|
17
|
+
# Set to True to print a diagnostic visualization of the score matrix
|
18
|
+
DEBUG = False
|
19
|
+
|
20
|
+
scoreMatch = 16
|
21
|
+
scoreGapStart = -3
|
22
|
+
scoreGapExtension = -1
|
23
|
+
|
24
|
+
# We prefer matches at the beginning of a word, but the bonus should not be
|
25
|
+
# too great to prevent the longer acronym matches from always winning over
|
26
|
+
# shorter fuzzy matches. The bonus point here was specifically chosen that
|
27
|
+
# the bonus is cancelled when the gap between the acronyms grows over
|
28
|
+
# 8 characters, which is approximately the average length of the words found
|
29
|
+
# in web2 dictionary and my file system.
|
30
|
+
bonusBoundary = scoreMatch / 2
|
31
|
+
|
32
|
+
# Although bonus point for non-word characters is non-contextual, we need it
|
33
|
+
# for computing bonus points for consecutive chunks starting with a non-word
|
34
|
+
# character.
|
35
|
+
bonusNonWord = scoreMatch / 2
|
36
|
+
|
37
|
+
# Edge-triggered bonus for matches in camelCase words.
|
38
|
+
# Compared to word-boundary case, they don't accompany single-character gaps
|
39
|
+
# (e.g. FooBar vs. foo-bar), so we deduct bonus point accordingly.
|
40
|
+
bonusCamel123 = bonusBoundary + scoreGapExtension
|
41
|
+
|
42
|
+
# Minimum bonus point given to characters in consecutive chunks.
|
43
|
+
# Note that bonus points for consecutive matches wouldn't have been needed if we
|
44
|
+
# used fixed match score as in the original algorithm.
|
45
|
+
bonusConsecutive = -(scoreGapStart + scoreGapExtension)
|
46
|
+
|
47
|
+
# The first character in the typed pattern usually has more significance
|
48
|
+
# than the rest so it's important that it appears at special positions where
|
49
|
+
# bonus points are given, e.g. 'to-go' vs. 'ongoing' on 'og' or on 'ogo'.
|
50
|
+
# The amount of the extra bonus should be limited so that the gap penalty is
|
51
|
+
# still respected.
|
52
|
+
bonusFirstCharMultiplier = 2
|
53
|
+
|
54
|
+
# Extra bonus for word boundary after whitespace character or beginning of the string
|
55
|
+
bonusBoundaryWhite = bonusBoundary + 2
|
56
|
+
|
57
|
+
# Extra bonus for word boundary after slash, colon, semi-colon, and comma
|
58
|
+
bonusBoundaryDelimiter = bonusBoundary + 1
|
59
|
+
|
60
|
+
delimiterChars = '/,:;|'
|
61
|
+
|
62
|
+
vd.theme_option('color_match', 'red', 'color for matching chars in palette chooser')
|
63
|
+
|
64
|
+
whiteChars = ' \t\n\v\f\r\x85\xA0'
|
65
|
+
|
66
|
+
(
|
67
|
+
charWhite,
|
68
|
+
charNonWord,
|
69
|
+
charDelimiter,
|
70
|
+
charLower,
|
71
|
+
charUpper,
|
72
|
+
charLetter,
|
73
|
+
charNumber,
|
74
|
+
) = range(7)
|
75
|
+
initialCharClass = charWhite
|
76
|
+
|
77
|
+
|
78
|
+
def asciiFuzzyIndex(target, pattern):
    '''Find where a fuzzy match of *pattern* inside *target* could begin.

    The characters of *pattern* must all occur in *target*, in order
    (not necessarily adjacent).  If they do not, -1 is returned.

    Otherwise the result is the index of the first occurrence of the first
    pattern character, moved one position to the left unless it is already 0,
    so that the caller can see the preceding character when computing bonuses.
    An empty pattern yields 0.
    '''
    start = 0
    cursor = 0
    for n, wanted in enumerate(pattern):
        cursor = target.find(wanted, cursor)
        if cursor < 0:
            # this pattern character does not occur after the previous one
            return -1
        if n == 0 and cursor > 0:
            # Step back to find the right bonus point
            start = cursor - 1
        # continue searching strictly after the character just matched
        cursor += 1
    return start
|
95
|
+
|
96
|
+
|
97
|
+
def charClassOfAscii(char):
    '''Return the char* class constant that *char* belongs to.

    Letters and digits are recognised by ASCII range; whitespace and
    delimiters by membership in whiteChars / delimiterChars.  Anything
    else is classified as charNonWord.
    '''
    if 'a' <= char <= 'z':
        return charLower
    if 'A' <= char <= 'Z':
        return charUpper
    if '0' <= char <= '9':
        return charNumber
    if char in whiteChars:
        return charWhite
    if char in delimiterChars:
        return charDelimiter
    return charNonWord
|
109
|
+
|
110
|
+
|
111
|
+
def bonusFor(prevClass, class_):
    '''Return the positional bonus for a character of class *class_*
    that directly follows a character of class *prevClass*.

    Both arguments are char* class constants.  The result is one of the
    bonus* constants, or 0 when the position deserves no bonus.
    '''
    if class_ > charNonWord:
        # The character starts a new word if it follows a separator.
        if prevClass == charWhite:
            # Word boundary after whitespace
            return bonusBoundaryWhite
        if prevClass == charDelimiter:
            # Word boundary after a delimiter character
            return bonusBoundaryDelimiter
        if prevClass == charNonWord:
            # Word boundary
            return bonusBoundary

    # camelCase letter123
    camelHump = prevClass == charLower and class_ == charUpper
    digitStart = prevClass != charNumber and class_ == charNumber
    if camelHump or digitStart:
        return bonusCamel123

    # NOTE(review): a charDelimiter character that does not follow a
    # white/delimiter/non-word character ends up with 0 here; confirm
    # against the fzf source this was ported from that this is intended.
    if class_ == charNonWord:
        return bonusNonWord
    if class_ == charWhite:
        return bonusBoundaryWhite
    return 0
|
135
|
+
|
136
|
+
|
137
|
+
def debugV2(T, pattern, F, lastIdx, H, C):
    '''Visualize the score matrix and matching positions.

    Prints to stdout, for every pattern character, one line with the row of
    the score matrix H and one line with the row of the consecutive-length
    matrix C.  Before the first row a header with the target characters
    T[F[0]..lastIdx] is printed.

    T: sequence of target characters
    pattern: the pattern being matched
    F: for each pattern character, the index in T of its first possible match
    lastIdx: index in T of the last column covered by the matrices
    H, C: flat row-major matrices, one row of (lastIdx - F[0] + 1) cells
          per pattern character
    '''
    # number of columns per matrix row
    width = lastIdx - F[0] + 1

    for i, f in enumerate(F):
        # offset of row i inside the flat matrices
        I = i * width
        if i == 0:
            # header line: the target characters covered by the matrix
            print(' ', end='')
            for j in range(f, lastIdx + 1):
                print(f' {T[j]} ', end='')
            print()
        print(pattern[i] + ' ', end='')
        # columns before this row's first possible match are never filled in
        for idx in range(F[0], f):
            print(' 0 ', end='')
        for idx in range(f, lastIdx + 1):
            print(f'{int(H[i*width+idx-int(F[0])]):2d} ', end='')
        print()

        # second line for this row: consecutive-chunk lengths from C
        print(' ', end='')
        for idx, p in enumerate(C[I : I + width]):
            if idx + int(F[0]) < int(F[i]):
                # cell lies left of this row's first possible match
                p = 0
            if p > 0:
                print(f'{p:2d} ', end='')
            else:
                print(' ', end='')
        print()
|
164
|
+
|
165
|
+
|
166
|
+
@dataclass
class MatchResult:
    '''Represents a scored match of a fuzzymatching search.

    start: starting index of where the pattern is in the target sequence
           (-1 when _fuzzymatch found no match)
    end: Similarly, the end index (exclusive; -1 when there is no match)
    score: A value of how good the match is; _fuzzymatch reports 0 for
           "no match" and for an empty pattern.
    positions: A list of indices, indexing into the target sequence.
               Corresponds to every position a letter of the pattern was found
               for this particular alignment.  _fuzzymatch collects them while
               walking backwards, so they run from the last matched character
               to the first; it stores None when there is no match.
    '''

    start: int
    end: int
    score: int
    positions: 'list[int]'
|
182
|
+
|
183
|
+
|
184
|
+
def _fuzzymatch(target: str, pattern: str) -> MatchResult:
    '''Fuzzy string matching algorithm.

    For a target sequence, check whether (and how good) a pattern is matching.

    Returns a MatchResult, which contains start and end index of the match,
    a score, and the positions where the pattern occurred.

    Special cases:
      - empty pattern: MatchResult(0, 0, 0, [])
      - pattern is not a subsequence of target: MatchResult(-1, -1, 0, None)

    The positions of a successful multi-character match are collected while
    backtracking, i.e. from the last matched character to the first.

    The matching is case sensitive, so it's necessary to lower input and pattern
    in the caller, if preferred otherwise.

    The functionality is based on fzf's FuzzyMatchV2, minus some advanced features.
    '''
    patternLength = len(pattern)
    if patternLength == 0:
        return MatchResult(0, 0, 0, [])
    targetLength = len(target)

    # Phase 1: Optimized search for ASCII string
    idx = asciiFuzzyIndex(target, pattern)
    if idx < 0:
        return MatchResult(-1, -1, 0, None)

    # Scores and consecutive-lengths for the first pattern character
    H0 = [0] * targetLength
    C0 = [0] * targetLength
    # Bonus point for each position
    B = [0] * targetLength
    # The first occurrence of each character in the pattern
    F = [0] * patternLength
    T = list(target)

    # Phase 2: Calculate bonus for each point
    maxScore, maxScorePos = 0, 0
    pidx, lastIdx = 0, 0
    pchar0, pchar, prevH0, prevClass, inGap = (
        pattern[0],
        pattern[0],
        0,
        initialCharClass,
        False,
    )
    Tsub = T[idx:]
    H0sub, C0sub, Bsub = H0[idx:], C0[idx:], B[idx:]

    for off, char in enumerate(Tsub):
        class_ = charClassOfAscii(char)
        bonus = bonusFor(prevClass, class_)
        Bsub[off] = bonus
        prevClass = class_

        if char == pchar:
            if pidx < patternLength:
                F[pidx] = idx + off
                pidx += 1
                pchar = pattern[min(pidx, patternLength - 1)]
            # keeps advancing on every later occurrence of the last pattern char
            lastIdx = idx + off

        if char == pchar0:
            score = scoreMatch + bonus * bonusFirstCharMultiplier
            H0sub[off] = score
            C0sub[off] = 1
            if patternLength == 1 and (score > maxScore):
                maxScore, maxScorePos = score, idx + off
                if bonus >= bonusBoundary:
                    # single-char pattern at a boundary: stop at this position
                    break
            inGap = False
        else:
            if inGap:
                H0sub[off] = max(prevH0 + scoreGapExtension, 0)
            else:
                H0sub[off] = max(prevH0 + scoreGapStart, 0)
            C0sub[off] = 0
            inGap = True
        prevH0 = H0sub[off]

    # write back, because slices in python are a full copy (as opposed to go)
    H0[idx:], C0[idx:], B[idx:] = H0sub, C0sub, Bsub

    if pidx != patternLength:
        return MatchResult(-1, -1, 0, None)
    if patternLength == 1:
        return MatchResult(maxScorePos, maxScorePos + 1, maxScore, [maxScorePos])

    # Phase 3: Fill in score matrix (H)
    # do not allow omission.
    f0 = F[0]
    width = lastIdx - f0 + 1
    # H and C are flat row-major matrices: one row of `width` cells per
    # pattern character, column 0 corresponding to target index f0.
    H = [0] * width * patternLength
    H[:width] = list(H0[f0 : lastIdx + 1])

    # Possible length of consecutive chunk at each position.
    C = [0] * width * patternLength
    C[:width] = C0[f0 : lastIdx + 1]

    for pidx in range(1, patternLength):
        f = F[pidx]
        pchar = pattern[pidx]
        row = pidx * width
        inGap = False
        # flat index of the cell (pidx, f), the first cell filled on this row
        base = row + f - f0
        # the cell left of the first filled cell counts as score 0
        H[base - 1] = 0
        for off, char in enumerate(T[f : lastIdx + 1]):
            col = off + f
            # The cells are indexed directly instead of re-slicing H and C on
            # every iteration: python slices copy, which made each step cost
            # as much as the remaining matrix.
            cur = base + off
            left = cur - 1
            diag = left - width
            s1, s2, consecutive = 0, 0, 0

            # s2: score when this target char is skipped (gap)
            if inGap:
                s2 = H[left] + scoreGapExtension
            else:
                s2 = H[left] + scoreGapStart

            # s1: score when this target char is matched to pchar
            if pchar == char:
                s1 = H[diag] + scoreMatch
                b = B[col]
                consecutive = C[diag] + 1
                if consecutive > 1:
                    # bonus of the first character of the current chunk
                    fb = B[col - consecutive + 1]
                    # Break consecutive chunk
                    if b >= bonusBoundary and b > fb:
                        consecutive = 1
                    else:
                        b = max(b, max(bonusConsecutive, fb))
                if s1 + b < s2:
                    s1 += B[col]
                    consecutive = 0
                else:
                    s1 += b
            C[cur] = consecutive

            inGap = s1 < s2
            score = max(max(s1, s2), 0)
            if pidx == patternLength - 1 and score > maxScore:
                maxScore, maxScorePos = score, col
            H[cur] = score

    if DEBUG:
        debugV2(T, pattern, F, lastIdx, H, C)

    # Phase 4. (Optional) Backtrace to find character positions
    pos = []
    i = patternLength - 1
    j = maxScorePos
    preferMatch = True
    while True:
        I = i * width
        j0 = j - f0
        s = H[I + j0]

        s1, s2 = 0, 0
        if i > 0 and j >= int(F[i]):
            # score of the diagonal predecessor (previous pattern char)
            s1 = H[I - width + j0 - 1]
        if j > int(F[i]):
            # score of the left neighbour (same pattern char, gap)
            s2 = H[I + j0 - 1]

        if s > s1 and (s > s2 or s == s2 and preferMatch):
            pos.append(j)
            if i == 0:
                break
            i -= 1
        preferMatch = (
            C[I + j0] > 1 or I + width + j0 + 1 < len(C) and C[I + width + j0 + 1] > 0
        )
        j -= 1

    # Start offset we return here is only relevant when begin tiebreak is used.
    # However finding the accurate offset requires backtracking, and we don't
    # want to pay extra cost for the option that has lost its importance.
    return MatchResult(j, maxScorePos + 1, int(maxScore), pos)
|
357
|
+
|
358
|
+
|
359
|
+
def _format_match(s, positions):
|
360
|
+
out = list(s)
|
361
|
+
for p in positions:
|
362
|
+
out[p] = f'[:match]{out[p]}[/]'
|
363
|
+
return "".join(out)
|
364
|
+
|
365
|
+
CombinedMatch = collections.namedtuple('CombinedMatch', 'score formatted match')
|
366
|
+
|
367
|
+
|
368
|
+
@VisiData.api
def fuzzymatch(vd, haystack:"list[dict[str, str]]", needles:"list[str]") -> "list[CombinedMatch]":
    '''Return a CombinedMatch for every dict in *haystack* that fuzzy-matches, best score first.

    Every value of every haystack dict is matched against every needle with
    _fuzzymatch.  A dict is included if at least one of its values matches
    at least one needle with a positive score.  The input dicts are not modified.

    Each returned CombinedMatch has:
      score: the sum of the squared per-key match scores, as an int
      formatted: dict of key -> value with the matched characters wrapped in
                 [:match]...[/] markup (only for keys that matched)
      match: the original haystack dict

    When several needles match the same key, the result of the last matching
    needle is the one kept for that key.
    '''

    matches = []
    for h in haystack:
        match = {}
        formatted_hay = {}
        for k, v in h.items():
            for p in needles:
                mr = _fuzzymatch(v, p)
                if mr.score > 0:
                    match[k] = mr
                    formatted_hay[k] = _format_match(v, mr.positions)

        if match:
            # square to prefer larger scores in a single haystack
            score = int(sum(mr.score**2 for mr in match.values()))
            matches.append(CombinedMatch(score=score, formatted=formatted_hay, match=h))

    return sorted(matches, key=lambda m: -m.score)
|
389
|
+
|
390
|
+
|
391
|
+
@VisiData.api
def test_fuzzymatch(vd):
    '''Self-check of asciiFuzzyIndex, charClassOfAscii and _fuzzymatch
    against known results; raises AssertionError on the first mismatch.'''
    # pattern -> expected (stepped-back) start index within 'helo'
    index_cases = [
        ('h', 0),
        ('hlo', 0),
        ('e', 0),
        ('el', 0),
        ('eo', 0),
        ('l', 1),
        ('lo', 1),
        ('o', 2),
        ('ooh', -1),
    ]
    for pattern, expected in index_cases:
        assert asciiFuzzyIndex('helo', pattern) == expected

    # character -> expected class
    class_cases = [
        ('a', charLower),
        ('C', charUpper),
        ('2', charNumber),
        (' ', charWhite),
        (',', charDelimiter),
    ]
    for char, expected in class_cases:
        assert charClassOfAscii(char) == expected

    # (target, pattern) -> expected full result
    match_cases = [
        ('hello', '', MatchResult(0, 0, 0, [])),
        ('hello', 'nono', MatchResult(-1, -1, 0, None)),
        ('hello', 'l', MatchResult(2, 3, 16, [2])),
        ('hello world', 'elo wo', MatchResult(1, 8, 127, [7, 6, 5, 4, 2, 1])),
    ]
    for target, pattern, expected in match_cases:
        assert _fuzzymatch(target, pattern) == expected
|
visidata/graph.py
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
-
|
1
|
+
import math
|
2
2
|
|
3
|
-
|
3
|
+
from visidata import VisiData, Canvas, Sheet, Progress, BoundingBox, Point
|
4
|
+
from visidata import vd, asyncthread, dispwidth, colors, clipstr
|
5
|
+
|
6
|
+
vd.theme_option('color_graph_axis', 'bold', 'color for graph axis labels')
|
7
|
+
vd.theme_option('disp_graph_tick_x', '╵', 'character for graph x-axis ticks')
|
4
8
|
|
5
9
|
|
6
10
|
@VisiData.api
|
@@ -9,37 +13,67 @@ def numericCols(vd, cols):
|
|
9
13
|
|
10
14
|
|
11
15
|
class InvertedCanvas(Canvas):
|
16
|
+
@asyncthread
|
17
|
+
def render_async(self):
|
18
|
+
self.plot_elements(invert_y=True)
|
19
|
+
|
20
|
+
def fixPoint(self, plotterPoint, canvasPoint):
|
21
|
+
'adjust visibleBox.xymin so that canvasPoint is plotted at plotterPoint'
|
22
|
+
self.visibleBox.xmin = canvasPoint.x - self.canvasW(plotterPoint.x-self.plotviewBox.xmin)
|
23
|
+
self.visibleBox.ymin = canvasPoint.y - self.canvasH(self.plotviewBox.ymax-plotterPoint.y)
|
24
|
+
self.refresh()
|
25
|
+
|
12
26
|
def zoomTo(self, bbox):
|
13
27
|
super().zoomTo(bbox)
|
14
|
-
self.fixPoint(Point(self.plotviewBox.xmin, self.plotviewBox.
|
15
|
-
|
16
|
-
def plotpixel(self, x, y, attr, row=None):
|
17
|
-
y = self.plotviewBox.ymax-y
|
18
|
-
self.pixels[y][x][attr].append(row)
|
28
|
+
self.fixPoint(Point(self.plotviewBox.xmin, self.plotviewBox.ymin),
|
29
|
+
Point(bbox.xmin, bbox.ymax + 1/4*self.canvasCharHeight))
|
19
30
|
|
20
31
|
def scaleY(self, canvasY):
|
21
|
-
'returns plotter y coordinate, with y
|
22
|
-
|
23
|
-
return (self.plotviewBox.ymax-plotterY+4)
|
32
|
+
'returns a plotter y coordinate for a canvas y coordinate, with the y direction inverted'
|
33
|
+
return self.plotviewBox.ymax-round((canvasY-self.visibleBox.ymin)*self.yScaler)
|
24
34
|
|
25
|
-
def
|
26
|
-
|
35
|
+
def unscaleY(self, plotterY_inverted):
|
36
|
+
'performs the inverse of scaleY, returns a canvas y coordinate'
|
37
|
+
return (self.plotviewBox.ymax-plotterY_inverted)/self.yScaler + self.visibleBox.ymin
|
27
38
|
|
28
39
|
@property
|
29
40
|
def canvasMouse(self):
|
30
41
|
p = super().canvasMouse
|
31
|
-
|
42
|
+
if not p: return None
|
43
|
+
p.y = self.unscaleY(self.plotterMouse.y)
|
32
44
|
return p
|
33
45
|
|
46
|
+
def calcTopCursorY(self):
|
47
|
+
'ymin for the cursor that will align its top with the top edge of the graph'
|
48
|
+
return self.visibleBox.ymax - self.cursorBox.h
|
49
|
+
|
50
|
+
def calcBottomCursorY(self):
|
51
|
+
# Shift by 1 plotter pixel, like with goTopCursorY for Canvas. But shift in the
|
52
|
+
# opposite direction, because the y-coordinate system is inverted.
|
53
|
+
'ymin for the cursor that will align its bottom with the bottom edge of the graph'
|
54
|
+
return self.visibleBox.ymin - (1/4 * self.canvasCharHeight)
|
55
|
+
|
56
|
+
def startCursor(self):
|
57
|
+
res = super().startCursor()
|
58
|
+
if not res: return None
|
59
|
+
# Since the y coordinates for plotting increase in the opposite
|
60
|
+
# direction from Canvas, the cursor has to be shifted.
|
61
|
+
self.cursorBox.ymin -= self.canvasCharHeight
|
34
62
|
|
35
63
|
# provides axis labels, legend
|
36
64
|
class GraphSheet(InvertedCanvas):
|
37
65
|
def __init__(self, *names, **kwargs):
|
66
|
+
self.ylabel_maxw = 0
|
38
67
|
super().__init__(*names, **kwargs)
|
39
68
|
|
40
69
|
vd.numericCols(self.xcols) or vd.fail('at least one numeric key col necessary for x-axis')
|
41
70
|
self.ycols or vd.fail('%s is non-numeric' % '/'.join(yc.name for yc in kwargs.get('ycols')))
|
42
71
|
|
72
|
+
def resetCanvasDimensions(self, windowHeight, windowWidth):
|
73
|
+
if self.left_margin < self.ylabel_maxw:
|
74
|
+
self.left_margin = self.ylabel_maxw
|
75
|
+
super().resetCanvasDimensions(windowHeight, windowWidth)
|
76
|
+
|
43
77
|
@asyncthread
|
44
78
|
def reload(self):
|
45
79
|
nerrors = 0
|
@@ -62,10 +96,10 @@ class GraphSheet(InvertedCanvas):
|
|
62
96
|
attr = self.plotColor(k)
|
63
97
|
self.point(graph_x, graph_y, attr, row)
|
64
98
|
nplotted += 1
|
65
|
-
except Exception:
|
99
|
+
except Exception as e:
|
66
100
|
nerrors += 1
|
67
|
-
if options.debug:
|
68
|
-
|
101
|
+
if vd.options.debug:
|
102
|
+
vd.exceptionCaught(e)
|
69
103
|
|
70
104
|
|
71
105
|
vd.status('loaded %d points (%d errors)' % (nplotted, nerrors))
|
@@ -97,6 +131,26 @@ class GraphSheet(InvertedCanvas):
|
|
97
131
|
srccol = self.ycols[0]
|
98
132
|
return srccol.format(srccol.type(amt))
|
99
133
|
|
134
|
+
def formatXLabel(self, amt):
|
135
|
+
if self.xzoomlevel < 1:
|
136
|
+
labels = []
|
137
|
+
for xcol in self.xcols:
|
138
|
+
if vd.isNumeric(xcol):
|
139
|
+
col_amt = float(amt) if xcol.type is int else xcol.type(amt)
|
140
|
+
else:
|
141
|
+
continue
|
142
|
+
labels.append(xcol.format(col_amt))
|
143
|
+
return ','.join(labels)
|
144
|
+
else:
|
145
|
+
return self.formatX(amt)
|
146
|
+
|
147
|
+
def formatYLabel(self, amt):
|
148
|
+
srccol = self.ycols[0]
|
149
|
+
if srccol.type is int and self.yzoomlevel < 1:
|
150
|
+
return srccol.format(float(amt))
|
151
|
+
else:
|
152
|
+
return self.formatY(amt)
|
153
|
+
|
100
154
|
def parseX(self, txt):
|
101
155
|
return self.xcols[0].type(txt)
|
102
156
|
|
@@ -104,26 +158,39 @@ class GraphSheet(InvertedCanvas):
|
|
104
158
|
return self.ycols[0].type(txt)
|
105
159
|
|
106
160
|
def add_y_axis_label(self, frac):
|
107
|
-
txt = self.
|
161
|
+
txt = self.formatYLabel(self.visibleBox.ymin + frac*self.visibleBox.h)
|
162
|
+
w = (dispwidth(txt)+1)*2
|
163
|
+
if self.ylabel_maxw < w:
|
164
|
+
self.ylabel_maxw = w
|
108
165
|
|
109
166
|
# plot y-axis labels on the far left of the canvas, but within the plotview height-wise
|
110
|
-
|
111
|
-
self.plotlabel(0, self.plotviewBox.ymin + (1.0-frac)*self.plotviewBox.h, txt, attr)
|
167
|
+
self.plotlabel(0, self.plotviewBox.ymin + (1.0-frac)*self.plotviewBox.h, txt, 'graph_axis')
|
112
168
|
|
113
169
|
def add_x_axis_label(self, frac):
|
114
|
-
txt = self.
|
170
|
+
txt = self.formatXLabel(self.visibleBox.xmin + frac*self.visibleBox.w)
|
171
|
+
tick = vd.options.disp_graph_tick_x or ''
|
115
172
|
|
116
173
|
# plot x-axis labels below the plotviewBox.ymax, but within the plotview width-wise
|
117
|
-
|
118
|
-
|
119
|
-
if frac
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
174
|
+
x = self.plotviewBox.xmin + frac*self.plotviewBox.w
|
175
|
+
|
176
|
+
if frac < 1.0:
|
177
|
+
txt = tick + txt
|
178
|
+
else:
|
179
|
+
right_margin = self.plotwidth - 1 - self.plotviewBox.xmax
|
180
|
+
if (len(txt)+len(tick))*2 <= right_margin:
|
181
|
+
txt = tick + txt
|
182
|
+
else:
|
183
|
+
# shift rightmost label to be left of its tick
|
184
|
+
x -= len(txt)*2
|
185
|
+
if len(tick) == 0:
|
186
|
+
x += 1
|
187
|
+
txt = txt + tick
|
188
|
+
|
189
|
+
self.plotlabel(x, self.plotviewBox.ymax+4, txt, 'graph_axis')
|
124
190
|
|
125
191
|
def createLabels(self):
|
126
192
|
self.gridlabels = []
|
193
|
+
self.ylabel_maxw = self.leftMarginPixels
|
127
194
|
|
128
195
|
# y-axis
|
129
196
|
self.add_y_axis_label(1.00)
|
@@ -143,18 +210,18 @@ class GraphSheet(InvertedCanvas):
|
|
143
210
|
# TODO: grid lines corresponding to axis labels
|
144
211
|
|
145
212
|
xname = ','.join(xcol.name for xcol in self.xcols if vd.isNumeric(xcol)) or 'row#'
|
146
|
-
xname, _ = clipstr(xname, self.
|
147
|
-
self.plotlabel(0, self.plotviewBox.ymax+4, xname+'»',
|
213
|
+
xname, _ = clipstr(xname, self.left_margin//2-2)
|
214
|
+
self.plotlabel(0, self.plotviewBox.ymax+4, xname+'»', 'graph_axis')
|
148
215
|
|
149
216
|
|
150
217
|
Sheet.addCommand('.', 'plot-column', 'vd.push(GraphSheet(sheet.name, "graph", source=sheet, sourceRows=rows, xcols=keyCols, ycols=numericCols([cursorCol])))', 'plot current numeric column vs key columns; numeric key column is used for x-axis, while categorical key columns determine color')
|
151
218
|
Sheet.addCommand('g.', 'plot-numerics', 'vd.push(GraphSheet(sheet.name, "graph", source=sheet, sourceRows=rows, xcols=keyCols, ycols=numericCols(nonKeyVisibleCols)))', 'plot a graph of all visible numeric columns vs key columns')
|
152
219
|
|
153
220
|
# swap directions of up/down
|
154
|
-
InvertedCanvas.addCommand(None, 'go-up', 'sheet.cursorBox.ymin += cursorBox.h', 'move cursor up by its height')
|
155
|
-
InvertedCanvas.addCommand(None, 'go-down', 'sheet.cursorBox.ymin -= cursorBox.h', 'move cursor down by its height')
|
156
|
-
InvertedCanvas.addCommand(None, 'go-top',
|
157
|
-
InvertedCanvas.addCommand(None, 'go-bottom', 'sheet.cursorBox.ymin =
|
221
|
+
InvertedCanvas.addCommand(None, 'go-up', 'if cursorBox: sheet.cursorBox.ymin += cursorBox.h', 'move cursor up by its height')
|
222
|
+
InvertedCanvas.addCommand(None, 'go-down', 'if cursorBox: sheet.cursorBox.ymin -= cursorBox.h', 'move cursor down by its height')
|
223
|
+
InvertedCanvas.addCommand(None, 'go-top', 'if cursorBox: sheet.cursorBox.ymin = sheet.calcTopCursorY()', 'move cursor to top edge of visible canvas')
|
224
|
+
InvertedCanvas.addCommand(None, 'go-bottom', 'if cursorBox: sheet.cursorBox.ymin = sheet.calcBottomCursorY()', 'move cursor to bottom edge of visible canvas')
|
158
225
|
InvertedCanvas.addCommand(None, 'go-pagedown', 't=(visibleBox.ymax-visibleBox.ymin); sheet.cursorBox.ymin -= t; sheet.visibleBox.ymin -= t; sheet.refresh()', 'move cursor down to next visible page')
|
159
226
|
InvertedCanvas.addCommand(None, 'go-pageup', 't=(visibleBox.ymax-visibleBox.ymin); sheet.cursorBox.ymin += t; sheet.visibleBox.ymin += t; sheet.refresh()', 'move cursor up to previous visible page')
|
160
227
|
|
@@ -185,3 +252,8 @@ vd.addGlobals({
|
|
185
252
|
'GraphSheet': GraphSheet,
|
186
253
|
'InvertedCanvas': InvertedCanvas,
|
187
254
|
})
|
255
|
+
|
256
|
+
vd.addMenuItems('''
|
257
|
+
Plot > Graph > current column > plot-column
|
258
|
+
Plot > Graph > all numeric columns > plot-numerics
|
259
|
+
''')
|