upsetplot-bombcell 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doc/conf.py +289 -0
- examples/plot_customize_after_plot.py +22 -0
- examples/plot_diabetes.py +77 -0
- examples/plot_discrete.py +39 -0
- examples/plot_generated.py +52 -0
- examples/plot_hide.py +42 -0
- examples/plot_highlight.py +75 -0
- examples/plot_highlight_categories.py +41 -0
- examples/plot_missingness.py +23 -0
- examples/plot_sizing.py +49 -0
- examples/plot_theming.py +82 -0
- examples/plot_vertical.py +28 -0
- upsetplot/__init__.py +24 -0
- upsetplot/data.py +420 -0
- upsetplot/plotting.py +1158 -0
- upsetplot/reformat.py +440 -0
- upsetplot/tests/__init__.py +0 -0
- upsetplot/tests/test_data.py +238 -0
- upsetplot/tests/test_examples.py +19 -0
- upsetplot/tests/test_reformat.py +47 -0
- upsetplot/tests/test_upsetplot.py +1234 -0
- upsetplot/util.py +70 -0
- upsetplot_bombcell-0.10.0.dist-info/LICENSE +30 -0
- upsetplot_bombcell-0.10.0.dist-info/METADATA +220 -0
- upsetplot_bombcell-0.10.0.dist-info/RECORD +27 -0
- upsetplot_bombcell-0.10.0.dist-info/WHEEL +5 -0
- upsetplot_bombcell-0.10.0.dist-info/top_level.txt +3 -0
doc/conf.py
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
# upsetplot documentation build configuration file, created by
|
|
2
|
+
# sphinx-quickstart on Mon Jan 18 14:44:12 2016.
|
|
3
|
+
#
|
|
4
|
+
# This file is execfile()d with the current directory set to its
|
|
5
|
+
# containing dir.
|
|
6
|
+
#
|
|
7
|
+
# Note that not all possible configuration values are present in this
|
|
8
|
+
# autogenerated file.
|
|
9
|
+
#
|
|
10
|
+
# All configuration values have a default; values that are commented out
|
|
11
|
+
# serve to show the default.
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
import sys
|
|
16
|
+
import warnings
|
|
17
|
+
|
|
18
|
+
# project root
|
|
19
|
+
sys.path.insert(0, os.path.abspath(".."))
|
|
20
|
+
|
|
21
|
+
import matplotlib # noqa
|
|
22
|
+
|
|
23
|
+
matplotlib.use("agg")
|
|
24
|
+
warnings.filterwarnings(
|
|
25
|
+
"ignore",
|
|
26
|
+
category=UserWarning,
|
|
27
|
+
message="Matplotlib is currently using agg, which is a"
|
|
28
|
+
" non-GUI backend, so cannot show the figure."
|
|
29
|
+
"|(\n|.)*is non-interactive, and thus cannot be shown",
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
import sphinx_rtd_theme # noqa
|
|
33
|
+
from sphinx_gallery.sorting import ExampleTitleSortKey # noqa
|
|
34
|
+
from upsetplot import __version__ as release # noqa
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# If extensions (or modules to document with autodoc) are in another directory,
|
|
38
|
+
# add these directories to sys.path here. If the directory is relative to the
|
|
39
|
+
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
|
40
|
+
|
|
41
|
+
# -- General configuration ---------------------------------------------------
|
|
42
|
+
|
|
43
|
+
# If your documentation needs a minimal Sphinx version, state it here.
|
|
44
|
+
# needs_sphinx = '1.0'
|
|
45
|
+
|
|
46
|
+
# Add any Sphinx extension module names here, as strings. They can be
|
|
47
|
+
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
|
48
|
+
# ones.
|
|
49
|
+
extensions = [
|
|
50
|
+
"sphinx_gallery.gen_gallery",
|
|
51
|
+
"sphinx.ext.autodoc",
|
|
52
|
+
"sphinx.ext.autosummary",
|
|
53
|
+
"sphinx.ext.doctest",
|
|
54
|
+
"sphinx.ext.intersphinx",
|
|
55
|
+
"sphinx.ext.todo",
|
|
56
|
+
"numpydoc",
|
|
57
|
+
"sphinx.ext.ifconfig",
|
|
58
|
+
"sphinx.ext.viewcode",
|
|
59
|
+
"sphinx_issues",
|
|
60
|
+
"nbsphinx",
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
# Add any paths that contain templates here, relative to this directory.
|
|
64
|
+
templates_path = ["_templates"]
|
|
65
|
+
|
|
66
|
+
# The suffix of source filenames.
|
|
67
|
+
source_suffix = ".rst"
|
|
68
|
+
|
|
69
|
+
# The encoding of source files.
|
|
70
|
+
# source_encoding = 'utf-8-sig'
|
|
71
|
+
|
|
72
|
+
# The master toctree document.
|
|
73
|
+
master_doc = "index"
|
|
74
|
+
|
|
75
|
+
# General information about the project.
|
|
76
|
+
project = "upsetplot"
|
|
77
|
+
copyright = "2018-2024, Joel Nothman"
|
|
78
|
+
|
|
79
|
+
# The version info for the project you're documenting, acts as replacement for
|
|
80
|
+
# |version| and |release|, also used in various other places throughout the
|
|
81
|
+
# built documents.
|
|
82
|
+
#
|
|
83
|
+
# The short X.Y version.
|
|
84
|
+
|
|
85
|
+
version = re.match(r"^\d+(\.\d+)*", release).group()
|
|
86
|
+
|
|
87
|
+
# version = upsetplot.__version__
|
|
88
|
+
# The full version, including alpha/beta/rc tags.
|
|
89
|
+
# release = version
|
|
90
|
+
|
|
91
|
+
# The language for content autogenerated by Sphinx. Refer to documentation
|
|
92
|
+
# for a list of supported languages.
|
|
93
|
+
# language = None
|
|
94
|
+
|
|
95
|
+
# There are two options for replacing |today|: either, you set today to some
|
|
96
|
+
# non-false value, then it is used:
|
|
97
|
+
# today = ''
|
|
98
|
+
# Else, today_fmt is used as the format for a strftime call.
|
|
99
|
+
# today_fmt = '%B %d, %Y'
|
|
100
|
+
|
|
101
|
+
# List of patterns, relative to source directory, that match files and
|
|
102
|
+
# directories to ignore when looking for source files.
|
|
103
|
+
exclude_patterns = ["_build"]
|
|
104
|
+
|
|
105
|
+
# The reST default role (used for this markup: `text`) to use for all
|
|
106
|
+
# documents.
|
|
107
|
+
default_role = "any"
|
|
108
|
+
|
|
109
|
+
# If true, '()' will be appended to :func: etc. cross-reference text.
|
|
110
|
+
# add_function_parentheses = True
|
|
111
|
+
|
|
112
|
+
# If true, the current module name will be prepended to all description
|
|
113
|
+
# unit titles (such as .. function::).
|
|
114
|
+
# add_module_names = True
|
|
115
|
+
|
|
116
|
+
# If true, sectionauthor and moduleauthor directives will be shown in the
|
|
117
|
+
# output. They are ignored by default.
|
|
118
|
+
# show_authors = False
|
|
119
|
+
|
|
120
|
+
# The name of the Pygments (syntax highlighting) style to use.
|
|
121
|
+
pygments_style = "sphinx"
|
|
122
|
+
|
|
123
|
+
# A list of ignored prefixes for module index sorting.
|
|
124
|
+
# modindex_common_prefix = []
|
|
125
|
+
|
|
126
|
+
# If true, keep warnings as "system message" paragraphs in the built documents.
|
|
127
|
+
# keep_warnings = False
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# -- Options for HTML output ----------------------------------------------
|
|
131
|
+
|
|
132
|
+
# The theme to use for HTML and HTML Help pages. See the documentation for
|
|
133
|
+
# a list of builtin themes.
|
|
134
|
+
html_theme = "sphinx_rtd_theme"
|
|
135
|
+
|
|
136
|
+
# Theme options are theme-specific and customize the look and feel of a theme
|
|
137
|
+
# further. For a list of options available for each theme, see the
|
|
138
|
+
# documentation.
|
|
139
|
+
# html_theme_options = {}
|
|
140
|
+
|
|
141
|
+
# Add any paths that contain custom themes here, relative to this directory.
|
|
142
|
+
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
|
|
143
|
+
|
|
144
|
+
# The name for this set of Sphinx documents. If None, it defaults to
|
|
145
|
+
# "<project> v<release> documentation".
|
|
146
|
+
# html_title = None
|
|
147
|
+
|
|
148
|
+
# A shorter title for the navigation bar. Default is the same as html_title.
|
|
149
|
+
# html_short_title = None
|
|
150
|
+
|
|
151
|
+
# The name of an image file (relative to this directory) to place at the top
|
|
152
|
+
# of the sidebar.
|
|
153
|
+
# html_logo = None
|
|
154
|
+
|
|
155
|
+
# The name of an image file (within the static path) to use as favicon of the
|
|
156
|
+
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
|
157
|
+
# pixels large.
|
|
158
|
+
# html_favicon = None
|
|
159
|
+
|
|
160
|
+
# Add any paths that contain custom static files (such as style sheets) here,
|
|
161
|
+
# relative to this directory. They are copied after the builtin static files,
|
|
162
|
+
# so a file named "default.css" will overwrite the builtin "default.css".
|
|
163
|
+
html_static_path = ["_static"]
|
|
164
|
+
|
|
165
|
+
# Add any extra paths that contain custom files (such as robots.txt or
|
|
166
|
+
# .htaccess) here, relative to this directory. These files are copied
|
|
167
|
+
# directly to the root of the documentation.
|
|
168
|
+
# html_extra_path = []
|
|
169
|
+
|
|
170
|
+
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
|
171
|
+
# using the given strftime format.
|
|
172
|
+
# html_last_updated_fmt = '%b %d, %Y'
|
|
173
|
+
|
|
174
|
+
# If true, SmartyPants will be used to convert quotes and dashes to
|
|
175
|
+
# typographically correct entities.
|
|
176
|
+
# html_use_smartypants = True
|
|
177
|
+
|
|
178
|
+
# Custom sidebar templates, maps document names to template names.
|
|
179
|
+
# html_sidebars = {}
|
|
180
|
+
|
|
181
|
+
# Additional templates that should be rendered to pages, maps page names to
|
|
182
|
+
# template names.
|
|
183
|
+
# html_additional_pages = {}
|
|
184
|
+
|
|
185
|
+
# If false, no module index is generated.
|
|
186
|
+
# html_domain_indices = True
|
|
187
|
+
|
|
188
|
+
# If false, no index is generated.
|
|
189
|
+
# html_use_index = True
|
|
190
|
+
|
|
191
|
+
# If true, the index is split into individual pages for each letter.
|
|
192
|
+
# html_split_index = False
|
|
193
|
+
|
|
194
|
+
# If true, links to the reST sources are added to the pages.
|
|
195
|
+
# html_show_sourcelink = True
|
|
196
|
+
|
|
197
|
+
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
|
198
|
+
# html_show_sphinx = True
|
|
199
|
+
|
|
200
|
+
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
|
201
|
+
# html_show_copyright = True
|
|
202
|
+
|
|
203
|
+
# If true, an OpenSearch description file will be output, and all pages will
|
|
204
|
+
# contain a <link> tag referring to it. The value of this option must be the
|
|
205
|
+
# base URL from which the finished HTML is served.
|
|
206
|
+
# html_use_opensearch = ''
|
|
207
|
+
|
|
208
|
+
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
|
209
|
+
# html_file_suffix = None
|
|
210
|
+
|
|
211
|
+
# Output file base name for HTML help builder.
|
|
212
|
+
htmlhelp_basename = "project-templatedoc"
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# -- Options for LaTeX output ---------------------------------------------
|
|
216
|
+
|
|
217
|
+
latex_elements = {
|
|
218
|
+
# The paper size ('letterpaper' or 'a4paper').
|
|
219
|
+
# 'papersize': 'letterpaper',
|
|
220
|
+
# The font size ('10pt', '11pt' or '12pt').
|
|
221
|
+
# 'pointsize': '10pt',
|
|
222
|
+
# Additional stuff for the LaTeX preamble.
|
|
223
|
+
# 'preamble': '',
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
# Grouping the document tree into LaTeX files. List of tuples
|
|
227
|
+
# (source start file, target name, title,
|
|
228
|
+
# author, documentclass [howto, manual, or own class]).
|
|
229
|
+
latex_documents = [
|
|
230
|
+
("index", "upsetplot.tex", "upsetplot Documentation", "Joel Nothman", "manual"),
|
|
231
|
+
]
|
|
232
|
+
|
|
233
|
+
# The name of an image file (relative to this directory) to place at the top of
|
|
234
|
+
# the title page.
|
|
235
|
+
# latex_logo = None
|
|
236
|
+
|
|
237
|
+
# For "manual" documents, if this is true, then toplevel headings are parts,
|
|
238
|
+
# not chapters.
|
|
239
|
+
# latex_use_parts = False
|
|
240
|
+
|
|
241
|
+
# If true, show page references after internal links.
|
|
242
|
+
# latex_show_pagerefs = False
|
|
243
|
+
|
|
244
|
+
# If true, show URL addresses after external links.
|
|
245
|
+
# latex_show_urls = False
|
|
246
|
+
|
|
247
|
+
# Documents to append as an appendix to all manuals.
|
|
248
|
+
# latex_appendices = []
|
|
249
|
+
|
|
250
|
+
# If false, no module index is generated.
|
|
251
|
+
# latex_domain_indices = True
|
|
252
|
+
|
|
253
|
+
# Documents to append as an appendix to all manuals.
|
|
254
|
+
# texinfo_appendices = []
|
|
255
|
+
|
|
256
|
+
# If false, no module index is generated.
|
|
257
|
+
# texinfo_domain_indices = True
|
|
258
|
+
|
|
259
|
+
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
|
260
|
+
# texinfo_show_urls = 'footnote'
|
|
261
|
+
|
|
262
|
+
# If true, do not generate a @detailmenu in the "Top" node's menu.
|
|
263
|
+
# texinfo_no_detailmenu = False
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
# Example configuration for intersphinx: refer to the Python standard library.
|
|
267
|
+
intersphinx_mapping = {
|
|
268
|
+
"python": ("http://docs.python.org/", None),
|
|
269
|
+
"numpy": ("https://docs.scipy.org/doc/numpy/", None),
|
|
270
|
+
"matplotlib": ("https://matplotlib.org/", None),
|
|
271
|
+
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
# Config for sphinx_issues
|
|
276
|
+
|
|
277
|
+
issues_uri = "https://github.com/jnothman/upsetplot/issues/{issue}"
|
|
278
|
+
issues_github_path = "jnothman/upsetplot"
|
|
279
|
+
issues_user_uri = "https://github.com/{user}"
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
sphinx_gallery_conf = {
|
|
283
|
+
# path to your examples scripts
|
|
284
|
+
"examples_dirs": "../examples",
|
|
285
|
+
# path where to save gallery generated examples
|
|
286
|
+
"gallery_dirs": "auto_examples",
|
|
287
|
+
"backreferences_dir": "_modules",
|
|
288
|
+
"within_subsection_order": ExampleTitleSortKey,
|
|
289
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""
|
|
2
|
+
===============================
|
|
3
|
+
Design: Customizing axis labels
|
|
4
|
+
===============================
|
|
5
|
+
|
|
6
|
+
This example illustrates how the return value of the plot method can be used
|
|
7
|
+
to customize aspects of the plot, such as axis labels, legend position, etc.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from matplotlib import pyplot as plt
|
|
11
|
+
|
|
12
|
+
from upsetplot import generate_counts, plot
|
|
13
|
+
|
|
14
|
+
example = generate_counts()
|
|
15
|
+
print(example)
|
|
16
|
+
|
|
17
|
+
##########################################################################
|
|
18
|
+
|
|
19
|
+
plot_result = plot(example)
|
|
20
|
+
plot_result["intersections"].set_ylabel("Subset size")
|
|
21
|
+
plot_result["totals"].set_xlabel("Category size")
|
|
22
|
+
plt.show()
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""
|
|
2
|
+
==========================================
|
|
3
|
+
Data Vis: Feature distribution in Diabetes
|
|
4
|
+
==========================================
|
|
5
|
+
|
|
6
|
+
Explore above-average attributes in the Diabetes dataset (Efron et al, 2004).
|
|
7
|
+
|
|
8
|
+
Here we take some features correlated with disease progression, and look at the
|
|
9
|
+
distribution of that disease progression value when each of these features is
|
|
10
|
+
above average.
|
|
11
|
+
|
|
12
|
+
The most correlated features are:
|
|
13
|
+
|
|
14
|
+
- bmi body mass index
|
|
15
|
+
- bp average blood pressure
|
|
16
|
+
- s4 tch, total cholesterol / HDL
|
|
17
|
+
- s5 ltg, possibly log of serum triglycerides level
|
|
18
|
+
- s6 glu, blood sugar level
|
|
19
|
+
|
|
20
|
+
This kind of dataset analysis may not be a practical use of UpSet, but helps
|
|
21
|
+
to illustrate the :meth:`UpSet.add_catplot` feature.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import pandas as pd
|
|
25
|
+
from matplotlib import pyplot as plt
|
|
26
|
+
from sklearn.datasets import load_diabetes
|
|
27
|
+
|
|
28
|
+
from upsetplot import UpSet
|
|
29
|
+
|
|
30
|
+
# Load the dataset into a DataFrame
|
|
31
|
+
diabetes = load_diabetes()
|
|
32
|
+
diabetes_df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
|
|
33
|
+
|
|
34
|
+
# Get five features most correlated with disease progression
|
|
35
|
+
correls = diabetes_df.corrwith(
|
|
36
|
+
pd.Series(diabetes.target), method="spearman"
|
|
37
|
+
).sort_values()
|
|
38
|
+
top_features = correls.index[-5:]
|
|
39
|
+
|
|
40
|
+
# Get a binary indicator of whether each top feature is above its median
|
|
41
|
+
diabetes_above_avg = diabetes_df > diabetes_df.median(axis=0)
|
|
42
|
+
diabetes_above_avg = diabetes_above_avg[top_features]
|
|
43
|
+
diabetes_above_avg = diabetes_above_avg.rename(columns=lambda x: x + ">")
|
|
44
|
+
|
|
45
|
+
# Make this indicator mask an index of diabetes_df
|
|
46
|
+
diabetes_df = pd.concat([diabetes_df, diabetes_above_avg], axis=1)
|
|
47
|
+
diabetes_df = diabetes_df.set_index(list(diabetes_above_avg.columns))
|
|
48
|
+
|
|
49
|
+
# Also give us access to the target (disease progression)
|
|
50
|
+
diabetes_df = diabetes_df.assign(progression=diabetes.target)
|
|
51
|
+
|
|
52
|
+
##########################################################################
|
|
53
|
+
|
|
54
|
+
# UpSet plot it!
|
|
55
|
+
upset = UpSet(diabetes_df, subset_size="count", intersection_plot_elements=3)
|
|
56
|
+
upset.add_catplot(value="progression", kind="strip", color="blue")
|
|
57
|
+
print(diabetes_df)
|
|
58
|
+
upset.add_catplot(value="bmi", kind="strip", color="black")
|
|
59
|
+
upset.plot()
|
|
60
|
+
plt.title("UpSet with catplots, for orientation='horizontal'")
|
|
61
|
+
plt.show()
|
|
62
|
+
|
|
63
|
+
##########################################################################
|
|
64
|
+
|
|
65
|
+
# And again in vertical orientation
|
|
66
|
+
|
|
67
|
+
upset = UpSet(
|
|
68
|
+
diabetes_df,
|
|
69
|
+
subset_size="count",
|
|
70
|
+
intersection_plot_elements=3,
|
|
71
|
+
orientation="vertical",
|
|
72
|
+
)
|
|
73
|
+
upset.add_catplot(value="progression", kind="strip", color="blue")
|
|
74
|
+
upset.add_catplot(value="bmi", kind="strip", color="black")
|
|
75
|
+
upset.plot()
|
|
76
|
+
plt.suptitle("UpSet with catplots, for orientation='vertical'")
|
|
77
|
+
plt.show()
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""
|
|
2
|
+
===========================================================
|
|
3
|
+
Data Vis: Plotting discrete variables as stacked bar charts
|
|
4
|
+
===========================================================
|
|
5
|
+
|
|
6
|
+
Currently, a somewhat contrived example of `add_stacked_bars`.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from matplotlib import cm
|
|
11
|
+
from matplotlib import pyplot as plt
|
|
12
|
+
|
|
13
|
+
from upsetplot import UpSet
|
|
14
|
+
|
|
15
|
+
TITANIC_URL = (
|
|
16
|
+
"https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv" # noqa
|
|
17
|
+
)
|
|
18
|
+
df = pd.read_csv(TITANIC_URL)
|
|
19
|
+
# Show UpSet on survival and first class
|
|
20
|
+
df = df.set_index(df.Survived == 1).set_index(df.Pclass == 1, append=True)
|
|
21
|
+
|
|
22
|
+
upset = UpSet(df, intersection_plot_elements=0) # disable the default bar chart
|
|
23
|
+
upset.add_stacked_bars(
|
|
24
|
+
by="Sex", colors=cm.Pastel1, title="Count by gender", elements=10
|
|
25
|
+
)
|
|
26
|
+
upset.plot()
|
|
27
|
+
plt.suptitle("Gender for first class and survival on Titanic")
|
|
28
|
+
plt.show()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
upset = UpSet(
|
|
32
|
+
df, show_counts=True, orientation="vertical", intersection_plot_elements=0
|
|
33
|
+
)
|
|
34
|
+
upset.add_stacked_bars(
|
|
35
|
+
by="Sex", colors=cm.Pastel1, title="Count by gender", elements=10
|
|
36
|
+
)
|
|
37
|
+
upset.plot()
|
|
38
|
+
plt.suptitle("Same, but vertical, with counts shown")
|
|
39
|
+
plt.show()
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""
|
|
2
|
+
===================================
|
|
3
|
+
Basic: Examples with generated data
|
|
4
|
+
===================================
|
|
5
|
+
|
|
6
|
+
This example illustrates basic plotting functionality using generated data.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import matplotlib
|
|
10
|
+
from matplotlib import pyplot as plt
|
|
11
|
+
|
|
12
|
+
from upsetplot import generate_counts, plot
|
|
13
|
+
|
|
14
|
+
example = generate_counts()
|
|
15
|
+
print(example)
|
|
16
|
+
|
|
17
|
+
##########################################################################
|
|
18
|
+
|
|
19
|
+
plot(example)
|
|
20
|
+
plt.suptitle("Ordered by degree")
|
|
21
|
+
plt.show()
|
|
22
|
+
|
|
23
|
+
##########################################################################
|
|
24
|
+
|
|
25
|
+
plot(example, sort_by="cardinality")
|
|
26
|
+
plt.suptitle("Ordered by cardinality")
|
|
27
|
+
plt.show()
|
|
28
|
+
|
|
29
|
+
##########################################################################
|
|
30
|
+
|
|
31
|
+
plot(example, show_counts="{:,}")
|
|
32
|
+
plt.suptitle("With counts shown, using a thousands separator")
|
|
33
|
+
plt.show()
|
|
34
|
+
|
|
35
|
+
##########################################################################
|
|
36
|
+
|
|
37
|
+
plot(example, show_counts="%d", show_percentages=True)
|
|
38
|
+
plt.suptitle("With counts and % shown")
|
|
39
|
+
plt.show()
|
|
40
|
+
|
|
41
|
+
##########################################################################
|
|
42
|
+
|
|
43
|
+
plot(example, show_percentages="{:.2%}")
|
|
44
|
+
plt.suptitle("With fraction shown in custom format")
|
|
45
|
+
plt.show()
|
|
46
|
+
|
|
47
|
+
##########################################################################
|
|
48
|
+
|
|
49
|
+
matplotlib.rcParams["font.size"] = 6
|
|
50
|
+
plot(example, show_percentages="{:.2%}")
|
|
51
|
+
plt.suptitle("With a smaller font size")
|
|
52
|
+
plt.show()
|
examples/plot_hide.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""
|
|
2
|
+
=============================================
|
|
3
|
+
Basic: Hiding subsets based on size or degree
|
|
4
|
+
=============================================
|
|
5
|
+
|
|
6
|
+
This illustrates the use of ``min_subset_size``, ``max_subset_size``,
|
|
7
|
+
``min_degree`` or ``max_degree``.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from matplotlib import pyplot as plt
|
|
11
|
+
|
|
12
|
+
from upsetplot import generate_counts, plot
|
|
13
|
+
|
|
14
|
+
example = generate_counts()
|
|
15
|
+
|
|
16
|
+
plot(example, show_counts=True)
|
|
17
|
+
plt.suptitle("Nothing hidden")
|
|
18
|
+
plt.show()
|
|
19
|
+
|
|
20
|
+
##########################################################################
|
|
21
|
+
|
|
22
|
+
plot(example, show_counts=True, min_subset_size=100)
|
|
23
|
+
plt.suptitle("Small subsets hidden")
|
|
24
|
+
plt.show()
|
|
25
|
+
|
|
26
|
+
##########################################################################
|
|
27
|
+
|
|
28
|
+
plot(example, show_counts=True, max_subset_size=500)
|
|
29
|
+
plt.suptitle("Large subsets hidden")
|
|
30
|
+
plt.show()
|
|
31
|
+
|
|
32
|
+
##########################################################################
|
|
33
|
+
|
|
34
|
+
plot(example, show_counts=True, min_degree=2)
|
|
35
|
+
plt.suptitle("Degree <2 hidden")
|
|
36
|
+
plt.show()
|
|
37
|
+
|
|
38
|
+
##########################################################################
|
|
39
|
+
|
|
40
|
+
plot(example, show_counts=True, max_degree=2)
|
|
41
|
+
plt.suptitle("Degree >2 hidden")
|
|
42
|
+
plt.show()
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""
|
|
2
|
+
=======================================
|
|
3
|
+
Data Vis: Highlighting selected subsets
|
|
4
|
+
=======================================
|
|
5
|
+
|
|
6
|
+
Demonstrates use of the `style_subsets` method to mark some subsets as
|
|
7
|
+
different.
|
|
8
|
+
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from matplotlib import pyplot as plt
|
|
12
|
+
|
|
13
|
+
from upsetplot import UpSet, generate_counts
|
|
14
|
+
|
|
15
|
+
example = generate_counts()
|
|
16
|
+
|
|
17
|
+
##########################################################################
|
|
18
|
+
# Subsets can be styled by the categories present in them, and a legend
|
|
19
|
+
# can be optionally generated.
|
|
20
|
+
|
|
21
|
+
upset = UpSet(example)
|
|
22
|
+
upset.style_subsets(present=["cat1", "cat2"], facecolor="blue", label="special")
|
|
23
|
+
upset.plot()
|
|
24
|
+
plt.suptitle("Paint blue subsets including both cat1 and cat2; show a legend")
|
|
25
|
+
plt.show()
|
|
26
|
+
|
|
27
|
+
##########################################################################
|
|
28
|
+
# ... or styling can be applied by the categories absent in a subset.
|
|
29
|
+
|
|
30
|
+
upset = UpSet(example, orientation="vertical")
|
|
31
|
+
upset.style_subsets(present="cat2", absent="cat1", edgecolor="red", linewidth=2)
|
|
32
|
+
upset.plot()
|
|
33
|
+
plt.suptitle("Border for subsets including cat2 but not cat1")
|
|
34
|
+
plt.show()
|
|
35
|
+
|
|
36
|
+
##########################################################################
|
|
37
|
+
# ... or their size.
|
|
38
|
+
|
|
39
|
+
upset = UpSet(example)
|
|
40
|
+
upset.style_subsets(
|
|
41
|
+
min_subset_size=1000, facecolor="lightblue", hatch="xx", label="big"
|
|
42
|
+
)
|
|
43
|
+
upset.plot()
|
|
44
|
+
plt.suptitle("Hatch subsets with size >1000")
|
|
45
|
+
plt.show()
|
|
46
|
+
|
|
47
|
+
##########################################################################
|
|
48
|
+
# ... or degree.
|
|
49
|
+
|
|
50
|
+
upset = UpSet(example)
|
|
51
|
+
upset.style_subsets(min_degree=1, facecolor="blue")
|
|
52
|
+
upset.style_subsets(min_degree=2, facecolor="purple")
|
|
53
|
+
upset.style_subsets(min_degree=3, facecolor="red")
|
|
54
|
+
upset.plot()
|
|
55
|
+
plt.suptitle("Coloring by degree")
|
|
56
|
+
plt.show()
|
|
57
|
+
|
|
58
|
+
##########################################################################
|
|
59
|
+
# Multiple stylings can be applied with different criteria in the same
|
|
60
|
+
# plot.
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
upset = UpSet(example, facecolor="gray")
|
|
64
|
+
upset.style_subsets(present="cat0", label="Contains cat0", facecolor="blue")
|
|
65
|
+
upset.style_subsets(
|
|
66
|
+
present="cat1", label="Contains cat1", hatch="xx", edgecolor="black"
|
|
67
|
+
)
|
|
68
|
+
upset.style_subsets(present="cat2", label="Contains cat2", edgecolor="red")
|
|
69
|
+
|
|
70
|
+
# reduce legend size:
|
|
71
|
+
params = {"legend.fontsize": 8}
|
|
72
|
+
with plt.rc_context(params):
|
|
73
|
+
upset.plot()
|
|
74
|
+
plt.suptitle("Styles for every category!")
|
|
75
|
+
plt.show()
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""
|
|
2
|
+
==========================================
|
|
3
|
+
Data Vis: Highlighting selected categories
|
|
4
|
+
==========================================
|
|
5
|
+
|
|
6
|
+
Demonstrates use of the `style_categories` method to mark some
|
|
7
|
+
categories differently.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from matplotlib import pyplot as plt
|
|
11
|
+
|
|
12
|
+
from upsetplot import UpSet, generate_counts
|
|
13
|
+
|
|
14
|
+
example = generate_counts()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
##########################################################################
|
|
18
|
+
# Categories can be shaded by name with the ``shading_`` parameters.
|
|
19
|
+
|
|
20
|
+
upset = UpSet(example)
|
|
21
|
+
upset.style_categories("cat2", shading_edgecolor="darkgreen", shading_linewidth=1)
|
|
22
|
+
upset.style_categories(
|
|
23
|
+
"cat1",
|
|
24
|
+
shading_facecolor="lavender",
|
|
25
|
+
)
|
|
26
|
+
upset.plot()
|
|
27
|
+
plt.suptitle("Shade or edge a category with color")
|
|
28
|
+
plt.show()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
##########################################################################
|
|
32
|
+
# Category total bars can be styled with the ``bar_`` parameters.
|
|
33
|
+
# You can also specify categories using a list of names.
|
|
34
|
+
|
|
35
|
+
upset = UpSet(example)
|
|
36
|
+
upset.style_categories(
|
|
37
|
+
["cat2", "cat1"], bar_facecolor="aqua", bar_hatch="xx", bar_edgecolor="black"
|
|
38
|
+
)
|
|
39
|
+
upset.plot()
|
|
40
|
+
plt.suptitle("")
|
|
41
|
+
plt.show()
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""
|
|
2
|
+
==================================================
|
|
3
|
+
Basic: Plotting the distribution of missing values
|
|
4
|
+
==================================================
|
|
5
|
+
|
|
6
|
+
UpSet plots are often used to show which variables are missing together.
|
|
7
|
+
|
|
8
|
+
Passing a callable ``indicators=pd.isna`` to :func:`from_indicators` is
|
|
9
|
+
an easy way to categorise a record by the variables that are missing in it.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
from matplotlib import pyplot as plt
|
|
14
|
+
|
|
15
|
+
from upsetplot import from_indicators, plot
|
|
16
|
+
|
|
17
|
+
TITANIC_URL = (
|
|
18
|
+
"https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv" # noqa
|
|
19
|
+
)
|
|
20
|
+
data = pd.read_csv(TITANIC_URL)
|
|
21
|
+
|
|
22
|
+
plot(from_indicators(indicators=pd.isna, data=data), show_counts=True)
|
|
23
|
+
plt.show()
|