upsetplot-bombcell 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
doc/conf.py ADDED
@@ -0,0 +1,289 @@
1
+ # upsetplot documentation build configuration file, created by
2
+ # sphinx-quickstart on Mon Jan 18 14:44:12 2016.
3
+ #
4
+ # This file is execfile()d with the current directory set to its
5
+ # containing dir.
6
+ #
7
+ # Note that not all possible configuration values are present in this
8
+ # autogenerated file.
9
+ #
10
+ # All configuration values have a default; values that are commented out
11
+ # serve to show the default.
12
+
13
+ import os
14
+ import re
15
+ import sys
16
+ import warnings
17
+
18
+ # project root
19
+ sys.path.insert(0, os.path.abspath(".."))
20
+
21
+ import matplotlib # noqa
22
+
23
+ matplotlib.use("agg")
24
+ warnings.filterwarnings(
25
+ "ignore",
26
+ category=UserWarning,
27
+ message="Matplotlib is currently using agg, which is a"
28
+ " non-GUI backend, so cannot show the figure."
29
+ "|(\n|.)*is non-interactive, and thus cannot be shown",
30
+ )
31
+
32
+ import sphinx_rtd_theme # noqa
33
+ from sphinx_gallery.sorting import ExampleTitleSortKey # noqa
34
+ from upsetplot import __version__ as release # noqa
35
+
36
+
37
+ # If extensions (or modules to document with autodoc) are in another directory,
38
+ # add these directories to sys.path here. If the directory is relative to the
39
+ # documentation root, use os.path.abspath to make it absolute, like shown here.
40
+
41
+ # -- General configuration ---------------------------------------------------
42
+
43
+ # If your documentation needs a minimal Sphinx version, state it here.
44
+ # needs_sphinx = '1.0'
45
+
46
+ # Add any Sphinx extension module names here, as strings. They can be
47
+ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
48
+ # ones.
49
+ extensions = [
50
+ "sphinx_gallery.gen_gallery",
51
+ "sphinx.ext.autodoc",
52
+ "sphinx.ext.autosummary",
53
+ "sphinx.ext.doctest",
54
+ "sphinx.ext.intersphinx",
55
+ "sphinx.ext.todo",
56
+ "numpydoc",
57
+ "sphinx.ext.ifconfig",
58
+ "sphinx.ext.viewcode",
59
+ "sphinx_issues",
60
+ "nbsphinx",
61
+ ]
62
+
63
+ # Add any paths that contain templates here, relative to this directory.
64
+ templates_path = ["_templates"]
65
+
66
+ # The suffix of source filenames.
67
+ source_suffix = ".rst"
68
+
69
+ # The encoding of source files.
70
+ # source_encoding = 'utf-8-sig'
71
+
72
+ # The master toctree document.
73
+ master_doc = "index"
74
+
75
+ # General information about the project.
76
+ project = "upsetplot"
77
+ copyright = "2018-2024, Joel Nothman"
78
+
79
+ # The version info for the project you're documenting, acts as replacement for
80
+ # |version| and |release|, also used in various other places throughout the
81
+ # built documents.
82
+ #
83
+ # The short X.Y version.
84
+
85
+ version = re.match(r"^\d+(\.\d+)*", release).group()
86
+
87
+ # version = upsetplot.__version__
88
+ # The full version, including alpha/beta/rc tags.
89
+ # release = version
90
+
91
+ # The language for content autogenerated by Sphinx. Refer to documentation
92
+ # for a list of supported languages.
93
+ # language = None
94
+
95
+ # There are two options for replacing |today|: either, you set today to some
96
+ # non-false value, then it is used:
97
+ # today = ''
98
+ # Else, today_fmt is used as the format for a strftime call.
99
+ # today_fmt = '%B %d, %Y'
100
+
101
+ # List of patterns, relative to source directory, that match files and
102
+ # directories to ignore when looking for source files.
103
+ exclude_patterns = ["_build"]
104
+
105
+ # The reST default role (used for this markup: `text`) to use for all
106
+ # documents.
107
+ default_role = "any"
108
+
109
+ # If true, '()' will be appended to :func: etc. cross-reference text.
110
+ # add_function_parentheses = True
111
+
112
+ # If true, the current module name will be prepended to all description
113
+ # unit titles (such as .. function::).
114
+ # add_module_names = True
115
+
116
+ # If true, sectionauthor and moduleauthor directives will be shown in the
117
+ # output. They are ignored by default.
118
+ # show_authors = False
119
+
120
+ # The name of the Pygments (syntax highlighting) style to use.
121
+ pygments_style = "sphinx"
122
+
123
+ # A list of ignored prefixes for module index sorting.
124
+ # modindex_common_prefix = []
125
+
126
+ # If true, keep warnings as "system message" paragraphs in the built documents.
127
+ # keep_warnings = False
128
+
129
+
130
+ # -- Options for HTML output ----------------------------------------------
131
+
132
+ # The theme to use for HTML and HTML Help pages. See the documentation for
133
+ # a list of builtin themes.
134
+ html_theme = "sphinx_rtd_theme"
135
+
136
+ # Theme options are theme-specific and customize the look and feel of a theme
137
+ # further. For a list of options available for each theme, see the
138
+ # documentation.
139
+ # html_theme_options = {}
140
+
141
+ # Add any paths that contain custom themes here, relative to this directory.
142
+ html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
143
+
144
+ # The name for this set of Sphinx documents. If None, it defaults to
145
+ # "<project> v<release> documentation".
146
+ # html_title = None
147
+
148
+ # A shorter title for the navigation bar. Default is the same as html_title.
149
+ # html_short_title = None
150
+
151
+ # The name of an image file (relative to this directory) to place at the top
152
+ # of the sidebar.
153
+ # html_logo = None
154
+
155
+ # The name of an image file (within the static path) to use as favicon of the
156
+ # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
157
+ # pixels large.
158
+ # html_favicon = None
159
+
160
+ # Add any paths that contain custom static files (such as style sheets) here,
161
+ # relative to this directory. They are copied after the builtin static files,
162
+ # so a file named "default.css" will overwrite the builtin "default.css".
163
+ html_static_path = ["_static"]
164
+
165
+ # Add any extra paths that contain custom files (such as robots.txt or
166
+ # .htaccess) here, relative to this directory. These files are copied
167
+ # directly to the root of the documentation.
168
+ # html_extra_path = []
169
+
170
+ # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
171
+ # using the given strftime format.
172
+ # html_last_updated_fmt = '%b %d, %Y'
173
+
174
+ # If true, SmartyPants will be used to convert quotes and dashes to
175
+ # typographically correct entities.
176
+ # html_use_smartypants = True
177
+
178
+ # Custom sidebar templates, maps document names to template names.
179
+ # html_sidebars = {}
180
+
181
+ # Additional templates that should be rendered to pages, maps page names to
182
+ # template names.
183
+ # html_additional_pages = {}
184
+
185
+ # If false, no module index is generated.
186
+ # html_domain_indices = True
187
+
188
+ # If false, no index is generated.
189
+ # html_use_index = True
190
+
191
+ # If true, the index is split into individual pages for each letter.
192
+ # html_split_index = False
193
+
194
+ # If true, links to the reST sources are added to the pages.
195
+ # html_show_sourcelink = True
196
+
197
+ # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
198
+ # html_show_sphinx = True
199
+
200
+ # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
201
+ # html_show_copyright = True
202
+
203
+ # If true, an OpenSearch description file will be output, and all pages will
204
+ # contain a <link> tag referring to it. The value of this option must be the
205
+ # base URL from which the finished HTML is served.
206
+ # html_use_opensearch = ''
207
+
208
+ # This is the file name suffix for HTML files (e.g. ".xhtml").
209
+ # html_file_suffix = None
210
+
211
+ # Output file base name for HTML help builder.
212
+ htmlhelp_basename = "project-templatedoc"
213
+
214
+
215
+ # -- Options for LaTeX output ---------------------------------------------
216
+
217
+ latex_elements = {
218
+ # The paper size ('letterpaper' or 'a4paper').
219
+ # 'papersize': 'letterpaper',
220
+ # The font size ('10pt', '11pt' or '12pt').
221
+ # 'pointsize': '10pt',
222
+ # Additional stuff for the LaTeX preamble.
223
+ # 'preamble': '',
224
+ }
225
+
226
+ # Grouping the document tree into LaTeX files. List of tuples
227
+ # (source start file, target name, title,
228
+ # author, documentclass [howto, manual, or own class]).
229
+ latex_documents = [
230
+ ("index", "upsetplot.tex", "upsetplot Documentation", "Joel Nothman", "manual"),
231
+ ]
232
+
233
+ # The name of an image file (relative to this directory) to place at the top of
234
+ # the title page.
235
+ # latex_logo = None
236
+
237
+ # For "manual" documents, if this is true, then toplevel headings are parts,
238
+ # not chapters.
239
+ # latex_use_parts = False
240
+
241
+ # If true, show page references after internal links.
242
+ # latex_show_pagerefs = False
243
+
244
+ # If true, show URL addresses after external links.
245
+ # latex_show_urls = False
246
+
247
+ # Documents to append as an appendix to all manuals.
248
+ # latex_appendices = []
249
+
250
+ # If false, no module index is generated.
251
+ # latex_domain_indices = True
252
+
253
+ # Documents to append as an appendix to all manuals.
254
+ # texinfo_appendices = []
255
+
256
+ # If false, no module index is generated.
257
+ # texinfo_domain_indices = True
258
+
259
+ # How to display URL addresses: 'footnote', 'no', or 'inline'.
260
+ # texinfo_show_urls = 'footnote'
261
+
262
+ # If true, do not generate a @detailmenu in the "Top" node's menu.
263
+ # texinfo_no_detailmenu = False
264
+
265
+
266
+ # Example configuration for intersphinx: refer to the Python standard library.
267
+ intersphinx_mapping = {
268
+ "python": ("http://docs.python.org/", None),
269
+ "numpy": ("https://docs.scipy.org/doc/numpy/", None),
270
+ "matplotlib": ("https://matplotlib.org/", None),
271
+ "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
272
+ }
273
+
274
+
275
+ # Config for sphinx_issues
276
+
277
+ issues_uri = "https://github.com/jnothman/upsetplot/issues/{issue}"
278
+ issues_github_path = "jnothman/upsetplot"
279
+ issues_user_uri = "https://github.com/{user}"
280
+
281
+
282
+ sphinx_gallery_conf = {
283
+ # path to your examples scripts
284
+ "examples_dirs": "../examples",
285
+ # path where to save gallery generated examples
286
+ "gallery_dirs": "auto_examples",
287
+ "backreferences_dir": "_modules",
288
+ "within_subsection_order": ExampleTitleSortKey,
289
+ }
@@ -0,0 +1,22 @@
1
+ """
2
+ ===============================
3
+ Design: Customizing axis labels
4
+ ===============================
5
+
6
+ This example illustrates how the return value of the plot method can be used
7
+ to customize aspects of the plot, such as axis labels, legend position, etc.
8
+ """
9
+
10
+ from matplotlib import pyplot as plt
11
+
12
+ from upsetplot import generate_counts, plot
13
+
14
+ example = generate_counts()
15
+ print(example)
16
+
17
+ ##########################################################################
18
+
19
+ plot_result = plot(example)
20
+ plot_result["intersections"].set_ylabel("Subset size")
21
+ plot_result["totals"].set_xlabel("Category size")
22
+ plt.show()
@@ -0,0 +1,77 @@
1
+ """
2
+ ==========================================
3
+ Data Vis: Feature distribution in Diabetes
4
+ ==========================================
5
+
6
+ Explore above-average attributes in the Diabetes dataset (Efron et al, 2004).
7
+
8
+ Here we take some features correlated with disease progression, and look at the
9
+ distribution of that disease progression value when each of these features is
10
+ above average.
11
+
12
+ The most correlated features are:
13
+
14
+ - bmi body mass index
15
+ - bp average blood pressure
16
+ - s4 tch, total cholesterol / HDL
17
+ - s5 ltg, possibly log of serum triglycerides level
18
+ - s6 glu, blood sugar level
19
+
20
+ This kind of dataset analysis may not be a practical use of UpSet, but helps
21
+ to illustrate the :meth:`UpSet.add_catplot` feature.
22
+ """
23
+
24
+ import pandas as pd
25
+ from matplotlib import pyplot as plt
26
+ from sklearn.datasets import load_diabetes
27
+
28
+ from upsetplot import UpSet
29
+
30
+ # Load the dataset into a DataFrame
31
+ diabetes = load_diabetes()
32
+ diabetes_df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
33
+
34
+ # Get five features most correlated with disease progression (the target)
35
+ correls = diabetes_df.corrwith(
36
+ pd.Series(diabetes.target), method="spearman"
37
+ ).sort_values()
38
+ top_features = correls.index[-5:]
39
+
40
+ # Get a binary indicator of whether each top feature is above its median
41
+ diabetes_above_avg = diabetes_df > diabetes_df.median(axis=0)
42
+ diabetes_above_avg = diabetes_above_avg[top_features]
43
+ diabetes_above_avg = diabetes_above_avg.rename(columns=lambda x: x + ">")
44
+
45
+ # Make this indicator mask an index of diabetes_df
46
+ diabetes_df = pd.concat([diabetes_df, diabetes_above_avg], axis=1)
47
+ diabetes_df = diabetes_df.set_index(list(diabetes_above_avg.columns))
48
+
49
+ # Also give us access to the target (disease progression)
50
+ diabetes_df = diabetes_df.assign(progression=diabetes.target)
51
+
52
+ ##########################################################################
53
+
54
+ # UpSet plot it!
55
+ upset = UpSet(diabetes_df, subset_size="count", intersection_plot_elements=3)
56
+ upset.add_catplot(value="progression", kind="strip", color="blue")
57
+ print(diabetes_df)
58
+ upset.add_catplot(value="bmi", kind="strip", color="black")
59
+ upset.plot()
60
+ plt.title("UpSet with catplots, for orientation='horizontal'")
61
+ plt.show()
62
+
63
+ ##########################################################################
64
+
65
+ # And again in vertical orientation
66
+
67
+ upset = UpSet(
68
+ diabetes_df,
69
+ subset_size="count",
70
+ intersection_plot_elements=3,
71
+ orientation="vertical",
72
+ )
73
+ upset.add_catplot(value="progression", kind="strip", color="blue")
74
+ upset.add_catplot(value="bmi", kind="strip", color="black")
75
+ upset.plot()
76
+ plt.suptitle("UpSet with catplots, for orientation='vertical'")
77
+ plt.show()
@@ -0,0 +1,39 @@
1
+ """
2
+ ===========================================================
3
+ Data Vis: Plotting discrete variables as stacked bar charts
4
+ ===========================================================
5
+
6
+ Currently, a somewhat contrived example of `add_stacked_bars`.
7
+ """
8
+
9
+ import pandas as pd
10
+ from matplotlib import cm
11
+ from matplotlib import pyplot as plt
12
+
13
+ from upsetplot import UpSet
14
+
15
+ TITANIC_URL = (
16
+ "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv" # noqa
17
+ )
18
+ df = pd.read_csv(TITANIC_URL)
19
+ # Show UpSet on survival and first class
20
+ df = df.set_index(df.Survived == 1).set_index(df.Pclass == 1, append=True)
21
+
22
+ upset = UpSet(df, intersection_plot_elements=0) # disable the default bar chart
23
+ upset.add_stacked_bars(
24
+ by="Sex", colors=cm.Pastel1, title="Count by gender", elements=10
25
+ )
26
+ upset.plot()
27
+ plt.suptitle("Gender for first class and survival on Titanic")
28
+ plt.show()
29
+
30
+
31
+ upset = UpSet(
32
+ df, show_counts=True, orientation="vertical", intersection_plot_elements=0
33
+ )
34
+ upset.add_stacked_bars(
35
+ by="Sex", colors=cm.Pastel1, title="Count by gender", elements=10
36
+ )
37
+ upset.plot()
38
+ plt.suptitle("Same, but vertical, with counts shown")
39
+ plt.show()
@@ -0,0 +1,52 @@
1
+ """
2
+ ===================================
3
+ Basic: Examples with generated data
4
+ ===================================
5
+
6
+ This example illustrates basic plotting functionality using generated data.
7
+ """
8
+
9
+ import matplotlib
10
+ from matplotlib import pyplot as plt
11
+
12
+ from upsetplot import generate_counts, plot
13
+
14
+ example = generate_counts()
15
+ print(example)
16
+
17
+ ##########################################################################
18
+
19
+ plot(example)
20
+ plt.suptitle("Ordered by degree")
21
+ plt.show()
22
+
23
+ ##########################################################################
24
+
25
+ plot(example, sort_by="cardinality")
26
+ plt.suptitle("Ordered by cardinality")
27
+ plt.show()
28
+
29
+ ##########################################################################
30
+
31
+ plot(example, show_counts="{:,}")
32
+ plt.suptitle("With counts shown, using a thousands separator")
33
+ plt.show()
34
+
35
+ ##########################################################################
36
+
37
+ plot(example, show_counts="%d", show_percentages=True)
38
+ plt.suptitle("With counts and % shown")
39
+ plt.show()
40
+
41
+ ##########################################################################
42
+
43
+ plot(example, show_percentages="{:.2%}")
44
+ plt.suptitle("With fraction shown in custom format")
45
+ plt.show()
46
+
47
+ ##########################################################################
48
+
49
+ matplotlib.rcParams["font.size"] = 6
50
+ plot(example, show_percentages="{:.2%}")
51
+ plt.suptitle("With a smaller font size")
52
+ plt.show()
examples/plot_hide.py ADDED
@@ -0,0 +1,42 @@
1
+ """
2
+ =============================================
3
+ Basic: Hiding subsets based on size or degree
4
+ =============================================
5
+
6
+ This illustrates the use of ``min_subset_size``, ``max_subset_size``,
7
+ ``min_degree`` or ``max_degree``.
8
+ """
9
+
10
+ from matplotlib import pyplot as plt
11
+
12
+ from upsetplot import generate_counts, plot
13
+
14
+ example = generate_counts()
15
+
16
+ plot(example, show_counts=True)
17
+ plt.suptitle("Nothing hidden")
18
+ plt.show()
19
+
20
+ ##########################################################################
21
+
22
+ plot(example, show_counts=True, min_subset_size=100)
23
+ plt.suptitle("Small subsets hidden")
24
+ plt.show()
25
+
26
+ ##########################################################################
27
+
28
+ plot(example, show_counts=True, max_subset_size=500)
29
+ plt.suptitle("Large subsets hidden")
30
+ plt.show()
31
+
32
+ ##########################################################################
33
+
34
+ plot(example, show_counts=True, min_degree=2)
35
+ plt.suptitle("Degree <2 hidden")
36
+ plt.show()
37
+
38
+ ##########################################################################
39
+
40
+ plot(example, show_counts=True, max_degree=2)
41
+ plt.suptitle("Degree >2 hidden")
42
+ plt.show()
@@ -0,0 +1,75 @@
1
+ """
2
+ =======================================
3
+ Data Vis: Highlighting selected subsets
4
+ =======================================
5
+
6
+ Demonstrates use of the `style_subsets` method to mark some subsets as
7
+ different.
8
+
9
+ """
10
+
11
+ from matplotlib import pyplot as plt
12
+
13
+ from upsetplot import UpSet, generate_counts
14
+
15
+ example = generate_counts()
16
+
17
+ ##########################################################################
18
+ # Subsets can be styled by the categories present in them, and a legend
19
+ # can be optionally generated.
20
+
21
+ upset = UpSet(example)
22
+ upset.style_subsets(present=["cat1", "cat2"], facecolor="blue", label="special")
23
+ upset.plot()
24
+ plt.suptitle("Paint blue subsets including both cat1 and cat2; show a legend")
25
+ plt.show()
26
+
27
+ ##########################################################################
28
+ # ... or styling can be applied by the categories absent in a subset.
29
+
30
+ upset = UpSet(example, orientation="vertical")
31
+ upset.style_subsets(present="cat2", absent="cat1", edgecolor="red", linewidth=2)
32
+ upset.plot()
33
+ plt.suptitle("Border for subsets including cat2 but not cat1")
34
+ plt.show()
35
+
36
+ ##########################################################################
37
+ # ... or their size.
38
+
39
+ upset = UpSet(example)
40
+ upset.style_subsets(
41
+ min_subset_size=1000, facecolor="lightblue", hatch="xx", label="big"
42
+ )
43
+ upset.plot()
44
+ plt.suptitle("Hatch subsets with size >1000")
45
+ plt.show()
46
+
47
+ ##########################################################################
48
+ # ... or degree.
49
+
50
+ upset = UpSet(example)
51
+ upset.style_subsets(min_degree=1, facecolor="blue")
52
+ upset.style_subsets(min_degree=2, facecolor="purple")
53
+ upset.style_subsets(min_degree=3, facecolor="red")
54
+ upset.plot()
55
+ plt.suptitle("Coloring by degree")
56
+ plt.show()
57
+
58
+ ##########################################################################
59
+ # Multiple stylings can be applied with different criteria in the same
60
+ # plot.
61
+
62
+
63
+ upset = UpSet(example, facecolor="gray")
64
+ upset.style_subsets(present="cat0", label="Contains cat0", facecolor="blue")
65
+ upset.style_subsets(
66
+ present="cat1", label="Contains cat1", hatch="xx", edgecolor="black"
67
+ )
68
+ upset.style_subsets(present="cat2", label="Contains cat2", edgecolor="red")
69
+
70
+ # reduce legend size:
71
+ params = {"legend.fontsize": 8}
72
+ with plt.rc_context(params):
73
+ upset.plot()
74
+ plt.suptitle("Styles for every category!")
75
+ plt.show()
@@ -0,0 +1,41 @@
1
+ """
2
+ ==========================================
3
+ Data Vis: Highlighting selected categories
4
+ ==========================================
5
+
6
+ Demonstrates use of the `style_categories` method to mark some
7
+ categories differently.
8
+ """
9
+
10
+ from matplotlib import pyplot as plt
11
+
12
+ from upsetplot import UpSet, generate_counts
13
+
14
+ example = generate_counts()
15
+
16
+
17
+ ##########################################################################
18
+ # Categories can be shaded by name with the ``shading_`` parameters.
19
+
20
+ upset = UpSet(example)
21
+ upset.style_categories("cat2", shading_edgecolor="darkgreen", shading_linewidth=1)
22
+ upset.style_categories(
23
+ "cat1",
24
+ shading_facecolor="lavender",
25
+ )
26
+ upset.plot()
27
+ plt.suptitle("Shade or edge a category with color")
28
+ plt.show()
29
+
30
+
31
+ ##########################################################################
32
+ # Category total bars can be styled with the ``bar_`` parameters.
33
+ # You can also specify categories using a list of names.
34
+
35
+ upset = UpSet(example)
36
+ upset.style_categories(
37
+ ["cat2", "cat1"], bar_facecolor="aqua", bar_hatch="xx", bar_edgecolor="black"
38
+ )
39
+ upset.plot()
40
+ plt.suptitle("")
41
+ plt.show()
@@ -0,0 +1,23 @@
1
+ """
2
+ ==================================================
3
+ Basic: Plotting the distribution of missing values
4
+ ==================================================
5
+
6
+ UpSet plots are often used to show which variables are missing together.
7
+
8
+ Passing a callable ``indicators=pd.isna`` to :func:`from_indicators` is
9
+ an easy way to categorise a record by the variables that are missing in it.
10
+ """
11
+
12
+ import pandas as pd
13
+ from matplotlib import pyplot as plt
14
+
15
+ from upsetplot import from_indicators, plot
16
+
17
+ TITANIC_URL = (
18
+ "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv" # noqa
19
+ )
20
+ data = pd.read_csv(TITANIC_URL)
21
+
22
+ plot(from_indicators(indicators=pd.isna, data=data), show_counts=True)
23
+ plt.show()