import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Built-in example datasets, loaded in one pass and bound to one name each
tips, iris, flights, penguins, diamonds, mpg = (
    sns.load_dataset(name)
    for name in ('tips', 'iris', 'flights', 'penguins', 'diamonds', 'mpg')
)
# Built-in themes: each call replaces the previous one, so the last wins
for style_name in (
    'darkgrid',   # default, gray grid
    'whitegrid',  # white with grid
    'dark',       # dark no grid
    'white',      # white no grid
    'ticks',      # white + tick marks
):
    sns.set_theme(style=style_name)
# Full customization: style, palette, fonts and matplotlib rc in one call
sns.set_theme(
    style='whitegrid',
    palette='muted',
    font='sans-serif',
    font_scale=1.2,
    rc={'figure.figsize': (12, 7)},
)
# Temporary style: the statements must be indented under `with` so that the
# plot is created while the 'darkgrid' parameters are active.
with sns.axes_style('darkgrid'):
    sns.barplot(data=tips, x='day', y='total_bill')
    plt.show()
# Reset to defaults
sns.reset_defaults()
| Palette | Type | Best For |
|---|---|---|
| deep | Qualitative | Default, general categorical |
| muted | Qualitative | Softer categories |
| pastel | Qualitative | Light, presentation-friendly |
| bright | Qualitative | High contrast categories |
| dark | Qualitative | Dark backgrounds |
| colorblind | Qualitative | Accessible, 10 distinct colors |
| viridis | Sequential | Perceptually uniform, ordered data |
| rocket | Sequential | Intensifying values |
| mako | Sequential | Alternative sequential |
| flare | Sequential | Warm sequential |
| coolwarm | Diverging | Positive / negative values |
| vlag | Diverging | Red-blue diverging |
| icefire | Diverging | Cool-warm diverging |
# Sequential palette returned as a matplotlib colormap
sns.color_palette('Blues', as_cmap=True)
# Use a hand-picked list of hex colors as the default color cycle
my_colors = [
    '#264653',
    '#2a9d8f',
    '#e9c46a',
    '#f4a261',
    '#e76f51',
]
sns.set_palette(my_colors)
# HUSL palette (hues evenly spaced; s = saturation, l = lightness)
sns.husl_palette(n_colors=6, l=0.6, s=0.7)
# Cubehelix palette
sns.cubehelix_palette(start=2, rot=0.5, light=0.9, dark=0.2)
# Preview a palette as a row of color swatches
sns.palplot(sns.color_palette('Set2', n_colors=8))
# Histogram with a KDE overlay (the modern replacement for distplot)
sns.histplot(
    data=tips, x='total_bill', hue='time',
    bins=30, stat='density', kde=True,
    element='step', fill=True,
    palette='Set2',
    edgecolor='white', linewidth=1,
)
# Two-variable histogram drawn as a heat map with a colorbar
sns.histplot(
    data=tips, x='total_bill', y='tip',
    bins=30, cmap='YlOrRd', cbar=True,
)
# Univariate KDE, one curve per level of `time`
sns.kdeplot(
    data=tips, x='total_bill', hue='time',
    common_norm=False,  # separate densities
    fill=True, alpha=0.4, linewidth=2,
)
# KDE — bivariate, one filled contour set per level of `time`.
# With a hue mapping seaborn ignores `cmap` (and emits a warning); the
# per-group colors are chosen through `palette` instead.
sns.kdeplot(
    data=tips, x='total_bill', y='tip',
    hue='time', fill=True,
    levels=8, thresh=0.05,
    palette='mako', alpha=0.7,
)
# Bivariate KDE drawn as unfilled contour lines
sns.kdeplot(
    data=tips, x='total_bill', y='tip',
    levels=10, linewidths=0.8,
    color='darkblue',
)
# Empirical cumulative distribution function, one curve per `time`
sns.ecdfplot(
    data=tips, x='total_bill', hue='time',
    stat='proportion', complementary=False,
)
# Rug plot: one tick per observation along the x axis
sns.rugplot(
    data=tips, x='total_bill', hue='time',
    height=0.05, alpha=0.5, linewidth=1,
)
# Layer a rug underneath a filled KDE on the same axes
sns.kdeplot(data=tips, x='total_bill', alpha=0.3, fill=True)
sns.rugplot(data=tips, x='total_bill')
# Figure-level distribution plot: KDEs faceted by smoker (rows) and time (cols)
sns.displot(
    data=tips, x='total_bill', hue='sex',
    row='smoker', col='time',
    kind='kde', fill=True,
    height=4, aspect=1.2,
    facet_kws={'margin_titles': True},
)
💡 histplot(..., kde=True) replaces the deprecated distplot. Use common_norm=False when comparing distributions across groups so that each group's density is normalized to its own area.
# Box plot of the bill per day, split by sex
sns.boxplot(
    data=tips, x='day', y='total_bill', hue='sex',
    palette='Set2', saturation=0.7,
    linewidth=1.2, fliersize=4,
    gap=0.1,
)
# Boxen (letter-value) plot — suited to large datasets
sns.boxenplot(
    data=diamonds, x='clarity', y='price', hue='cut',
    palette='coolwarm',
)
# Violin plot
sns.violinplot(
    data=tips, x='day', y='total_bill',
    hue='sex', split=True,  # split by hue
    inner='quartile',       # show quartiles inside
    palette='muted',
    bw_method=0.2,          # replaces `bw`, deprecated in seaborn 0.13
    cut=0,                  # limit violin range
)  # closing parenthesis was missing, which made the file unparseable
# Bar plot: bar height is the mean, error bars show one standard deviation
sns.barplot(
    data=tips, x='day', y='total_bill', hue='sex',
    estimator='mean',
    errorbar='sd',  # standard deviation
    capsize=0.1, gap=0.1,
    palette='Blues_d',
)
# Count plot: number of rows per day, split by time
sns.countplot(
    data=tips, x='day', hue='time',
    stat='count', width=0.7,
    palette='Set2',
)
# Swarm plot: every observation, positioned so points do not overlap
sns.swarmplot(
    data=tips, x='day', y='total_bill', hue='sex',
    dodge=True,  # separate by hue
    size=4, palette='Set2',
)
# Strip plot: every observation with random jitter (points may overlap)
sns.stripplot(
    data=tips, x='day', y='total_bill', hue='sex',
    dodge=True, jitter=0.2,
    marker='o', alpha=0.6,
)
# Figure-level categorical plot, one facet per smoker status
sns.catplot(
    data=tips, x='day', y='total_bill', hue='sex',
    col='smoker',
    kind='violin',  # 'box', 'violin', 'bar', 'strip', 'swarm'
    split=True, palette='muted',
    sharey=False,
    height=5, aspect=0.8,
)
| Plot | Shows | Best For | Dataset Size |
|---|---|---|---|
| boxplot | Median, IQR, outliers | Comparing distributions | Any |
| violinplot | Full density shape | Distribution comparison | < 10k points |
| boxenplot | Letter-value stats | Large datasets, tails | Large |
| barplot | Mean + error bar | Aggregate comparison | Any |
| countplot | Frequencies | Category counts | Any |
| swarmplot | Individual points | Distribution + every point | < 500 points |
| stripplot | Individual (overlapping) | Quick distribution check | Any |
⚠️Use sns.boxenplot instead of boxplot for large datasets — it shows more quantile information and handles heavy-tailed distributions better.
# Scatter plot with hue, marker style and marker size all mapped to columns
sns.scatterplot(
    data=tips, x='total_bill', y='tip',
    hue='time', style='smoker',
    size='size', sizes=(20, 200),
    alpha=0.8, palette='Set2',
)
# Scatter with a fitted regression curve (axes-level regplot)
sns.regplot(
    data=tips, x='total_bill', y='tip',
    order=2,  # polynomial
    line_kws={'color': 'red', 'linewidth': 2},
    scatter_kws={'alpha': 0.4, 's': 40},
)
# Line plot: mean per x value with a standard-deviation band
sns.lineplot(
    data=flights, x='year', y='passengers',
    hue='month', style='month',
    estimator='mean', errorbar='sd',
    markers=True, dashes=False,
    linewidth=2, palette='husl',
)
# One small line chart per month, wrapped four facets to a row
sns.relplot(
    data=flights, x='year', y='passengers',
    kind='line', marker='o',
    hue='month', col='month', col_wrap=4,
    height=3, aspect=1.2,
)
# Figure-level relational plot: one scatter facet per (day, smoker) pair
g = sns.relplot(
    data=tips,
    x='total_bill', y='tip',
    hue='time', style='sex',
    size='size', sizes=(30, 200),
    col='smoker', row='day',
    kind='scatter',
    palette='deep', height=3, aspect=1.2,
)
# Customize facet grid
g.set_axis_labels('Total Bill ($)', 'Tip ($)')
g.set_titles(col_template='{col_name} smoker',
             row_template='{row_name}')
# relplot has already drawn the figure legend. Retitle that legend instead of
# calling g.add_legend() again, which would draw a second legend and widen
# the figure a second time.
g.legend.set_title('Legend')
g.figure.suptitle('Tips Analysis', y=1.02)
💡sns.relplot() returns a FacetGrid — use g.map(), g.set_axis_labels(), and g.add_legend() to customize the figure-level output.
# Straight-line fit with restyled points and line
sns.regplot(
    data=tips, x='total_bill', y='tip',
    line_kws={'lw': 2, 'color': 'crimson'},
    scatter_kws={'s': 30, 'alpha': 0.4},
)
# Cubic polynomial fit
sns.regplot(
    data=tips, x='total_bill', y='tip',
    color='teal', order=3,
)
# Logistic regression — the response must be binary. The original used
# y='size' (party size, 1-6), which is not a valid logistic response, so a
# 0/1 indicator is derived instead: did the tip exceed 15% of the bill?
# `assign` returns a copy, leaving the shared `tips` frame unmodified.
sns.regplot(
    data=tips.assign(
        big_tip=(tips['tip'] / tips['total_bill'] > 0.15).astype(int),
    ),
    x='total_bill', y='big_tip',
    logistic=True, y_jitter=0.1,
    color='purple',
)
# LOWESS smoother in place of a parametric fit
sns.regplot(
    data=tips, x='total_bill', y='tip',
    scatter_kws={'alpha': 0.3},
    lowess=True,
)
# Residuals of the linear fit, with a LOWESS trend drawn through them
sns.residplot(
    data=tips, x='total_bill', y='tip',
    scatter_kws={'s': 30, 'alpha': 0.4},
    color='steelblue', lowess=True,
)
# Figure-level regression: facets by sex (rows) and smoker (cols), hue by time
g = sns.lmplot(
    data=tips,
    x='total_bill', y='tip',
    row='sex', col='smoker', hue='time',
    palette='Set2',
    height=4, aspect=1,
    line_kws={'linewidth': 2},
    scatter_kws={'s': 30, 'alpha': 0.4},
)
# Robust regression (outlier-resistant)
sns.lmplot(
    data=tips, x='total_bill', y='tip',
    scatter_kws={'alpha': 0.3},
    robust=True,
)
# Bin the x axis and plot the mean of each bin with a quadratic fit
sns.lmplot(
    data=tips, x='size', y='total_bill',
    x_bins=np.arange(1, 7), x_estimator=np.mean,
    order=2, ci=95,
)
| Param | Value | Description |
|---|---|---|
| order | int | Polynomial degree (1=linear, 2=quadratic) |
| logistic | True | Binary logistic regression |
| lowess | True | Locally weighted scatterplot smoothing |
| robust | True | Robust linear model (statsmodels RLM) — down-weights outliers |
| x_estimator | callable | Aggregate x groups (e.g. np.mean) |
| x_bins | array | Bin x-axis into discrete groups |
| ci | int / None | Confidence interval size; None to disable |
| n_boot | int | Bootstrap iterations for CI (default 1000) |
🚫Use sns.residplot() to check model assumptions — residuals should be randomly scattered around zero with no pattern. A funnel shape indicates heteroscedasticity.
# Correlation matrix of the numeric columns, drawn as an annotated heat map
corr = tips.select_dtypes(include='number').corr()
sns.heatmap(
    corr,
    vmin=-1, vmax=1, center=0, cmap='coolwarm',
    annot=True, fmt='.2f',
    square=True,
    linewidths=1, linecolor='white',
    cbar_kws={'label': 'Correlation', 'shrink': 0.8},
)
# Reshape flights to a month-by-year table of passengers, then plot it
flights_pivot = flights.pivot(
    index='month', columns='year', values='passengers',
)
sns.heatmap(
    flights_pivot,
    cmap='YlOrRd', linewidths=0.5,
    annot=True, fmt='d',
    cbar_kws={'label': 'Passengers'},
)
# Hierarchical clustering heatmap
# `z_score` and `standard_scale` are mutually exclusive: passing both raises
# ValueError. Keep z_score=1 here; to rescale columns to 0-1 instead, replace
# it with standard_scale=1.
sns.clustermap(
    flights_pivot,
    cmap='vlag',            # diverging palette
    z_score=1,              # z-score columns
    figsize=(12, 10),
    linewidths=0.5,
    annot=True, fmt='.1f',  # annotated values are floats after scaling
    row_cluster=True,
    col_cluster=True,
    method='average',       # linkage method
    metric='euclidean',     # distance metric
    cbar_pos=(0.02, 0.8, 0.05, 0.18),
)
# Hide the upper triangle (diagonal included) of the correlation matrix
mask = np.triu(np.ones_like(corr, dtype=bool))
sns.heatmap(
    corr,
    mask=mask,
    center=0, cmap='coolwarm',
    annot=True, fmt='.2f',
    square=True,
    linecolor='white', linewidths=1,
)
# Custom annotation
def annot_fmt(val):
    """Return *val* formatted to two decimals, or '' for weak values.

    A value is kept only when its absolute value is strictly greater than
    0.3; anything else yields an empty string so the cell is left unlabeled.
    """
    return f'{val:.2f}' if abs(val) > 0.3 else ''
# Pass pre-formatted strings as annotations; fmt='' stops seaborn from
# applying a numeric format to them
sns.heatmap(
    corr,
    annot=np.vectorize(annot_fmt)(corr),
    fmt='',
    center=0, cmap='RdBu',
)
⚠️For correlation heatmaps, always use center=0 with a diverging colormap so that positive and negative correlations are clearly distinguished.
# Inspect style parameters
sns.axes_style()             # show current params
sns.axes_style('whitegrid')  # get style dict
# Override individual matplotlib rc parameters through set_theme
sns.set_theme(
    rc={
        'axes.spines.top': False,
        'axes.spines.right': False,
        'axes.grid': True,
        'grid.alpha': 0.3,
        'font.family': 'sans-serif',
        'font.sans-serif': ['Arial', 'DejaVu Sans'],
    },
)
# Scaling presets: each call replaces the previous one
for context_name in (
    'paper',     # small, for papers
    'notebook',  # default
    'talk',      # for presentations
    'poster',    # for posters
):
    sns.set_context(context_name)
# Custom scale on top of a preset
sns.set_context(
    'notebook',
    font_scale=1.5,
    rc={'lines.linewidth': 2},
)
# Remove spines
sns.despine()                        # remove top & right
sns.despine(left=True)               # also remove left
sns.despine(offset=10)               # offset spines
sns.despine(trim=True)               # trim to data range
sns.despine(bottom=True, left=True)  # minimal look
# Place the legend outside the axes, anchored at the top-right corner
g = sns.scatterplot(data=tips, x='total_bill', y='tip', hue='day')
sns.move_legend(
    g, 'upper left',
    bbox_to_anchor=(1, 1),
    frameon=False, title='Day',
)
# Build a FacetGrid by hand and set its labels, titles and spacing
g = sns.FacetGrid(tips, row='smoker', col='time', height=4)
g.map_dataframe(sns.histplot, x='total_bill', hue='sex')
g.set_axis_labels('Total Bill', 'Count')
g.set_titles(col_template='{col_name}', row_template='{row_name}')
g.add_legend(title='Sex')
g.figure.subplots_adjust(hspace=0.4, wspace=0.3)
💡Use sns.set_context('talk') when creating plots for presentations — it increases font sizes and line widths automatically.
| Aspect | Matplotlib | Seaborn |
|---|---|---|
| Default style | Basic | Publication-ready |
| DataFrame support | Manual | Built-in via data= param |
| Statistical plots | Manual | Built-in (regression, CI, etc.) |
| Color palettes | Manual | Curated, perceptually uniform |
| Faceting | Manual gridspec | FacetGrid / catplot / relplot |
| Learning curve | Steep | Gentle for common plots |
| Customization | Full control | Wraps matplotlib (full access) |
# Seaborn and matplotlib side by side in one figure
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))
# Left panel: seaborn draws into the first axes via ax=
sns.violinplot(
    data=tips, x='day', y='total_bill',
    hue='sex', split=True,
    palette='muted',
    ax=axes[0],
)
axes[0].set_title('Distribution by Day')
axes[0].legend(loc='upper right', title='Sex')
# Right panel: plain matplotlib histogram on the second axes
axes[1].hist(
    tips['total_bill'],
    bins=30, density=True,
    color='teal', edgecolor='white', alpha=0.7,
)
axes[1].set_xlabel('Total Bill ($)')
axes[1].set_ylabel('Density')
axes[1].set_title('Overall Distribution')
fig.suptitle('Restaurant Tips Analysis', y=1.02, fontsize=16)
fig.tight_layout()
# Pair plot: scatter for every pair of numeric columns, KDE on the diagonal
sns.pairplot(
    data=iris,
    hue='species',
    kind='scatter',
    diag_kind='kde',  # 'hist' or 'kde' on diagonal
    corner=True,      # only lower triangle
    palette='Set2',
    height=2.5,
    diag_kws={'fill': True},
    plot_kws={'s': 40, 'alpha': 0.5},
)
# Joint plot: bivariate view plus the marginal distribution on each axis
sns.jointplot(
    data=tips, x='total_bill', y='tip',
    hue='time',
    kind='kde',  # 'scatter','kde','hex','reg'
    fill=True, palette='Set2',
    marginal_ticks=True, height=7,
)
💡Pass ax=axes[i] to any seaborn function to draw into a specific matplotlib subplot — this is the key to combining seaborn's ease of use with matplotlib's layout flexibility.
⚠️For datasets with many columns, use sns.pairplot(..., corner=True) to render only the lower triangle — it halves the plot count and is easier to read.
🚫When presenting to colorblind audiences, use sns.color_palette('colorblind') — it provides 10 colors that are distinguishable for the most common forms of color vision deficiency.