Embedding Model Comparison Benchmark - EmbedComp
Models: e5-base-v2 · bge-base-en-v1.5 · multilingual-e5-base · all-MiniLM-L6-v2 · nomic-embed-text-v1
Dataset: BEIR trec-covid corpus
Metrics: Encode throughput · Query latency (mean/p95/p99) · Recall@K · MRR · Cosine distribution
Search/Recommendation System Metrics Overview
| Metric | What it Measures | Ideal Improvement / Interpretation |
|---|---|---|
| Encode throughput | How many documents the model can embed per second (system capacity). | ⬆️ Higher (Higher is better) |
| Query latency (mean) | Average time from "user types query" to "results returned," in milliseconds. | ⬇️ Lower (Lower is better) |
| Query latency (p95) | The time by which 95% of all queries finish—a realistic worst-case for most users. | ⬇️ Lower (Lower is better) |
| Query latency (p99) | The time by which 99% of all queries finish—representing tail latency experienced by the slowest 1 in 100 users. | ⬇️ Lower (Lower is better) |
| Recall@K (e.g., Recall@3) | Fraction of the reference-relevant documents that appear within the top $K$ results, normalised by $\min(|\text{relevant}|, K)$ so that a perfect top-$K$ scores 1.0. | ⬆️ Higher (Higher is better; ideal = 1.0) |
| MRR | Mean Reciprocal Rank. The average of 1/rank of the first relevant document for each query (e.g., if a relevant document is always ranked first, MRR is 1.0). | ⬆️ Higher (Closer to 1.0 is better) |
| Cosine distribution | The distribution of query–document cosine similarity scores across the top-K hits of all queries. Shows how strongly, and how consistently, the retrieved documents match the query. | 📈 Mean higher · Spread narrower (Ideal is a high mean with minimal variance) |
In [ ]:
# Run once to install deps
!pip install sentence-transformers datasets faiss-cpu pandas numpy matplotlib seaborn
In [1]:
import time
import numpy as np
import pandas as pd
import faiss
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.patches import Circle
import seaborn as sns
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
from collections import defaultdict
import warnings
# Silence every warning category for the rest of the session so library
# notices do not clutter the cell output.
# NOTE(review): this is a blanket filter, so it also hides warnings that may
# matter (deprecations, numerical warnings) — consider narrowing it.
warnings.filterwarnings('ignore')
# Set matplotlib style: dark figure background plus seaborn's "husl" palette
plt.style.use('dark_background')
sns.set_palette("husl")
print('All imports OK')
In [3]:
# ── CONFIG ── swap models / queries freely here ──────────────────────
# Display name -> Hugging Face model id. The display name is the key used to
# look up the prefixes below and the label shown in every chart and table.
MODELS = {
    'e5-base-v2': 'intfloat/e5-base-v2',
    'bge-base-en': 'BAAI/bge-base-en-v1.5',
    'multilingual-e5-base': 'intfloat/multilingual-e5-base',
    'all-MiniLM-L6-v2': 'sentence-transformers/all-MiniLM-L6-v2',
    'nomic-embed-v1': 'nomic-ai/nomic-embed-text-v1',
}

# Text prepended to every query before encoding ('' = model needs no prefix).
# nomic-embed-text-v1 uses task prefixes 'search_query: ' / 'search_document: ',
# not the e5-style 'query: ' / 'passage: '.
QUERY_PREFIX = {
    'e5-base-v2': 'query: ',
    'bge-base-en': 'Represent this sentence for searching relevant passages: ',
    'multilingual-e5-base': 'query: ',
    'all-MiniLM-L6-v2': '',
    'nomic-embed-v1': 'search_query: ',
}

# Text prepended to every corpus document before encoding.
DOC_PREFIX = {
    'e5-base-v2': 'passage: ',
    'bge-base-en': '',
    'multilingual-e5-base': 'passage: ',
    'all-MiniLM-L6-v2': '',
    'nomic-embed-v1': 'search_document: ',
}

# Fail here, not halfway through a long benchmark run, if a model was added to
# MODELS without its prefixes.
_models_without_prefix = [
    name for name in MODELS if name not in QUERY_PREFIX or name not in DOC_PREFIX
]
if _models_without_prefix:
    raise KeyError(f'Missing QUERY_PREFIX/DOC_PREFIX entry for: {_models_without_prefix}')

CORPUS_SIZE = 500  # increase on GPU
BATCH_SIZE = 16
TOP_K = 5
LATENCY_RUNS = 10  # number of runs to average latency over

# Ground-truth: query -> list of relevant doc indices in corpus
# These will be computed dynamically using e5-base-v2 as reference
QUERIES = [
    'How does COVID-19 affect lung tissue?',
    'What are the symptoms of coronavirus infection?',
    'How is PCR testing used to detect COVID-19?',
    'What treatments exist for severe COVID cases?',
    'How does the spike protein enable viral entry?',
]
GROUND_TRUTH = {}  # Will be populated after loading corpus
COLORS = ['#58a6ff', '#3fb950', '#a371f7', '#f0883e', '#79c0ff']

# GROUND_TRUTH is still empty at this point, so report the configured queries.
print(f'Config ready: {len(MODELS)} models, {CORPUS_SIZE} docs, {len(QUERIES)} queries')
In [4]:
print('Loading BEIR trec-covid corpus...')
dataset = load_dataset('BeIR/trec-covid', 'corpus')['corpus'].select(range(CORPUS_SIZE))
corpus_texts = [doc['text'] for doc in dataset]
print(f'Loaded {len(corpus_texts)} documents')
print(f'Sample doc: {corpus_texts[0][:150]}...')

# Pseudo ground truth: the documents the reference model (e5-base-v2) ranks
# highest for each query are treated as "relevant".
# NOTE: these are not human relevance judgements — the reference model scores
# perfectly against itself by construction, and closely related models are
# favoured, so read Recall/MRR as "agreement with e5-base-v2".
REF_MODEL_ID = 'intfloat/e5-base-v2'
REF_QUERY_PREFIX = 'query: '
REF_DOC_PREFIX = 'passage: '
# Never ask FAISS for more neighbours than the index holds: it pads the result
# with -1 labels, which would end up in the ground truth as bogus doc indices.
gt_k = min(10, len(corpus_texts))

print(f'\nGenerating ground truth (finding top-{gt_k} similar docs per query)...')
ref_model = SentenceTransformer(REF_MODEL_ID)
corpus_embs = ref_model.encode([REF_DOC_PREFIX + t for t in corpus_texts],
                               batch_size=BATCH_SIZE, convert_to_numpy=True,
                               normalize_embeddings=True, show_progress_bar=False)
# Inner product on L2-normalised vectors equals cosine similarity.
index_ref = faiss.IndexFlatIP(corpus_embs.shape[1])
index_ref.add(corpus_embs.astype(np.float32))

# Drop entries left over from a previous run of this cell so that queries
# removed from QUERIES do not keep being benchmarked.
GROUND_TRUTH.clear()
for query in QUERIES:
    qe = ref_model.encode([REF_QUERY_PREFIX + query], normalize_embeddings=True)
    _, I = index_ref.search(qe.astype(np.float32), k=gt_k)
    GROUND_TRUTH[query] = I[0].tolist()
    print(f" '{query[:40]}...' → top-{gt_k} indices: {I[0].tolist()}")

del ref_model
print(f'Ground truth ready: {len(GROUND_TRUTH)} queries')
In [5]:
# Hub model ids that ship custom modelling code and therefore only load with
# trust_remote_code=True. Kept as an explicit allow-list so remote code is
# never executed for a model that does not need it.
_REMOTE_CODE_MODEL_IDS = frozenset({'nomic-ai/nomic-embed-text-v1'})


def benchmark_model(model_name, model_id):
    """Benchmark one embedding model on the loaded corpus and return its metrics.

    Reads the module-level ``corpus_texts``, ``GROUND_TRUTH``, ``QUERY_PREFIX``,
    ``DOC_PREFIX``, ``BATCH_SIZE``, ``TOP_K`` and ``LATENCY_RUNS``.

    Args:
        model_name: Display name; key into ``QUERY_PREFIX`` / ``DOC_PREFIX``.
        model_id: Model id passed to ``SentenceTransformer``.

    Returns:
        dict with keys ``model``, ``load_time_s``, ``encode_time_s``,
        ``throughput_docs_s``, ``embedding_dim``, ``memory_mb``,
        ``latency_ms_mean``, ``latency_ms_p95``, ``latency_ms_p99``,
        ``recall@1``, ``recall@3``, ``recall@5``, ``mrr``, ``avg_top_cosine``
        and ``cosine_scores`` (the raw top-K scores of every query).

    Raises:
        RuntimeError: if ``GROUND_TRUTH`` has not been populated yet.
        KeyError: if ``model_name`` has no prefix entry.
    """
    if not GROUND_TRUTH:
        raise RuntimeError('GROUND_TRUTH is empty: run the corpus / ground-truth cell first')

    print(f"\n{'='*60}\n {model_name}\n{'='*60}")
    m = {'model': model_name}
    qpfx, dpfx = QUERY_PREFIX[model_name], DOC_PREFIX[model_name]

    # 1. Load
    t0 = time.perf_counter()
    model = SentenceTransformer(model_id,
                                trust_remote_code=model_id in _REMOTE_CODE_MODEL_IDS)
    m['load_time_s'] = round(time.perf_counter() - t0, 2)
    print(f' Load : {m["load_time_s"]}s')

    # 2. Encode corpus -> throughput
    docs = [dpfx + t for t in corpus_texts]
    t0 = time.perf_counter()
    embs = model.encode(docs, batch_size=BATCH_SIZE, convert_to_numpy=True,
                        normalize_embeddings=True, show_progress_bar=False)
    enc_time = time.perf_counter() - t0
    m['encode_time_s'] = round(enc_time, 3)
    m['throughput_docs_s'] = round(len(corpus_texts) / enc_time, 1)
    m['embedding_dim'] = embs.shape[1]
    # Size of the raw embedding matrix as returned by encode(), in MB (1e6 bytes)
    m['memory_mb'] = round(embs.nbytes / 1e6, 2)
    print(f' Throughput : {m["throughput_docs_s"]} docs/s | dim={m["embedding_dim"]} | {m["memory_mb"]} MB')

    # 3. FAISS index (IndexFlatIP = cosine for normalised vecs)
    index = faiss.IndexFlatIP(embs.shape[1])
    index.add(embs.astype(np.float32))

    # 4. Query latency — averaged over LATENCY_RUNS (encode + search, one query)
    sample_q = qpfx + next(iter(GROUND_TRUTH))
    # One untimed warm-up so one-off setup for the single-query path is not
    # billed to the first timed run.
    index.search(model.encode([sample_q], normalize_embeddings=True).astype(np.float32), k=TOP_K)
    lats = []
    for _ in range(LATENCY_RUNS):
        t0 = time.perf_counter()
        qe = model.encode([sample_q], normalize_embeddings=True)
        index.search(qe.astype(np.float32), k=TOP_K)
        lats.append((time.perf_counter() - t0) * 1000)
    m['latency_ms_mean'] = round(np.mean(lats), 2)
    # With only LATENCY_RUNS samples these percentiles sit close to the
    # slowest observed runs; raise LATENCY_RUNS for meaningful tail numbers.
    m['latency_ms_p95'] = round(np.percentile(lats, 95), 2)
    m['latency_ms_p99'] = round(np.percentile(lats, 99), 2)
    print(f' Latency : mean={m["latency_ms_mean"]}ms p95={m["latency_ms_p95"]}ms p99={m["latency_ms_p99"]}ms')

    # 5. Recall@K and MRR
    recall_at = defaultdict(list)
    mrr_list, cosines = [], []
    for qtext, relevant in GROUND_TRUTH.items():
        relevant_set = set(relevant)  # built once: O(1) membership for recall and MRR
        qe = model.encode([qpfx + qtext], normalize_embeddings=True)
        D, I = index.search(qe.astype(np.float32), k=TOP_K)
        ret = I[0].tolist()
        cosines.extend(D[0].tolist())
        for k in [1, 3, 5]:
            hits = len(relevant_set.intersection(ret[:k]))
            # Capped recall: divide by the most hits achievable within k results
            recall_at[k].append(hits / min(len(relevant), k))
        # Reciprocal rank of the first relevant hit, 0.0 when none is retrieved
        rr = next((1 / r for r, d in enumerate(ret, 1) if d in relevant_set), 0.0)
        mrr_list.append(rr)

    m['recall@1'] = round(np.mean(recall_at[1]), 4)
    m['recall@3'] = round(np.mean(recall_at[3]), 4)
    m['recall@5'] = round(np.mean(recall_at[5]), 4)
    m['mrr'] = round(np.mean(mrr_list), 4)
    m['avg_top_cosine'] = round(np.mean(cosines), 4)
    m['cosine_scores'] = cosines
    print(f' Recall@1/3/5 : {m["recall@1"]} / {m["recall@3"]} / {m["recall@5"]}')
    print(f' MRR : {m["mrr"]} | Avg cosine: {m["avg_top_cosine"]}')

    # Drop the reference before the next model is loaded
    del model
    return m
# Benchmark every configured model in turn; keep the full per-model dicts
# (including the raw cosine lists) for the plots further down.
all_metrics = [
    benchmark_model(display_name, hub_id)
    for display_name, hub_id in MODELS.items()
]

# Tabular view of the scalar metrics only — the per-hit cosine list is left out.
scalar_rows = [
    {key: metrics[key] for key in metrics if key != 'cosine_scores'}
    for metrics in all_metrics
]
df = pd.DataFrame(scalar_rows)

print('\nDone! Summary:')
headline_cols = ['model', 'throughput_docs_s', 'latency_ms_mean', 'recall@5', 'mrr']
print(df[headline_cols].to_string(index=False))
In [18]:
# ── 2-column Matplotlib Dashboard (2 charts per row) ────────────────
model_names = df['model'].tolist()
# Cycle the shared palette so every model gets a colour even when there are
# more models than palette entries.
colors_palette = [COLORS[i % len(COLORS)] for i in range(len(model_names))]


def _style_panel(ax, title, ylabel):
    """Apply the dashboard's common title, y-label, panel colour and y-grid."""
    ax.set_title(title, fontsize=12, fontweight='bold', pad=10)
    ax.set_ylabel(ylabel, fontsize=10)
    ax.set_facecolor('#161b22')
    ax.grid(axis='y', alpha=0.3, color='#30363d')


def _bar_panel(ax, column, title, ylabel, label_fmt):
    """Draw one bar per model for df[column] and print each value above its bar."""
    bars = ax.bar(model_names, df[column], color=colors_palette, alpha=0.8,
                  edgecolor='white', linewidth=1.5)
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height, label_fmt.format(height),
                ha='center', va='bottom', fontsize=9, color='#e6edf3')
    _style_panel(ax, title, ylabel)
    ax.tick_params(axis='x', rotation=45, labelsize=10)


def _grouped_panel(ax, series, title, ylabel, legend_loc, width=0.25):
    """Draw three side-by-side bars per model; series is [(legend label, df column), ...]."""
    x = np.arange(len(model_names))
    # Same colour per model; the three series are told apart by opacity.
    for offset, alpha, (label, column) in zip((-width, 0, width), (1.0, 0.65, 0.4), series):
        ax.bar(x + offset, df[column], width, label=label, color=colors_palette,
               alpha=alpha, edgecolor='white', linewidth=0.5)
    _style_panel(ax, title, ylabel)
    ax.set_xticks(x)
    ax.set_xticklabels(model_names, rotation=45, fontsize=10)
    ax.legend(fontsize=8, loc=legend_loc)


fig, axes = plt.subplots(5, 2, figsize=(16, 28))
fig.patch.set_facecolor('#0d1117')

# Row 1: throughput | MRR
_bar_panel(axes[0, 0], 'throughput_docs_s', 'Throughput (docs/sec) ↑', 'docs/sec', '{:.1f}')
_bar_panel(axes[0, 1], 'mrr', 'Mean Reciprocal Rank (MRR) ↑', 'MRR', '{:.3f}')

# Row 2: latency (mean/p95/p99) | recall (R@1/R@3/R@5)
_grouped_panel(axes[1, 0],
               [('Mean', 'latency_ms_mean'), ('p95', 'latency_ms_p95'), ('p99', 'latency_ms_p99')],
               'Query Latency mean/p95/p99 (ms) ↓', 'Latency (ms)', 'upper right')
_grouped_panel(axes[1, 1],
               [('R@1', 'recall@1'), ('R@3', 'recall@3'), ('R@5', 'recall@5')],
               'Recall@1 / @3 / @5 ↑', 'Recall', 'lower right')

# Row 3: embedding dimension | cosine score distribution (box plot)
_bar_panel(axes[2, 0], 'embedding_dim', 'Embedding Dimension', 'Dimension', '{:.0f}')

ax = axes[2, 1]
cosine_data = [m['cosine_scores'] for m in all_metrics]
bp = ax.boxplot(cosine_data, patch_artist=True, widths=0.6)
for patch, color in zip(bp['boxes'], colors_palette):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)
for whisker in bp['whiskers']:
    whisker.set_color('#e6edf3')
for median in bp['medians']:
    median.set_color('#e6edf3')
    median.set_linewidth(2)
_style_panel(ax, 'Cosine Score Distribution', 'Cosine Scores')
# boxplot places its boxes at positions 1..n
ax.set_xticks(range(1, len(model_names) + 1))
ax.set_xticklabels(model_names, rotation=45, fontsize=10)

# Row 4: index memory | model load time
_bar_panel(axes[3, 0], 'memory_mb', 'Index Memory (MB) ↓', 'Memory (MB)', '{:.1f}')
_bar_panel(axes[3, 1], 'load_time_s', 'Model Load Time (s) ↓', 'Load Time (s)', '{:.2f}')

# Row 5: average top-K cosine | unused slot (hidden)
_bar_panel(axes[4, 0], 'avg_top_cosine', f'Avg Top-{TOP_K} Cosine ↑', 'Cosine Score', '{:.3f}')
axes[4, 1].axis('off')

# Subtitle reflects what was actually benchmarked instead of hard-coded numbers
fig.suptitle('Embedding Model Comparison Dashboard\n'
             f'BEIR trec-covid · {len(corpus_texts)} docs · {len(GROUND_TRUTH)} queries · '
             f'{LATENCY_RUNS} latency runs',
             fontsize=16, fontweight='bold', y=0.995, color='#e6edf3')
plt.tight_layout(rect=[0, 0, 1, 0.99])
plt.subplots_adjust(hspace=0.5)
plt.show()
In [16]:
# ── Radar chart — overall profile per model ──────────────────────────
import numpy as np
from math import pi
def minmax_norm(s, invert=False):
    """Rescale numeric values linearly onto [0, 1].

    Args:
        s: Any 1-D array-like of numbers (pandas Series, NumPy array, list, ...).
        invert: If True, flip the scale so the smallest input maps to 1.0;
            used for "lower is better" metrics.

    Returns:
        A list of floats, one per input value. An empty input yields ``[]``.
        When all values are equal there is no range to scale by, so every
        entry is 0.5.
    """
    values = np.asarray(s, dtype=float)
    if values.size == 0:
        return []
    # nanmin/nanmax skip NaN entries when locating the range
    lo, hi = np.nanmin(values), np.nanmax(values)
    if hi == lo:
        return [0.5] * len(values)
    scaled = (values - lo) / (hi - lo)
    if invert:
        scaled = 1 - scaled
    return scaled.tolist()
# One radar axis per metric, each min-max normalised across models.
# "Lower is better" metrics are inverted so that outward always means better.
axes_dict = {
    'Throughput': minmax_norm(df['throughput_docs_s']),
    'Low Latency': minmax_norm(df['latency_ms_mean'], invert=True),
    'Recall@5': minmax_norm(df['recall@5']),
    'MRR': minmax_norm(df['mrr']),
    'Cosine Qual': minmax_norm(df['avg_top_cosine']),
    'Low Memory': minmax_norm(df['memory_mb'], invert=True),
}
cats = list(axes_dict)
num_vars = len(cats)

# Compute angle for each axis, then repeat the first to close the polygon
angles = [n / float(num_vars) * 2 * pi for n in range(num_vars)]
angles += angles[:1]

# Create polar subplot
fig, ax = plt.subplots(figsize=(12, 10), subplot_kw=dict(projection='polar'))
fig.patch.set_facecolor('#0d1117')
ax.set_facecolor('#161b22')

# Cycle the shared palette: indexing a list truncated to its own length
# raised IndexError as soon as there were more models than colours.
colors = [COLORS[i % len(COLORS)] for i in range(len(model_names))]
for i, (mn, color) in enumerate(zip(model_names, colors)):
    vals = [axes_dict[a][i] for a in cats]
    vals += vals[:1]  # close the polygon
    ax.plot(angles, vals, 'o-', linewidth=2.5, label=mn, color=color)
    ax.fill(angles, vals, alpha=0.25, color=color)

ax.set_xticks(angles[:-1])
ax.set_xticklabels(cats, size=11, color='#e6edf3')
ax.set_ylim(0, 1)
ax.set_yticks([0.2, 0.4, 0.6, 0.8, 1.0])
ax.set_yticklabels(['0.2', '0.4', '0.6', '0.8', '1.0'], size=8, color='#a0aec0')
ax.grid(True, color='#30363d', alpha=0.3)
plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1), fontsize=10, framealpha=0.9)
plt.title('Overall Model Profile\nAll metrics normalised 0–1 (higher = better on every axis)',
          fontsize=14, fontweight='bold', color='#e6edf3', pad=20)
plt.tight_layout()
plt.show()
In [13]:
# ── Latency vs Recall@5 bubble chart — the sweet-spot view ───────────
fig, ax = plt.subplots(figsize=(12, 7))
fig.patch.set_facecolor('#0d1117')
ax.set_facecolor('#161b22')

# One colour per model, cycling the shared palette: scatter() rejects a colour
# list whose length differs from the number of points.
colors = [COLORS[i % len(COLORS)] for i in range(len(model_names))]
max_throughput = df['throughput_docs_s'].max()
sizes = (df['throughput_docs_s'] / max_throughput) * 1000  # Scale bubble sizes
ax.scatter(df['latency_ms_mean'], df['recall@5'], s=sizes, c=colors, alpha=0.6,
           edgecolors='white', linewidth=2)

# Add model name labels
for _, row in df.iterrows():
    ax.annotate(row['model'],
                xy=(row['latency_ms_mean'], row['recall@5']),
                xytext=(5, 5), textcoords='offset points',
                fontsize=10, color='#e6edf3', fontweight='bold')

ax.set_xlabel('Query Latency mean (ms) ↓', fontsize=12, fontweight='bold', color='#e6edf3')
ax.set_ylabel('Recall@5 ↑', fontsize=12, fontweight='bold', color='#e6edf3')
# Latency is on x (lower = left) and recall on y (higher = up), so the sweet
# spot is the top-left corner.
ax.set_title('Latency vs Recall@5 (bubble size = throughput docs/sec)\nIdeal: top-left — low latency, high recall',
             fontsize=13, fontweight='bold', color='#e6edf3', pad=15)
ax.grid(True, alpha=0.2, color='#30363d')
ax.tick_params(colors='#e6edf3', labelsize=10)

# Add legend for bubble sizes: empty scatters act as size-only legend handles
legend_sizes = [df['throughput_docs_s'].min(), df['throughput_docs_s'].median(), max_throughput]
legend_bubbles = [
    ax.scatter([], [], s=(size / max_throughput) * 1000,
               c='#e6edf3', alpha=0.6, edgecolors='white', linewidth=1.5)
    for size in legend_sizes
]
ax.legend(legend_bubbles, [f'{s:.0f} docs/s' for s in legend_sizes],
          scatterpoints=1, frameon=True, labelspacing=2, title='Throughput',
          loc='lower left', fontsize=9, title_fontsize=10, framealpha=0.9)
plt.tight_layout()
plt.show()
In [20]:
# ── Summary table + winners ──────────────────────────────────────────
def _draw_table(rows, col_widths, title, *, figsize, header_color, font_size,
                row_scale, header_font_size=None, bold_col=None):
    """Render rows (header first) as a dark-themed matplotlib table and show it.

    Args:
        rows: List of rows; rows[0] is the header.
        col_widths: Relative width of each column.
        title: Figure title.
        figsize: Figure size in inches.
        header_color: Background colour of the header row.
        font_size: Font size of the table text.
        row_scale: Vertical scale factor applied to every row.
        header_font_size: Optional font-size override for the header row.
        bold_col: Optional index of a data column to render in bold.

    Returns:
        The created matplotlib table.
    """
    fig, ax = plt.subplots(figsize=figsize)
    fig.patch.set_facecolor('#0d1117')
    ax.axis('tight')
    ax.axis('off')

    table = ax.table(cellText=rows, cellLoc='center', loc='center', colWidths=col_widths)
    table.auto_set_font_size(False)
    table.set_fontsize(font_size)
    table.scale(1, row_scale)

    header_props = {'weight': 'bold', 'color': '#0d1117'}
    if header_font_size is not None:
        header_props['size'] = header_font_size
    stripe_colors = ['#161b22', '#1c2128']  # alternating data-row backgrounds

    for r, row in enumerate(rows):
        for c in range(len(row)):
            cell = table[(r, c)]
            if r == 0:
                cell.set_facecolor(header_color)
                cell.set_text_props(**header_props)
            else:
                cell.set_facecolor(stripe_colors[r % 2])
                cell.set_text_props(color='#e6edf3',
                                    weight='bold' if c == bold_col else 'normal')
                cell.set_edgecolor('#30363d')

    plt.title(title, fontsize=14, fontweight='bold', color='#e6edf3', pad=20)
    plt.tight_layout()
    plt.show()
    return table


# ── Summary table: one row per model ─────────────────────────────────
summary = df[['model', 'embedding_dim', 'throughput_docs_s', 'latency_ms_mean',
              'latency_ms_p95', 'recall@1', 'recall@3', 'recall@5', 'mrr',
              'avg_top_cosine', 'memory_mb', 'load_time_s']].copy()
summary.columns = ['Model', 'Dim', 'Thru\n(docs/s)', 'Latency\nMean(ms)', 'Latency\np95(ms)',
                   'R@1', 'R@3', 'R@5', 'MRR', 'AvgCos', 'Mem\n(MB)', 'Load\n(s)']

# Header row followed by one list per model
table_data = [summary.columns.tolist()] + [row.tolist() for _, row in summary.iterrows()]
table = _draw_table(
    table_data,
    [0.10, 0.08, 0.10, 0.10, 0.10, 0.08, 0.08, 0.08, 0.08, 0.10, 0.08, 0.08],
    'EMBEDDING MODEL COMPARISON — FINAL SUMMARY',
    figsize=(16, 6), header_color='#3fb950', font_size=10, row_scale=2.5,
)

# ── Winners table: best model per metric ─────────────────────────────
# (label, df column, whether the largest or the smallest value wins)
winner_specs = [
    ('🚀 Throughput', 'throughput_docs_s', 'max'),
    ('⚡ Latency', 'latency_ms_mean', 'min'),
    ('🎯 Recall@5', 'recall@5', 'max'),
    ('📊 MRR', 'mrr', 'max'),
    ('✨ Cosine Quality', 'avg_top_cosine', 'max'),
    ('💾 Low Memory', 'memory_mb', 'min'),
]
winners_data = [['Metric', 'Winner Model', 'Score']]
for metric, col, better in winner_specs:
    # idxmax / idxmin return the first occurrence, so ties go to the model
    # listed first in df.
    best_idx = df[col].idxmax() if better == 'max' else df[col].idxmin()
    value = df.loc[best_idx, col]
    winner = df.loc[best_idx, 'model']
    winners_data.append([metric, winner, f'{value}'])

winners_table = _draw_table(
    winners_data,
    [0.35, 0.35, 0.30],
    'WINNERS BY METRIC',
    figsize=(12, 5), header_color='#a371f7', font_size=11, row_scale=2.8,
    header_font_size=12, bold_col=1,
)