%matplotlib inline
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# SciDEX dark theme for all plots.
# The settings are collected in one named mapping, grouped by the part of the
# figure they style, and then pushed into matplotlib's global rcParams.
scidex_dark_theme = {
    # Backgrounds
    'figure.facecolor': '#0a0a14',
    'axes.facecolor': '#151525',
    'legend.facecolor': '#151525',
    # Borders
    'axes.edgecolor': '#333',
    'legend.edgecolor': '#333',
    # Text and labels
    'axes.labelcolor': '#e0e0e0',
    'text.color': '#e0e0e0',
    # Tick marks
    'xtick.color': '#888',
    'ytick.color': '#888',
    # Resolution, on screen and when saved
    'figure.dpi': 120,
    'savefig.dpi': 120,
}
plt.rcParams.update(scidex_dark_theme)
print('Environment ready: numpy, matplotlib, scipy')

Environment ready: numpy, matplotlib, scipy

# Score Heatmap — Hypothesis Dimensions
# Draws the ten per-dimension scores as a one-row colour strip and writes the
# numeric value inside each cell.
scores = {
    "composite": 0.785131,
    "mech": 0.9, "evid": 0.85, "novel": 0.95, "feas": 0.6, "impact": 0.9,
    "drug": 0.65, "safety": 0.6, "comp": 0.85, "data": 0.75, "reprod": 0.7,
}

# (score key, tick label) pairs in left-to-right display order.
heatmap_columns = [
    ('mech', 'Mechanistic'), ('evid', 'Evidence'), ('novel', 'Novelty'),
    ('feas', 'Feasibility'), ('impact', 'Impact'), ('drug', 'Druggability'),
    ('safety', 'Safety'), ('comp', 'Competition'), ('data', 'Data Avail.'),
    ('reprod', 'Reproducibility'),
]
dim_labels = [label for _, label in heatmap_columns]

# imshow expects a 2-D array, hence the single-row (1 x 10) shape.
matrix = np.array([scores[key] for key, _ in heatmap_columns]).reshape(1, -1)

fig, ax = plt.subplots(figsize=(14, 2.5))
im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)

column_positions = range(matrix.shape[1])
ax.set_xticks(column_positions)
ax.set_xticklabels(dim_labels, rotation=45, ha='right', fontsize=10)
ax.set_yticks([0])
ax.set_yticklabels(['Score'], fontsize=10)

# Annotate every cell; the label colour flips at a score of 0.5.
for column, cell_score in enumerate(matrix[0]):
    if cell_score > 0.5:
        text_colour = '#000'
    else:
        text_colour = '#fff'
    ax.text(column, 0, f'{cell_score:.2f}', ha='center', va='center',
            fontsize=11, fontweight='bold', color=text_colour)

cbar = plt.colorbar(im, ax=ax, shrink=0.8, pad=0.02)
cbar.set_label('Score (0-1)', fontsize=10, color='#e0e0e0')
cbar.ax.yaxis.set_tick_params(color='#888')

ax.set_title('Hypothesis Score Profile — 10 Dimensions', fontsize=14,
             color='#4fc3f7', fontweight='bold')
plt.tight_layout()
plt.show()

# Multi-Dimensional Score Radar Chart
# Plots the ten dimension scores on a polar axis, overlays a dashed reference
# ring at 0.7, then prints the composite score and the extreme dimensions.
scores = {
    "composite": 0.785131,
    "mech": 0.9, "evid": 0.85, "novel": 0.95, "feas": 0.6, "impact": 0.9,
    "drug": 0.65, "safety": 0.6, "comp": 0.85, "data": 0.75, "reprod": 0.7,
}
title = "CYP46A1 Overexpression Gene Therapy"

dim_keys = ['mech', 'evid', 'novel', 'feas', 'impact', 'drug', 'safety', 'comp', 'data', 'reprod']
dimensions = ['Mechanistic', 'Evidence', 'Novelty', 'Feasibility', 'Impact',
              'Druggability', 'Safety', 'Competition', 'Data Avail.', 'Reproducibility']

fig, ax = plt.subplots(figsize=(10, 8), subplot_kw=dict(polar=True))

# One spoke per dimension, evenly spaced around the circle. The first point
# is repeated at the end of both sequences so the outline closes on itself.
spoke_angles = np.linspace(0, 2 * np.pi, len(dimensions), endpoint=False).tolist()
angles = spoke_angles + spoke_angles[:1]

per_dimension = [scores[key] for key in dim_keys]
values = per_dimension + per_dimension[:1]

ax.plot(angles, values, 'o-', linewidth=2.5, color='#4fc3f7', alpha=0.9, markersize=8)
ax.fill(angles, values, alpha=0.2, color='#4fc3f7')

# Reference ring at 0.7, one point per plotted angle.
threshold = [0.7 for _ in angles]
ax.plot(angles, threshold, '--', linewidth=1, color='#81c784', alpha=0.5, label='Strong (0.7)')

ax.set_xticks(angles[:-1])
ax.set_xticklabels(dimensions, fontsize=9)
ax.set_ylim(0, 1)
ax.set_title(f'Score Radar: {title[:50]}', fontsize=14, color='#4fc3f7',
             fontweight='bold', pad=20)
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1), fontsize=9,
          facecolor='#151525', edgecolor='#333', labelcolor='#e0e0e0')
plt.tight_layout()
plt.show()

# argmax/argmin return the first matching index, so when several dimensions
# share the extreme score only the earliest one in display order is named.
strongest_idx = np.argmax(per_dimension)
weakest_idx = np.argmin(per_dimension)
print(f"\nComposite Score: {scores.get('composite', 0):.3f}")
print(f"Strongest dimension: {dimensions[strongest_idx]}")
print(f"Weakest dimension: {dimensions[weakest_idx]}")

Composite Score: 0.785
Strongest dimension: Novelty
Weakest dimension: Feasibility

# Differential Gene Expression — Volcano Plot
# Simulates control and disease expression samples for each gene, runs a
# two-sample t-test per gene and stores the summary statistics in `results`.
np.random.seed(42)
genes = ["CYP46A1", "BACE1", "APP", "APOE", "ABCA1", "HMGCR", "LXR", "SREBP2"]
fold_changes = {
    "CYP46A1": -1.4, "BACE1": 1.7, "APP": 1.9, "APOE": 1.5,
    "ABCA1": -1.1, "HMGCR": 0.6, "LXR": -0.7, "SREBP2": 0.8,
}

n_samples = 25
results = []
for gene in genes:
    # The fallback shift is drawn on every iteration, even though each gene
    # listed above has a preset shift. Keeping this draw, and keeping it
    # first, leaves the seeded random stream (and so every simulated value)
    # unchanged.
    fallback_shift = np.random.uniform(-1.5, 1.5)
    shift = fold_changes.get(gene, fallback_shift)

    control_samples = np.random.normal(loc=8.0, scale=0.6, size=n_samples)
    disease_samples = np.random.normal(loc=8.0 + shift, scale=0.8, size=n_samples)

    # NOTE(review): ttest_ind runs with its default settings although the two
    # groups are drawn with different scales (0.6 vs 0.8) — confirm whether
    # equal_var=False was intended.
    _, raw_p = stats.ttest_ind(control_samples, disease_samples)
    floored_p = max(raw_p, 1e-12)  # floor keeps -log10(p) bounded

    control_avg = np.mean(control_samples)
    disease_avg = np.mean(disease_samples)
    results.append({
        'gene': gene,
        'log2fc': disease_avg - control_avg,
        'p_value': floored_p,
        'neg_log10_p': -np.log10(floored_p),
        'control_mean': control_avg,
        'disease_mean': disease_avg,
    })

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 7))

# Volcano plot (left panel).
# A point is coloured only when it clears both cut-offs (|log2fc| > 0.5 and
# p < 0.05): red for positive changes, blue for negative ones. Every other
# point is drawn grey.
for rec in results:
    effect = rec['log2fc']
    strength = rec['neg_log10_p']
    passes_cutoffs = abs(effect) > 0.5 and rec['p_value'] < 0.05
    if not passes_cutoffs:
        marker_colour = '#555'
    elif effect > 0:
        marker_colour = '#ef5350'
    else:
        marker_colour = '#4fc3f7'
    ax1.scatter(effect, strength, c=marker_colour, s=120, alpha=0.85,
                edgecolors='#333', zorder=3)
    ax1.annotate(rec['gene'], (effect, strength), fontsize=8, color='#e0e0e0',
                 xytext=(5, 5), textcoords='offset points')

# Guide lines marking the significance and effect-size cut-offs.
ax1.axhline(y=-np.log10(0.05), color='#ffd54f', linestyle='--', alpha=0.5, label='p=0.05')
for effect_cutoff in (-0.5, 0.5):
    ax1.axvline(x=effect_cutoff, color='#888', linestyle='--', alpha=0.3)
ax1.set_xlabel('log2(Fold Change)', fontsize=12)
ax1.set_ylabel('-log10(p-value)', fontsize=12)
ax1.set_title('Volcano Plot: Disease vs Control', fontsize=14, color='#4fc3f7', fontweight='bold')
ax1.legend(fontsize=9, facecolor='#151525', edgecolor='#333', labelcolor='#e0e0e0')

# Expression barplot (right panel): one control/disease pair of bars per gene.
x = np.arange(len(genes))
width = 0.35
ctrl_means = [rec['control_mean'] for rec in results]
dis_means = [rec['disease_mean'] for rec in results]

for offset, heights, group, face in (
    (-width / 2, ctrl_means, 'Control', '#4fc3f7'),
    (width / 2, dis_means, 'Disease', '#ef5350'),
):
    ax2.bar(x + offset, heights, width, label=group, color=face, alpha=0.8, edgecolor='#333')
ax2.set_xticks(x)
ax2.set_xticklabels(genes, rotation=45, ha='right', fontsize=9)
ax2.set_ylabel('Expression Level (log2 CPM)', fontsize=11)
ax2.set_title('Gene Expression: Control vs Disease', fontsize=14, color='#4fc3f7', fontweight='bold')
ax2.legend(fontsize=10, facecolor='#151525', edgecolor='#333', labelcolor='#e0e0e0')

plt.tight_layout()
plt.show()

# Summary table: one row per gene, ordered by ascending p-value. The sort is
# stable, so genes with equal p-values keep their order from `results`.
table_header = f"{'Gene':<12} {'log2FC':>10} {'p-value':>12} {'Direction':>12} {'Significant':>12}"
print("\nDifferential Expression Summary", "=" * 70, table_header, "-" * 70, sep="\n")
for rec in sorted(results, key=lambda item: item['p_value']):
    effect_size = abs(rec['log2fc'])
    # Two tiers: a strict one (|log2fc| > 1.0, p < 0.001) and a lenient one
    # (|log2fc| > 0.5, p < 0.05). Anything else is reported as 'no'.
    if effect_size > 1.0 and rec['p_value'] < 0.001:
        sig = 'YES ***'
    elif effect_size > 0.5 and rec['p_value'] < 0.05:
        sig = 'YES *'
    else:
        sig = 'no'
    if rec['log2fc'] > 0:
        direction = 'UP'
    else:
        direction = 'DOWN'
    print(f"{rec['gene']:<12} {rec['log2fc']:>10.3f} {rec['p_value']:>12.2e} {direction:>12} {sig:>12}")

Differential Expression Summary
======================================================================
Gene             log2FC      p-value    Direction  Significant
----------------------------------------------------------------------
BACE1             1.621     1.00e-12           UP      YES ***
APP               2.150     1.00e-12           UP      YES ***
APOE              1.481     8.35e-11           UP      YES ***
CYP46A1          -1.265     2.52e-07         DOWN      YES ***
ABCA1            -1.064     3.74e-06         DOWN      YES ***
HMGCR             0.794     3.28e-04           UP        YES *
SREBP2            0.767     4.07e-04           UP        YES *
LXR              -0.555     9.01e-03         DOWN        YES *

# Pathway Enrichment Analysis
# Each row is [pathway name, enrichment score, p-value, gene count].
pathways_data = [
    ["Cholesterol Metabolism", 9.1, 2e-08, 6],
    ["Lipid Raft Assembly", 7.8, 5e-07, 5],
    ["APP Processing", 7.2, 2e-06, 4],
    ["LXR/RXR Activation", 6.5, 8e-06, 4],
    ["Mevalonate Pathway", 6.0, 3e-05, 5],
    ["ABC Transporter Activity", 5.5, 7e-05, 3],
    ["Sterol Regulatory Pathway", 5.1, 0.0002, 4],
    ["Autophagy", 4.6, 0.0005, 3],
    ["Neuroinflammation", 4.2, 0.001, 4],
    ["Synaptic Vesicle Recycling", 3.7, 0.004, 2],
    ["Endosomal Sorting", 3.3, 0.008, 3],
    ["A\u03b2 Clearance", 2.9, 0.012, 2],
]

# Transpose the rows into four parallel column lists.
pathways, enrichment_scores, p_values, gene_counts = (
    list(column) for column in zip(*pathways_data)
)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))

# Both panels put one pathway on each row of the y-axis.
row_positions = range(len(pathways))
neg_log_p = [-np.log10(p) for p in p_values]

# Dot plot (left panel): x is the enrichment score, marker area scales with
# the gene count, and marker colour encodes -log10(p).
sizes = [40 * count for count in gene_counts]
colors = neg_log_p
scatter = ax1.scatter(enrichment_scores, row_positions, s=sizes,
                      c=colors, cmap='YlOrRd', alpha=0.85, edgecolors='#333')
ax1.set_yticks(row_positions)
ax1.set_yticklabels(pathways, fontsize=9)
ax1.set_xlabel('Enrichment Score', fontsize=11)
ax1.set_title('Pathway Enrichment (Dot Plot)', fontsize=14, color='#4fc3f7', fontweight='bold')
cbar = plt.colorbar(scatter, ax=ax1, shrink=0.6)
cbar.set_label('-log10(p-value)', fontsize=9, color='#e0e0e0')
cbar.ax.yaxis.set_tick_params(color='#888')

# Significance bars (right panel). Each bar takes the colour of the first
# tier whose upper bound its p-value falls below, or grey when none applies.
significance_tiers = ((0.001, '#ef5350'), (0.01, '#ff8a65'), (0.05, '#ffd54f'))
bar_colors = [
    next((tier_colour for upper, tier_colour in significance_tiers if p < upper), '#888')
    for p in p_values
]
ax2.barh(row_positions, list(neg_log_p),
         color=bar_colors, alpha=0.85, edgecolor='#333')
ax2.set_yticks(row_positions)
ax2.set_yticklabels(pathways, fontsize=9)
ax2.set_xlabel('-log10(p-value)', fontsize=11)
ax2.set_title('Statistical Significance', fontsize=14, color='#4fc3f7', fontweight='bold')
for cutoff, line_colour in ((0.05, '#ffd54f'), (0.001, '#ef5350')):
    ax2.axvline(x=-np.log10(cutoff), color=line_colour, linestyle='--', alpha=0.7,
                label=f'p={cutoff}')
ax2.legend(fontsize=9, facecolor='#151525', edgecolor='#333', labelcolor='#e0e0e0')

plt.tight_layout()
plt.show()

# Text summary: one fixed-width row per pathway, in the order the data lists them.
column_header = f"{'Pathway':<35} {'Enrichment':>12} {'p-value':>12} {'Genes':>8}"
print("\nPathway Enrichment Summary", "=" * 80, column_header, "-" * 80, sep="\n")
for row in zip(pathways, enrichment_scores, p_values, gene_counts):
    name, score, p_val, n_genes = row
    print(f"{name:<35} {score:>12.2f} {p_val:>12.2e} {n_genes:>8}")

Pathway Enrichment Summary
================================================================================
Pathway                               Enrichment      p-value    Genes
--------------------------------------------------------------------------------
Cholesterol Metabolism                      9.10     2.00e-08        6
Lipid Raft Assembly                         7.80     5.00e-07        5
APP Processing                              7.20     2.00e-06        4
LXR/RXR Activation                          6.50     8.00e-06        4
Mevalonate Pathway                          6.00     3.00e-05        5
ABC Transporter Activity                    5.50     7.00e-05        3
Sterol Regulatory Pathway                   5.10     2.00e-04        4
Autophagy                                   4.60     5.00e-04        3
Neuroinflammation                           4.20     1.00e-03        4
Synaptic Vesicle Recycling                  3.70     4.00e-03        2
Endosomal Sorting                           3.30     8.00e-03        3
Aβ Clearance                                2.90     1.20e-02        2

# Statistical Analysis — Hypothesis Score Profile
# Prints four text reports on the ten dimension scores: descriptive
# statistics, a bar-style strength chart, percentiles against a reference
# normal distribution, and each dimension's share of the weighted total.
scores = {
    "composite": 0.785131,
    "mech": 0.9, "evid": 0.85, "novel": 0.95, "feas": 0.6, "impact": 0.9,
    "drug": 0.65, "safety": 0.6, "comp": 0.85, "data": 0.75, "reprod": 0.7,
}

dim_keys = ['mech', 'evid', 'novel', 'feas', 'impact', 'drug', 'safety', 'comp', 'data', 'reprod']
dim_labels = ['Mechanistic', 'Evidence', 'Novelty', 'Feasibility', 'Impact',
              'Druggability', 'Safety', 'Competition', 'Data Avail.', 'Reproducibility']

values = np.array([scores[key] for key in dim_keys])

rule = "=" * 60
print(rule)
print("STATISTICAL PROFILE ANALYSIS")
print(rule)

# Descriptive statistics. np.std is called with its defaults.
mean_score = np.mean(values)
spread = np.std(values)
print(f"\nComposite Score: {scores.get('composite', 0):.4f}")
print(f"Mean dimension score: {mean_score:.4f}")
print(f"Median: {np.median(values):.4f}")
print(f"Std Dev: {spread:.4f}")
print(f"CV (coefficient of variation): {spread/mean_score*100:.1f}%")
print(f"Range: {np.min(values):.3f} — {np.max(values):.3f}")

# Strength chart: a 30-character bar per dimension plus a star rating.
print("\nSTRENGTH ANALYSIS")
print("-" * 60)
for name, score in zip(dim_labels, values):
    filled = int(score * 30)  # truncates, so partial cells stay empty
    gauge = '█' * filled + '░' * (30 - filled)
    if score >= 0.9:
        stars = '★★★'
    elif score >= 0.8:
        stars = '★★'
    elif score >= 0.7:
        stars = '★'
    else:
        stars = ''
    print(f"{name:<20} {gauge} {score:.2f} {stars}")

# Percentile analysis: where each score sits in a normal distribution with
# mean 0.6 and standard deviation 0.15, taken here as the typical profile.
print("\nPERCENTILE ANALYSIS (vs typical SciDEX hypotheses)")
print("-" * 60)
percentiles = stats.norm.cdf(values, loc=0.6, scale=0.15) * 100
for name, score, pct in zip(dim_labels, values, percentiles):
    print(f"{name:<20} Score: {score:.2f}  Percentile: {pct:>5.1f}th")

# Contribution analysis: each dimension's weighted score (value x weight)
# expressed as a percentage of the sum of all weighted scores.
# NOTE(review): the shares are relative to sum(values * weights), not to
# scores['composite'] — confirm that is the intended denominator.
print("\nDIMENSION CONTRIBUTION ANALYSIS")
print("-" * 60)
weights = np.array([0.15, 0.12, 0.10, 0.10, 0.12, 0.10, 0.08, 0.08, 0.08, 0.07])
contributions = values * weights
norm_contrib = contributions / contributions.sum() * 100
for name, score, share in zip(dim_labels, values, norm_contrib):
    print(f"{name:<20} {score:.2f} → {share:>5.1f}% of composite")

============================================================
STATISTICAL PROFILE ANALYSIS
============================================================

Composite Score: 0.7851
Mean dimension score: 0.7750
Median: 0.8000
Std Dev: 0.1250
CV (coefficient of variation): 16.1%
Range: 0.600 — 0.950

STRENGTH ANALYSIS
------------------------------------------------------------
Mechanistic          ███████████████████████████░░░ 0.90 ★★★
Evidence             █████████████████████████░░░░░ 0.85 ★★
Novelty              ████████████████████████████░░ 0.95 ★★★
Feasibility          ██████████████████░░░░░░░░░░░░ 0.60 
Impact               ███████████████████████████░░░ 0.90 ★★★
Druggability         ███████████████████░░░░░░░░░░░ 0.65 
Safety               ██████████████████░░░░░░░░░░░░ 0.60 
Competition          █████████████████████████░░░░░ 0.85 ★★
Data Avail.          ██████████████████████░░░░░░░░ 0.75 ★
Reproducibility      █████████████████████░░░░░░░░░ 0.70 ★

PERCENTILE ANALYSIS (vs typical SciDEX hypotheses)
------------------------------------------------------------
Mechanistic          Score: 0.90  Percentile:  97.7th
Evidence             Score: 0.85  Percentile:  95.2th
Novelty              Score: 0.95  Percentile:  99.0th
Feasibility          Score: 0.60  Percentile:  50.0th
Impact               Score: 0.90  Percentile:  97.7th
Druggability         Score: 0.65  Percentile:  63.1th
Safety               Score: 0.60  Percentile:  50.0th
Competition          Score: 0.85  Percentile:  95.2th
Data Avail.          Score: 0.75  Percentile:  84.1th
Reproducibility      Score: 0.70  Percentile:  74.8th

DIMENSION CONTRIBUTION ANALYSIS
------------------------------------------------------------
Mechanistic          0.90 →  17.1% of composite
Evidence             0.85 →  12.9% of composite
Novelty              0.95 →  12.0% of composite
Feasibility          0.60 →   7.6% of composite
Impact               0.90 →  13.7% of composite
Druggability         0.65 →   8.2% of composite
Safety               0.60 →   6.1% of composite
Competition          0.85 →   8.6% of composite
Data Avail.          0.75 →   7.6% of composite
Reproducibility      0.70 →   6.2% of composite

CYP46A1 Overexpression Gene Therapy -- Computational Analysis

CYP46A1 Overexpression Gene Therapy

1. Composite Score Ranking

2. Multi-Dimensional Score Radar

3. Differential Gene Expression Analysis

4. Pathway Enrichment Analysis

5. Statistical Profile Analysis

6. Evidence Summary

Supporting Evidence (8 citations)

Contradicting Evidence (5 citations)

7. Mechanism Description