import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Figure defaults for every plot below: 110 dpi on a white background.
matplotlib.rcParams.update({
    'figure.dpi': 110,
    'figure.facecolor': 'white',
})

# Name of the cache sub-directory read by this notebook.
CACHE_SUB = 'seaad'

# Repository root: the current working directory, made absolute.
REPO = Path('.').resolve()
# Put the repository root at the front of the module search path.
sys.path.insert(0, str(REPO))

# Directory holding the cached JSON payloads: <REPO>/data/forge_cache/<CACHE_SUB>.
CACHE = REPO.joinpath('data', 'forge_cache', CACHE_SUB)

def load(name: str):
    """Return the parsed JSON stored at ``CACHE/<name>.json``.

    Args:
        name: Cache entry name, without the ``.json`` suffix.

    Returns:
        The decoded JSON value, or an empty dict when the file is absent.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    p = CACHE / f'{name}.json'
    try:
        # JSON is UTF-8; decoding with the platform default encoding would
        # corrupt or reject non-ASCII content on non-UTF-8 locales.
        return json.loads(p.read_text(encoding='utf-8'))
    except (FileNotFoundError, NotADirectoryError):
        # A missing cache entry is expected: callers test the result for emptiness.
        return {}

# Forge tool provenance: aggregate the last 30 days of `tool_calls` rows by
# (skill_id, status) and print the 20 busiest groups. Best-effort: any failure
# is reported on stdout and the rest of the notebook carries on.
from contextlib import closing

db_path = Path('/home/ubuntu/scidex/scidex.db')
try:
    # sqlite3.connect() would silently create an empty database file when the
    # path does not exist, so fail early instead.
    if not db_path.is_file():
        raise FileNotFoundError(f'database not found: {db_path}')
    # closing() releases the connection even when the query raises.
    with closing(sqlite3.connect(str(db_path))) as db:
        prov = pd.read_sql_query('''
        SELECT skill_id, status, COUNT(*) AS n_calls,
               ROUND(AVG(duration_ms),0) AS mean_ms
        FROM tool_calls
        WHERE created_at >= date('now','-30 days')
        GROUP BY skill_id, status
        ORDER BY n_calls DESC
    ''', db)
    # Display name: the skill id with every 'tool_' occurrence removed.
    prov['tool'] = prov['skill_id'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    # A bare expression inside `try` is never rendered, so print the table.
    print(prov[['tool','status','n_calls','mean_ms']].head(20).to_string(index=False))
except Exception as e:  # deliberate catch-all: provenance is optional
    print(f'Provenance unavailable: {e}')

181 tool-call aggregates (last 30 days):

# Target gene annotations: one row per gene, combining the cached `mygene_<gene>`
# and `hpa_<gene>` payloads. Every missing or empty field is shown as '—'.
def _brief(value, limit=55, max_items=2, placeholder='—'):
    """Render an annotation field as a short display string.

    Lists are reduced to their first ``max_items`` entries joined by ', ';
    anything else is stringified. The text is cut to ``limit`` characters and
    ``placeholder`` is returned when nothing is left to show.
    """
    if isinstance(value, list):
        text = ', '.join(str(v) for v in value[:max_items])
    else:
        text = str(value) if value else ''
    return text[:limit] or placeholder


ann_rows = []
for g in ['APOE', 'BAX', 'BDNF', 'NAMPT']:
    mg = load(f'mygene_{g}')
    hpa = load(f'hpa_{g}')
    # load() returns whatever JSON was cached; only dict payloads carry fields.
    if not isinstance(mg, dict):
        mg = {}
    if not isinstance(hpa, dict):
        hpa = {}
    ann_rows.append({
        'gene': g,
        'name': _brief(mg.get('name')),
        'protein_class': _brief(hpa.get('protein_class')),
        'disease_involvement': _brief(hpa.get('disease_involvement')),
    })
pd.DataFrame(ann_rows)

# GO Biological Process enrichment: tabulate the first 10 cached Enrichr terms.
go_bp = load('enrichr_GO_Biological_Process')
if isinstance(go_bp, list) and go_bp:
    go_df = pd.DataFrame(go_bp[:10])[['term','p_value','odds_ratio','genes']].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count hits before `genes` is flattened to a string.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    # An expression nested in an `if` branch is never rendered, so print it.
    print(go_df[['term','n_hits','p_value','odds_ratio','genes']].to_string(index=False))
else:
    print('No GO:BP enrichment data')

# Visualize top GO BP enrichment: horizontal bars of -log10(p) for the first
# 8 cached terms, first term at the top.
go_bp = load('enrichr_GO_Biological_Process')
if isinstance(go_bp, list) and go_bp:
    top = go_bp[:8]
    # Reversed because barh draws position 0 at the bottom.
    terms = [t['term'][:45] for t in top][::-1]
    # Floor p at 1e-300 so a p-value of 0 does not produce an infinite bar.
    neglogp = [-np.log10(max(t['p_value'], 1e-300)) for t in top][::-1]
    # Numeric positions rather than the labels themselves: two terms that
    # become identical after truncation would otherwise share one bar slot.
    ypos = list(range(len(terms)))
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(ypos, neglogp, color='#4fc3f7')
    ax.set_yticks(ypos)
    ax.set_yticklabels(terms)
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout(); plt.show()
else:
    print('No GO:BP data to plot')

# KEGG pathway enrichment: tabulate the first 10 cached Enrichr pathways.
kegg = load('enrichr_KEGG_Pathways')
if isinstance(kegg, list) and kegg:
    kegg_df = pd.DataFrame(kegg[:10])[['term','p_value','odds_ratio','genes']].copy()
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    # An expression nested in an `if` branch is never rendered, so print it.
    print(kegg_df.to_string(index=False))
else:
    print('No KEGG enrichment data')

No KEGG enrichment data

# STRING protein interactions: list up to 20 cached edges, highest score first.
ppi = load('string_network')
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi)
    # Columns are treated as optional below, so only sort when `score` exists.
    if 'score' in ppi_df.columns:
        ppi_df = ppi_df.sort_values('score', ascending=False)
    display_cols = [c for c in ['protein1','protein2','score','escore','tscore'] if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    # An expression nested in an `if` branch is never rendered, so print it.
    print(ppi_df[display_cols].head(20).to_string(index=False))
else:
    print('No STRING edges returned')

11 STRING edges

# Network figure: proteins placed evenly on a unit circle (alphabetical order),
# one line per cached STRING edge, drawn heavier and more opaque as score rises.
ppi = load('string_network')
if isinstance(ppi, list) and ppi:
    import math
    nodes = sorted({p for e in ppi for p in (e['protein1'], e['protein2'])})
    n = len(nodes)
    pos = {n_: (math.cos(2*math.pi*i/n), math.sin(2*math.pi*i/n)) for i, n_ in enumerate(nodes)}
    fig, ax = plt.subplots(figsize=(7, 7))
    for e in ppi:
        # matplotlib rejects alpha outside 0-1, so clamp the score first.
        # NOTE(review): assumes scores are on a 0-1 scale; a 0-1000 scale would
        # saturate every edge at the maximum weight -- confirm against the cache.
        w = min(max(float(e['score']), 0.0), 1.0)
        x1, y1 = pos[e['protein1']]
        x2, y2 = pos[e['protein2']]
        ax.plot([x1, x2], [y1, y2], color='#888', alpha=0.3 + 0.5*w,
                linewidth=0.5 + 2*w)
    # Draw all node markers in one call instead of one scatter per node.
    xs, ys = zip(*pos.values())
    ax.scatter(xs, ys, s=450, color='#ffd54f', edgecolors='#333', zorder=3)
    for name, (x, y) in pos.items():
        ax.annotate(name, (x, y), ha='center', va='center', fontsize=8, fontweight='bold', zorder=4)
    ax.set_aspect('equal'); ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout(); plt.show()
else:
    print('No STRING data to visualize')

# Reactome pathway footprint: per gene, the number of cached pathways and the
# name of the first one, sorted by pathway count (largest first).
pw_rows = []
for g in ['APOE', 'BAX', 'BDNF', 'NAMPT']:
    pws = load(f'reactome_{g}')
    if not isinstance(pws, list):
        # Missing cache entry (or a non-list payload): report zero pathways.
        pw_rows.append({'gene': g, 'n_pathways': 0, 'top_pathway': '—'})
        continue
    first_name = pws[0]['name'] if pws else '—'
    pw_rows.append({
        'gene': g,
        'n_pathways': len(pws),
        'top_pathway': first_name[:70],
    })
pd.DataFrame(pw_rows).sort_values('n_pathways', ascending=False)

# Allen Brain Atlas ISH: per gene, how many regions were cached plus the
# structure name and expression energy of the first region in the list.
# NOTE(review): "top" presumes the cached regions are already ranked -- confirm.
ish_rows = []
for g in ['APOE', 'BAX', 'BDNF', 'NAMPT']:
    ish = load(f'allen_ish_{g}')
    if isinstance(ish, dict):
        regions = ish.get('regions') or []
    else:
        regions = []
    if regions:
        first = regions[0]
        top_region = first.get('structure', '')[:45]
        top_energy = round(first.get('expression_energy', 0), 2)
    else:
        top_region = '—'
        top_energy = None
    ish_rows.append({
        'gene': g,
        'n_ish_regions': len(regions),
        'top_region': top_region,
        'top_energy': top_energy,
    })
pd.DataFrame(ish_rows)

# Hypothesis ranking: horizontal bar chart of composite scores, best on top.
hyp_data = [
    ('Metabolic Reprogramming to Reverse Senescence', 1.0),
    ('SASP Modulation Rather Than Cell Elimination', 0.981),
    ('Autophagy-Senescence Axis Therapeutic Window', 0.921),
    ('Oligodendrocyte Precursor Cell Senescence in White Matt', 0.769),
    ('Apoptosis-Senescence Decision Point Intervention', 0.649),
    ('APOE4-Driven Astrocyte Senescence as Primary Target', 0.629),
    ('Selective Microglial Senescence Targeting via TREM2 Mod', 0.459),
]
# Reversed because barh draws position 0 at the bottom.
titles = [title for title, _ in reversed(hyp_data)]
scores = [score for _, score in reversed(hyp_data)]

# Colour bands: >= 0.6 red, >= 0.5 orange, anything lower green.
colors = []
for s in scores:
    if s >= 0.6:
        colors.append('#ef5350')
    elif s >= 0.5:
        colors.append('#ffa726')
    else:
        colors.append('#66bb6a')

fig_height = max(8, len(titles)*0.4)
fig, ax = plt.subplots(figsize=(10, fig_height))
ax.barh(range(len(titles)), scores, color=colors)
ax.set_yticks(range(len(titles)))
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('Senescent cell clearance as neurodegeneration therapy')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()

# Score-dimension heatmap: one row per hypothesis, one column per scoring
# dimension, coloured on a fixed 0-1 red-yellow-green scale.
labels = [
    'Metabolic Reprogramming to Reverse Senes',
    'SASP Modulation Rather Than Cell Elimina',
    'Autophagy-Senescence Axis Therapeutic Wi',
    'Oligodendrocyte Precursor Cell Senescenc',
    'Apoptosis-Senescence Decision Point Inte',
    'APOE4-Driven Astrocyte Senescence as Pri',
    'Selective Microglial Senescence Targetin',
]
matrix = np.array([
    [0.6, 0.9, 0.82, 0.8, 0.72, 0.8, 0.8, 0.9, 0.9],
    [0.7, 0.8, 0.78, 0.7, 0.65, 0.7, 0.7, 0.8, 0.8],
    [0.8, 0.6, 0, 0.8, 0, 0.7, 0.6, 0.7, 0.7],
    [0.8, 0.5, 0, 0.6, 0, 0.4, 0.5, 0.6, 0.7],
    [0.8, 0.2, 0, 0.6, 0, 0.6, 0.4, 0.3, 0.4],
    [0.7, 0.4, 0, 0.4, 0, 0.4, 0.3, 0.5, 0.5],
    [0.6, 0.1, 0, 0.3, 0, 0.5, 0.2, 0.2, 0.1],
])
dims = [
    'novelty_score',
    'feasibility_score',
    'impact_score',
    'mechanistic_plausibility_score',
    'clinical_relevance_score',
    'data_availability_score',
    'reproducibility_score',
    'druggability_score',
    'safety_profile_score',
]
if not matrix.size:
    print('No score data available')
else:
    # Column captions: drop the '_score' suffix and title-case the remainder.
    dim_names = [d.replace('_score','').replace('_',' ').title() for d in dims]
    fig, ax = plt.subplots(figsize=(10, 5))
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(dim_names, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    fig.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()

# PubMed evidence per hypothesis: one table per hypothesis id, built from the
# cached `pubmed_<hid>` payload. The formatting logic lives in one helper
# instead of being repeated for every hypothesis.
def _pubmed_table(papers, title_len=80, journal_len=30):
    """Build the display table for one hypothesis' cached PubMed results.

    Args:
        papers: Value returned by ``load('pubmed_<hid>')``.
        title_len: Maximum number of characters kept from each title.
        journal_len: Maximum number of characters kept from each journal name.

    Returns:
        A DataFrame holding whichever of year/journal/title/pmid are present,
        newest first when a ``year`` column exists; the first five raw records
        when none of those columns exist; or a single 'no PubMed results' note
        row when ``papers`` is not a non-empty list.
    """
    if not (isinstance(papers, list) and papers):
        return pd.DataFrame([{'note':'no PubMed results'}])
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if not cols:
        return pd.DataFrame(papers[:5])
    # Explicit copy so the column edits below never write into a view.
    lit = lit[cols].copy()
    # `cols` only guarantees that at least one column is present, so each
    # column-specific step checks for its own column.
    if 'title' in lit.columns:
        lit['title'] = lit['title'].str[:title_len]
    if 'journal' in lit.columns:
        lit['journal'] = lit['journal'].str[:journal_len]
    if 'year' in lit.columns:
        lit = lit.sort_values('year', ascending=False)
    return lit


hid = 'SDA-2026-04-16-hyp-e5bf6e0d'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

hid = 'SDA-2026-04-16-hyp-daadc5c6'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

hid = 'SDA-2026-04-16-hyp-4a6e22aa'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

hid = 'SDA-2026-04-16-hyp-b29129dc'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

hid = 'SDA-2026-04-16-hyp-a44afba2'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

hid = 'SDA-2026-04-16-hyp-96ec73b3'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

hid = 'SDA-2026-04-16-hyp-f460e747'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

# Knowledge-graph edges: (source, relation, target, strength) records embedded
# in the notebook, listed strongest first.
edge_data = [
    {'source': 'p16INK4a', 'relation': 'activates', 'target': 'senescence', 'strength': 0.85},
    {'source': 'SASP', 'relation': 'activates', 'target': 'neuroinflammation', 'strength': 0.8},
    {'source': 'senescence', 'relation': 'contributes_to', 'target': 'neurodegeneration', 'strength': 0.8},
    {'source': 'p21', 'relation': 'activates', 'target': 'senescence', 'strength': 0.8},
    {'source': 'quercetin', 'relation': 'associated_with', 'target': 'senolytic_therapy', 'strength': 0.75},
    {'source': 'dasatinib', 'relation': 'associated_with', 'target': 'senolytic_therapy', 'strength': 0.75},
    {'source': 'diseases-psp', 'relation': 'investigated_in', 'target': 'SDA-2026-04-16-hyp-e5bf6e0d', 'strength': 0.75},
    {'source': 'diseases-corticobasal-degenera', 'relation': 'investigated_in', 'target': 'SDA-2026-04-16-hyp-e5bf6e0d', 'strength': 0.75},
    {'source': 'diseases-huntingtons', 'relation': 'investigated_in', 'target': 'SDA-2026-04-16-hyp-e5bf6e0d', 'strength': 0.75},
    {'source': 'diseases-vascular-cognitive-im', 'relation': 'investigated_in', 'target': 'SDA-2026-04-16-hyp-e5bf6e0d', 'strength': 0.75},
    {'source': 'diseases-prion-disease', 'relation': 'investigated_in', 'target': 'SDA-2026-04-16-hyp-e5bf6e0d', 'strength': 0.75},
    {'source': 'diseases-machado-joseph-diseas', 'relation': 'investigated_in', 'target': 'SDA-2026-04-16-hyp-e5bf6e0d', 'strength': 0.75},
    {'source': 'genes-rpl30', 'relation': 'investigated_in', 'target': 'SDA-2026-04-16-hyp-e5bf6e0d', 'strength': 0.75},
    {'source': 'senolytic_therapy', 'relation': 'inhibits', 'target': 'senescence', 'strength': 0.7},
    {'source': 'GFAP', 'relation': 'co_discussed', 'target': 'BMAL1', 'strength': 0.4},
    {'source': 'GFAP', 'relation': 'co_discussed', 'target': 'LRP1', 'strength': 0.4},
    {'source': 'GFAP', 'relation': 'co_discussed', 'target': 'APOE', 'strength': 0.4},
    {'source': 'GFAP', 'relation': 'co_discussed', 'target': 'CLOCK', 'strength': 0.4},
    {'source': 'GFAP', 'relation': 'co_discussed', 'target': 'SIRT1', 'strength': 0.4},
    {'source': 'BMAL1', 'relation': 'co_discussed', 'target': 'LRP1', 'strength': 0.4},
    {'source': 'BMAL1', 'relation': 'co_discussed', 'target': 'APOE', 'strength': 0.4},
    {'source': 'BMAL1', 'relation': 'co_discussed', 'target': 'NLRP3', 'strength': 0.4},
    {'source': 'LRP1', 'relation': 'co_discussed', 'target': 'CLOCK', 'strength': 0.4},
    {'source': 'LRP1', 'relation': 'co_discussed', 'target': 'SIRT1', 'strength': 0.4},
    {'source': 'APOE', 'relation': 'co_discussed', 'target': 'CLOCK', 'strength': 0.4},
]
if edge_data:
    edge_df = pd.DataFrame(edge_data).head(25)
    # An expression nested in an `if` branch is never rendered, so print it.
    print(edge_df.to_string(index=False))
else:
    print('No KG edge data available')

Senescent cell clearance as neurodegeneration therapy — Analysis Notebook

Senescent cell clearance as neurodegeneration therapy

Research question

Approach

Debate Summary

1. Forge tool provenance

2. Target gene annotations

3. GO Biological Process enrichment (Enrichr)

4. KEGG pathway enrichment

5. STRING protein interaction network

6. Reactome pathway footprint

7. Allen Brain Atlas ISH regional expression

8. Hypothesis ranking (7 hypotheses)

9. Score dimension heatmap (top 10)

10. PubMed evidence per hypothesis

Hypothesis 1: Metabolic Reprogramming to Reverse Senescence

Metabolic Reprogramming to Reverse Senescence in Neurodegeneration: A Mechanistic Hypothesis

The Senescence Conundrum in Neurodegenerative Disease

Hypothesis 2: SASP Modulation Rather Than Cell Elimination

SASP Modulation Rather Than Cell Elimination

Hypothesis Expansion: Selectively Modulating the Senescence-Associated Secretory Phenotype Through NF-κB and Cytokine Pathway Targeting to Reduce Neurotoxic Inflammation While Preserving Neurotrophic Function

Background and Rationale

Hypothesis 3: Autophagy-Senescence Axis Therapeutic Window

Autophagy-Senescence Axis Therapeutic Window: Sequential Targeting of ATG7 and BCL-2 Family Proteins in Neurodegeneration

Background and Conceptual Framework

Hypothesis 4: Oligodendrocyte Precursor Cell Senescence in White Matter Disease

Hypothesis 5: Apoptosis-Senescence Decision Point Intervention

Hypothesis 6: APOE4-Driven Astrocyte Senescence as Primary Target

Hypothesis 7: Selective Microglial Senescence Targeting via TREM2 Modulation

11. Knowledge graph edges (71 total)

12. Caveats

	gene	name	protein_class	disease_involvement
0	APOE	apolipoprotein E	Cancer-related genes, Candidate cardiovascular...	Alzheimer disease, Amyloidosis
1	BAX	—	—	—
2	BDNF	—	—	—
3	NAMPT	—	—	—