import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Figure defaults for every plot in this notebook: 110 dpi, white background.
matplotlib.rcParams.update({
    'figure.dpi': 110,
    'figure.facecolor': 'white',
})

# Name of the cache sub-directory read by this notebook.
CACHE_SUB = 'seaad'

# The working directory is taken as the repository root and is put first on
# the import path.
REPO = Path('.').resolve()
sys.path[:0] = [str(REPO)]

# Directory holding the cached JSON files: <REPO>/data/forge_cache/<CACHE_SUB>.
CACHE = REPO.joinpath('data', 'forge_cache', CACHE_SUB)

def load(name):
    """Return the parsed JSON stored at ``CACHE / '<name>.json'``.

    Args:
        name: Cache entry name, without the ``.json`` suffix.

    Returns:
        Whatever the JSON document decodes to, or an empty dict when the
        file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    p = CACHE / f'{name}.json'
    try:
        # Decode explicitly as UTF-8 rather than the platform locale
        # encoding, so non-ASCII text reads the same on every machine.
        text = p.read_text(encoding='utf-8')
    except FileNotFoundError:
        # A missing cache entry is an expected condition, not an error.
        return {}
    return json.loads(text)

# Forge tool provenance: aggregate the last 30 days of `tool_calls` rows by
# skill and status. Best-effort: any failure is reported on stdout and the
# cell simply shows no table.
db_path = Path('/home/ubuntu/scidex/scidex.db')
prov_table = None
try:
    # Open read-only through a URI so that a missing database raises instead
    # of sqlite3.connect() silently creating an empty file at that path.
    db = sqlite3.connect(f'{db_path.as_uri()}?mode=ro', uri=True)
    try:
        prov = pd.read_sql_query('''
        SELECT skill_id, status, COUNT(*) AS n_calls,
               ROUND(AVG(duration_ms),0) AS mean_ms
        FROM tool_calls
        WHERE created_at >= date('now','-30 days')
        GROUP BY skill_id, status
        ORDER BY n_calls DESC
    ''', db)
    finally:
        # Release the connection even when the query itself fails.
        db.close()
    prov['tool'] = prov['skill_id'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    prov_table = prov[['tool','status','n_calls','mean_ms']].head(20)
except Exception as e:
    print(f'Provenance unavailable: {e}')
# Jupyter renders only a cell's last top-level expression; the table used to
# be a bare expression inside `try`, where its value was discarded.
prov_table

77 tool-call aggregates (last 30 days):

def _brief(value, fallback):
    """Render one HPA field as text of at most 55 characters.

    A list is shown as its first two items joined by ', '; any other value is
    stringified, with `fallback` standing in for a falsy value.
    """
    if isinstance(value, list):
        return ', '.join(value[:2])[:55]
    return str(value or fallback)[:55]


# One annotation row per target gene, built from the cached MyGene and HPA
# lookups.
ann_rows = []
for g in ['TFEB', 'YWHAG']:
    mg = load(f'mygene_{g}')
    hpa = load(f'hpa_{g}')
    if mg or hpa:
        row = {
            'gene': g,
            'name': (mg.get('name') or '')[:55],
            'protein_class': _brief(hpa.get('protein_class'), '—'),
            'disease_involvement': _brief(hpa.get('disease_involvement'), ''),
        }
    else:
        # Neither source returned anything: fill the row with placeholders.
        row = {'gene': g,
               **dict.fromkeys(('name', 'protein_class', 'disease_involvement'), '—')}
    ann_rows.append(row)
pd.DataFrame(ann_rows)

# GO Biological Process enrichment table (first 10 cached Enrichr terms).
go_bp = load('enrichr_GO_Biological_Process')
go_table = None
if isinstance(go_bp, list) and go_bp:
    go_df = pd.DataFrame(go_bp[:10])[['term','p_value','odds_ratio','genes']]
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count hits before the gene list is flattened into a string.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    go_table = go_df[['term','n_hits','p_value','odds_ratio','genes']]
else:
    print('No GO:BP enrichment data')
# Jupyter renders only a cell's last top-level expression; the table used to
# be a bare expression inside the `if`, where its value was discarded.
go_table

# Horizontal bar chart of -log10(p) for the first eight cached GO:BP terms.
go_bp = load('enrichr_GO_Biological_Process')
if not (isinstance(go_bp, list) and go_bp):
    print('No GO:BP data to plot')
else:
    top = go_bp[:8]
    # Reverse the order so the first term ends up at the top of the chart.
    terms = [entry['term'][:45] for entry in reversed(top)]
    # Clamp p-values at 1e-300 so log10 never receives zero.
    neglogp = [-np.log10(max(entry['p_value'], 1e-300)) for entry in reversed(top)]
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()

# KEGG pathway enrichment table (first 10 cached Enrichr terms).
kegg = load('enrichr_KEGG_Pathways')
kegg_table = None
if isinstance(kegg, list) and kegg:
    kegg_df = pd.DataFrame(kegg[:10])[['term','p_value','odds_ratio','genes']]
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    kegg_table = kegg_df
else:
    print('No KEGG enrichment data')
# Jupyter renders only a cell's last top-level expression; the table used to
# be a bare expression inside the `if`, where its value was discarded.
kegg_table

No KEGG enrichment data

# STRING interaction table: the 20 highest-scoring cached edges.
ppi = load('string_network')
ppi_table = None
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi)
    # The displayed columns are already filtered by presence; guard the sort
    # key the same way so a cache without 'score' does not raise KeyError.
    if 'score' in ppi_df.columns:
        ppi_df = ppi_df.sort_values('score', ascending=False)
    display_cols = [c for c in ['protein1','protein2','score','escore','tscore'] if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    ppi_table = ppi_df[display_cols].head(20)
else:
    print('No STRING edges returned')
# Jupyter renders only a cell's last top-level expression; the table used to
# be a bare expression inside the `if`, where its value was discarded.
ppi_table

11 STRING edges

# Circular-layout drawing of the cached STRING network.
ppi = load('string_network')
if not (isinstance(ppi, list) and ppi):
    print('No STRING data to visualize')
else:
    import math

    # Collect every protein that appears at either end of an edge.
    members = set()
    for edge in ppi:
        members.update((edge['protein1'], edge['protein2']))
    nodes = sorted(members)
    n = len(nodes)

    # Space the nodes evenly around the unit circle, in sorted-name order.
    pos = {}
    for i, label in enumerate(nodes):
        theta = 2*math.pi*i/n
        pos[label] = (math.cos(theta), math.sin(theta))

    fig, ax = plt.subplots(figsize=(7, 7))
    # Edges: opacity and line width both grow with the edge score.
    for edge in ppi:
        (x1, y1), (x2, y2) = pos[edge['protein1']], pos[edge['protein2']]
        weight = edge['score']
        ax.plot([x1, x2], [y1, y2], color='#888',
                alpha=0.3+0.5*weight, linewidth=0.5+2*weight)
    # Nodes are drawn above the edges (higher zorder), labels above the nodes.
    for label, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(label, (x, y), ha='center', va='center',
                    fontsize=8, fontweight='bold', zorder=4)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout()
    plt.show()

# Reactome footprint: pathway count and first listed pathway per gene.
pw_rows = []
for g in ['TFEB', 'YWHAG']:
    pws = load(f'reactome_{g}')
    # Start from the "nothing cached" row and fill in what is available.
    row = {'gene': g, 'n_pathways': 0, 'top_pathway': '—'}
    if isinstance(pws, list):
        row['n_pathways'] = len(pws)
        if pws:
            row['top_pathway'] = pws[0]['name'][:70]
    pw_rows.append(row)
pw_table = pd.DataFrame(pw_rows)
pw_table.sort_values('n_pathways', ascending=False)

# Allen ISH summary: number of cached regions and the first region per gene.
ish_rows = []
for g in ['TFEB', 'YWHAG']:
    ish = load(f'allen_ish_{g}')
    # Only a dict payload can carry a 'regions' entry; anything else counts
    # as "no regions".
    if isinstance(ish, dict):
        regions = ish.get('regions') or []
    else:
        regions = []
    if regions:
        first = regions[0]
        top_region = first.get('structure', '')[:45]
        top_energy = round(first.get('expression_energy', 0), 2)
    else:
        top_region, top_energy = '—', None
    ish_rows.append({
        'gene': g,
        'n_ish_regions': len(regions),
        'top_region': top_region,
        'top_energy': top_energy,
    })
pd.DataFrame(ish_rows)

# Hypothesis ranking: (title, composite score) pairs in the order given.
hyp_data = [
    ('Temporal TFEB Modulation Therapy', 0.553),
    ('TFEB-Independent Autophagy Bypass', 0.551),
    ('Cell-Type Specific TFEB Modulation', 0.547),
    ('Selective TFEB Cofactor Enhancement', 0.54),
    ('Lysosomal pH Restoration Upstream of TFEB', 0.518),
    ('YWHAG-Mediated TFEB Subcellular Targeting', 0.508),
    ('Mitochondrial-Lysosomal Coupling Enhancer', 0.495),
]
# Reverse so the first entry is drawn as the topmost bar.
titles = [title for title, _ in reversed(hyp_data)]
scores = [score for _, score in reversed(hyp_data)]


def _score_color(score):
    """Pick the bar colour from the score bands (>= 0.6, >= 0.5, below)."""
    if score >= 0.6:
        return '#ef5350'
    if score >= 0.5:
        return '#ffa726'
    return '#66bb6a'


bar_slots = range(len(titles))
fig, ax = plt.subplots(figsize=(10, max(8, len(titles)*0.4)))
colors = [_score_color(s) for s in scores]
ax.barh(bar_slots, scores, color=colors)
ax.set_yticks(bar_slots)
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('Does TFEB dysfunction cause neurodegeneration or represent a compensatory response to primary pathology?')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()

# Score-dimension heatmap: one row per hypothesis label, one column per
# entry of `dims`.
labels = [
    'Temporal TFEB Modulation Therapy',
    'TFEB-Independent Autophagy Bypass',
    'Cell-Type Specific TFEB Modulation',
    'Selective TFEB Cofactor Enhancement',
    'Lysosomal pH Restoration Upstream of TFE',
    'YWHAG-Mediated TFEB Subcellular Targetin',
    'Mitochondrial-Lysosomal Coupling Enhance',
]
matrix = np.array([
    [0.8, 0.2, 0.7, 0.4, 0, 0.4, 0.3, 0.2, 0.3],
    [0.6, 0.9, 0.8, 0.8, 0, 0.8, 0.8, 0.9, 0.8],
    [0.9, 0.6, 0.8, 0.8, 0, 0.6, 0.7, 0.5, 0.7],
    [0.8, 0.3, 0.7, 0.6, 0, 0.5, 0.4, 0.2, 0.5],
    [0.7, 0.6, 0.7, 0.8, 0, 0.7, 0.7, 0.5, 0.5],
    [0.8, 0.3, 0.6, 0.5, 0, 0.4, 0.3, 0.3, 0.4],
    [0.9, 0.3, 0.8, 0.7, 0, 0.4, 0.5, 0.2, 0.4],
])
dims = [
    'novelty_score',
    'feasibility_score',
    'impact_score',
    'mechanistic_plausibility_score',
    'clinical_relevance_score',
    'data_availability_score',
    'reproducibility_score',
    'druggability_score',
    'safety_profile_score',
]
if not matrix.size:
    print('No score data available')
else:
    # Column captions: drop the '_score' suffix and title-case the rest.
    captions = [d.replace('_score','').replace('_',' ').title() for d in dims]
    fig, ax = plt.subplots(figsize=(10, 5))
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(captions, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    plt.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()

# PubMed evidence table for hypothesis id 'h-1775578a' (cached results).
hid = 'h-1775578a'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols]
        # Any of the wanted columns may be absent from the cache, so guard
        # each one; 'title' and 'year' previously raised KeyError if missing.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df

# PubMed evidence table for hypothesis id 'h-1e4bba56' (cached results).
hid = 'h-1e4bba56'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols]
        # Any of the wanted columns may be absent from the cache, so guard
        # each one; 'title' and 'year' previously raised KeyError if missing.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df

# PubMed evidence table for hypothesis id 'h-b9acf0c9' (cached results).
hid = 'h-b9acf0c9'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols]
        # Any of the wanted columns may be absent from the cache, so guard
        # each one; 'title' and 'year' previously raised KeyError if missing.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df

# PubMed evidence table for hypothesis id 'h-6f30a803' (cached results).
hid = 'h-6f30a803'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols]
        # Any of the wanted columns may be absent from the cache, so guard
        # each one; 'title' and 'year' previously raised KeyError if missing.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df

# PubMed evidence table for hypothesis id 'h-6b394be1' (cached results).
hid = 'h-6b394be1'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols]
        # Any of the wanted columns may be absent from the cache, so guard
        # each one; 'title' and 'year' previously raised KeyError if missing.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df

# PubMed evidence table for hypothesis id 'h-3d2aa5a6' (cached results).
hid = 'h-3d2aa5a6'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols]
        # Any of the wanted columns may be absent from the cache, so guard
        # each one; 'title' and 'year' previously raised KeyError if missing.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df

# PubMed evidence table for hypothesis id 'h-e3a48208' (cached results).
hid = 'h-e3a48208'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols]
        # Any of the wanted columns may be absent from the cache, so guard
        # each one; 'title' and 'year' previously raised KeyError if missing.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df

# Knowledge-graph edges embedded in the notebook, one dict per edge.
edge_data = [
    {'source': 'TFEB overexpression', 'relation': 'causes (early enhancement', 'target': 'neurodegeneration prevention', 'strength': 0.8},
    {'source': 'mitochondrial dysfunction', 'relation': 'causes (prevents energy n', 'target': 'TFEB upregulation failure', 'strength': 0.8},
    {'source': 'LAMTOR complex function', 'relation': 'causes (promotes contact ', 'target': 'mitochondrial-lysosomal coupli', 'strength': 0.8},
    {'source': 'lysosomal dysfunction', 'relation': 'causes (dysfunction prece', 'target': 'TFEB activation', 'strength': 0.75},
    {'source': 'lysosomal alkalization', 'relation': 'causes (prevents enzyme f', 'target': 'TFEB compensation failure', 'strength': 0.75},
    {'source': 'V-ATPase enhancement', 'relation': 'causes (enhances proton p', 'target': 'lysosomal pH restoration', 'strength': 0.75},
    {'source': 'ischemia', 'relation': 'causes (ischemic conditio', 'target': 'autophagy upregulation', 'strength': 0.7},
    {'source': 'excessive autophagy', 'relation': 'causes (causes cell death', 'target': 'neuronal death', 'strength': 0.7},
    {'source': 'autophagy upregulation', 'relation': 'causes (increased autopha', 'target': 'lysosomal storage dysfunction', 'strength': 0.7},
    {'source': 'trehalose', 'relation': 'causes (induces autophagy', 'target': 'autophagy induction', 'strength': 0.7},
    {'source': 'h-1e4bba56', 'relation': 'targets', 'target': 'ULK1', 'strength': 0.7},
    {'source': 'h-1e4bba56', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.7},
    {'source': 'h-b9acf0c9', 'relation': 'targets', 'target': 'TFEB', 'strength': 0.7},
    {'source': 'h-b9acf0c9', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.7},
    {'source': 'YWHAG-TFEB interactions', 'relation': 'causes (14-3-3 protein bi', 'target': 'TFEB subcellular targeting', 'strength': 0.65},
    {'source': 'celastrol', 'relation': 'causes (enhances TFEB act', 'target': 'TFEB-mediated tau clearance', 'strength': 0.6},
    {'source': 'h-6b394be1', 'relation': 'targets', 'target': 'ATP6V1A', 'strength': 0.6},
    {'source': 'h-6b394be1', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.6},
    {'source': 'TFEB', 'relation': 'regulated_by', 'target': 'YWHAG', 'strength': 0.5},
    {'source': 'ULK1', 'relation': 'initiates', 'target': 'autophagy', 'strength': 0.5},
    {'source': 'trehalose', 'relation': 'induces', 'target': 'autophagy', 'strength': 0.5},
    {'source': 'LAMTOR1', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.5},
    {'source': 'YWHAG', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.5},
    {'source': 'LAMTOR1', 'relation': 'regulates', 'target': 'mTOR', 'strength': 0.5},
    {'source': 'ATP6V1A', 'relation': 'enables', 'target': 'lysosomal_acidification', 'strength': 0.5},
]
edge_table = None
if edge_data:
    edge_table = pd.DataFrame(edge_data).head(25)
else:
    print('No KG edge data available')
# Jupyter renders only a cell's last top-level expression; the table used to
# be a bare expression inside the `if`, where its value was discarded.
edge_table

Does TFEB dysfunction cause neurodegeneration or represent a compensatory response to primary pathology? — Analysis Notebook

Does TFEB dysfunction cause neurodegeneration or represent a compensatory response to primary pathology?

Research question

Approach

Debate Summary

1. Forge tool provenance

2. Target gene annotations

3. GO Biological Process enrichment (Enrichr)

4. KEGG pathway enrichment

5. STRING protein interaction network

6. Reactome pathway footprint

7. Allen Brain Atlas ISH regional expression

8. Hypothesis ranking (7 hypotheses)

9. Score dimension heatmap (top 10)

10. PubMed evidence per hypothesis

Hypothesis 1: Temporal TFEB Modulation Therapy

Temporal TFEB Modulation Therapy

Mechanistic Hypothesis Overview

Hypothesis 2: TFEB-Independent Autophagy Bypass

TFEB-Independent Autophagy Bypass

Mechanistic Hypothesis Overview

Hypothesis 3: Cell-Type Specific TFEB Modulation

Cell-Type Specific TFEB Modulation

Mechanistic Hypothesis Overview

Hypothesis 4: Selective TFEB Cofactor Enhancement

Selective TFEB Cofactor Enhancement

Mechanistic Hypothesis Overview

Hypothesis 5: Lysosomal pH Restoration Upstream of TFEB

Lysosomal pH Restoration Upstream of TFEB

Mechanistic Hypothesis Overview

Hypothesis 6: YWHAG-Mediated TFEB Subcellular Targeting

YWHAG-Mediated TFEB Subcellular Targeting

Mechanistic Hypothesis Overview

Hypothesis 7: Mitochondrial-Lysosomal Coupling Enhancer

Mitochondrial-Lysosomal Coupling Enhancer

Mechanistic Hypothesis Overview

11. Knowledge graph edges (47 total)

12. Caveats