Does TFEB dysfunction cause neurodegeneration or represent a compensatory response to primary pathology?¶
Notebook ID: nb-SDA-2026-04-03-gap-debate-20260403-222617-8eb5bdbc · Analysis: SDA-2026-04-03-gap-debate-20260403-222617-8eb5bdbc · Generated: 2026-04-10
Research question¶
The debate highlighted TFEB's role in mitochondrial-lysosomal coupling but couldn't resolve causation vs correlation. This distinction is critical for determining whether TFEB should be therapeutically enhanced or whether upstream targets are needed.
Source: Debate session sess_SDA-2026-04-02-gap-v2-5d0e3052 (Analysis: SDA-2026-04-02-gap-v2-5d0e3052)
Approach¶
This notebook is generated programmatically from real Forge tool calls and SciDEX debate data. Code cells load cached evidence bundles from `data/forge_cache/seaad/*.json` and query live data from `scidex.db`. To refresh the notebook, re-run `python3 scripts/regenerate_notebooks.py --analysis SDA-2026-04-03-gap-debate-20260403-222617-8eb5bdbc --force`.
7 hypotheses were generated and debated. The knowledge graph has 47 edges.
Debate Summary¶
Quality score: 0.5 · Rounds: 4 · Personas: Theorist, Skeptic, Domain_Expert, Synthesizer
1. Forge tool provenance¶
import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Figure defaults applied to every plot below: 110 dpi on a white background.
matplotlib.rcParams.update({
    'figure.dpi': 110,
    'figure.facecolor': 'white',
})
# Name of the evidence-bundle subdirectory used by this notebook.
CACHE_SUB = 'seaad'

# The notebook is expected to run from the repository root; make that root
# importable and derive the cache directory from it.
REPO = Path('.').resolve()
sys.path.insert(0, str(REPO))
CACHE = REPO.joinpath('data', 'forge_cache', CACHE_SUB)
def load(name, cache_dir=None):
    """Return the parsed JSON evidence bundle *name*, or ``{}`` if it is absent.

    Args:
        name: File stem of the bundle; ``<name>.json`` is read from the cache
            directory.
        cache_dir: Directory to read from. When omitted, the notebook-level
            ``CACHE`` directory is used.

    Returns:
        The decoded JSON payload, or an empty dict when the file does not
        exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    base = CACHE if cache_dir is None else Path(cache_dir)
    try:
        # JSON is UTF-8 by specification, so do not rely on the locale default.
        # Reading directly (instead of exists() + read) avoids a race when the
        # file disappears between the two calls.
        text = (base / f'{name}.json').read_text(encoding='utf-8')
    except (FileNotFoundError, NotADirectoryError):
        return {}
    return json.loads(text)
# Summarise the last 30 days of Forge tool calls recorded in the SciDEX database.
db_path = Path('/home/ubuntu/scidex/scidex.db')
prov_view = None  # stays None (renders nothing) when provenance is unavailable
try:
    db = sqlite3.connect(str(db_path))
    try:
        prov = pd.read_sql_query('''
SELECT skill_id, status, COUNT(*) AS n_calls,
ROUND(AVG(duration_ms),0) AS mean_ms
FROM tool_calls
WHERE created_at >= date('now','-30 days')
GROUP BY skill_id, status
ORDER BY n_calls DESC
''', db)
    finally:
        # Release the connection even when the query itself fails.
        db.close()
    prov['tool'] = prov['skill_id'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    prov_view = prov[['tool','status','n_calls','mean_ms']].head(20)
except Exception as e:
    # Best-effort cell: the rest of the notebook does not depend on provenance.
    print(f'Provenance unavailable: {e}')
# Jupyter only auto-renders a cell's final top-level expression; a bare
# expression nested inside try/if is evaluated and discarded.
prov_view
77 tool-call aggregates (last 30 days):
2. Target gene annotations¶
def _brief(value, fallback):
    """Format an HPA field: first two list items joined, else str(); max 55 chars."""
    if isinstance(value, list):
        return ', '.join(value[:2])[:55]
    return str(value or fallback)[:55]


# One summary row per target gene, combining the MyGene and HPA cache bundles.
ann_rows = []
for g in ['TFEB', 'YWHAG']:
    mg = load(f'mygene_{g}')
    hpa = load(f'hpa_{g}')
    if mg or hpa:
        row = {
            'gene': g,
            'name': (mg.get('name') or '')[:55],
            'protein_class': _brief(hpa.get('protein_class'), '—'),
            'disease_involvement': _brief(hpa.get('disease_involvement'), ''),
        }
    else:
        # Neither source has data for this gene: emit a placeholder row.
        row = {'gene': g, 'name': '—', 'protein_class': '—',
               'disease_involvement': '—'}
    ann_rows.append(row)
pd.DataFrame(ann_rows)
| gene | name | protein_class | disease_involvement | |
|---|---|---|---|---|
| 0 | TFEB | — | — | — |
| 1 | YWHAG | — | — | — |
3. GO Biological Process enrichment (Enrichr)¶
# Tabulate the ten leading GO Biological Process terms from the Enrichr cache.
go_bp = load('enrichr_GO_Biological_Process')
go_view = None  # stays None (renders nothing) when there is no enrichment data
if isinstance(go_bp, list) and go_bp:
    # .copy() so the column assignments below act on an independent frame.
    go_df = pd.DataFrame(go_bp[:10])[['term','p_value','odds_ratio','genes']].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count hits before the gene list is flattened into a display string.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    go_view = go_df[['term','n_hits','p_value','odds_ratio','genes']]
else:
    print('No GO:BP enrichment data')
# Jupyter only auto-renders a cell's final top-level expression; an expression
# nested inside the if-branch is evaluated and discarded.
go_view
# Bar chart of the eight leading GO:BP terms on a -log10(p) scale.
go_bp = load('enrichr_GO_Biological_Process')
if not (isinstance(go_bp, list) and go_bp):
    print('No GO:BP data to plot')
else:
    top = go_bp[:8]
    # Both series are built in reverse order of the cached hit list.
    terms = [hit['term'][:45] for hit in reversed(top)]
    # Clamp p-values at 1e-300 so log10 never receives zero.
    neglogp = [-np.log10(max(hit['p_value'], 1e-300)) for hit in reversed(top)]
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()
4. KEGG pathway enrichment¶
# Tabulate the ten leading KEGG pathways from the Enrichr cache.
kegg = load('enrichr_KEGG_Pathways')
kegg_view = None  # stays None (renders nothing) when there is no enrichment data
if isinstance(kegg, list) and kegg:
    # .copy() so the column assignments below act on an independent frame.
    kegg_df = pd.DataFrame(kegg[:10])[['term','p_value','odds_ratio','genes']].copy()
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    kegg_view = kegg_df
else:
    print('No KEGG enrichment data')
# Jupyter only auto-renders a cell's final top-level expression; an expression
# nested inside the if-branch is evaluated and discarded.
kegg_view
No KEGG enrichment data
5. STRING protein interaction network¶
# List the strongest STRING interactions from the cached network bundle.
ppi = load('string_network')
ppi_view = None  # stays None (renders nothing) when no edges were returned
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi).sort_values('score', ascending=False)
    # Only show the score columns that are actually present in the bundle.
    display_cols = [c for c in ['protein1','protein2','score','escore','tscore'] if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    ppi_view = ppi_df[display_cols].head(20)
else:
    print('No STRING edges returned')
# Jupyter only auto-renders a cell's final top-level expression; an expression
# nested inside the if-branch is evaluated and discarded.
ppi_view
11 STRING edges
# Network figure: proteins spaced evenly on a unit circle, edges styled by score.
ppi = load('string_network')
if not (isinstance(ppi, list) and ppi):
    print('No STRING data to visualize')
else:
    import math
    nodes = sorted({e[end] for e in ppi for end in ('protein1', 'protein2')})
    n = len(nodes)
    # Place node i at angle 2*pi*i/n around the circle.
    pos = {
        n_: (math.cos(2*math.pi*i/n), math.sin(2*math.pi*i/n))
        for i, n_ in enumerate(nodes)
    }
    fig, ax = plt.subplots(figsize=(7, 7))
    for e in ppi:
        (x1, y1), (x2, y2) = pos[e['protein1']], pos[e['protein2']]
        # Higher-scoring edges are drawn thicker and more opaque.
        ax.plot([x1, x2], [y1, y2], color='#888',
                alpha=0.3+0.5*e['score'],
                linewidth=0.5+2*e['score'])
    for name, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(name, (x, y), ha='center', va='center', fontsize=8,
                    fontweight='bold', zorder=4)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout()
    plt.show()
6. Reactome pathway footprint¶
# Per-gene Reactome summary: pathway count plus the first listed pathway name.
pw_rows = []
for g in ['TFEB', 'YWHAG']:
    pws = load(f'reactome_{g}')
    if not isinstance(pws, list):
        # A missing bundle comes back as a non-list; record an empty footprint.
        pw_rows.append({'gene': g, 'n_pathways': 0, 'top_pathway': '—'})
        continue
    first_name = pws[0]['name'] if pws else '—'
    pw_rows.append({'gene': g, 'n_pathways': len(pws),
                    'top_pathway': first_name[:70]})
pd.DataFrame(pw_rows).sort_values('n_pathways', ascending=False)
| gene | n_pathways | top_pathway | |
|---|---|---|---|
| 0 | TFEB | 0 | — |
| 1 | YWHAG | 0 | — |
7. Allen Brain Atlas ISH regional expression¶
# Per-gene Allen ISH summary: region count plus the first listed region.
ish_rows = []
for g in ['TFEB', 'YWHAG']:
    ish = load(f'allen_ish_{g}')
    regions = (ish.get('regions') or []) if isinstance(ish, dict) else []
    if regions:
        lead = regions[0]
        top_region = lead.get('structure', '')[:45]
        top_energy = round(lead.get('expression_energy', 0), 2)
    else:
        # No regional data cached for this gene.
        top_region, top_energy = '—', None
    ish_rows.append({
        'gene': g,
        'n_ish_regions': len(regions),
        'top_region': top_region,
        'top_energy': top_energy,
    })
pd.DataFrame(ish_rows)
| gene | n_ish_regions | top_region | top_energy | |
|---|---|---|---|---|
| 0 | TFEB | 0 | — | — |
| 1 | YWHAG | 0 | — | — |
8. Hypothesis ranking (7 hypotheses)¶
# (title, composite score) pairs, ordered from highest to lowest score.
hyp_data = [
    ('Temporal TFEB Modulation Therapy', 0.553),
    ('TFEB-Independent Autophagy Bypass', 0.551),
    ('Cell-Type Specific TFEB Modulation', 0.547),
    ('Selective TFEB Cofactor Enhancement', 0.54),
    ('Lysosomal pH Restoration Upstream of TFEB', 0.518),
    ('YWHAG-Mediated TFEB Subcellular Targeting', 0.508),
    ('Mitochondrial-Lysosomal Coupling Enhancer', 0.495),
]
# Both series are plotted in reverse of the listed order.
titles = [title for title, _score in reversed(hyp_data)]
scores = [score for _title, score in reversed(hyp_data)]
fig, ax = plt.subplots(figsize=(10, max(8, len(titles)*0.4)))
# Colour bands: >= 0.6, >= 0.5, and everything below.
colors = []
for s in scores:
    if s >= 0.6:
        colors.append('#ef5350')
    elif s >= 0.5:
        colors.append('#ffa726')
    else:
        colors.append('#66bb6a')
ax.barh(range(len(titles)), scores, color=colors)
ax.set_yticks(range(len(titles)))
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('Does TFEB dysfunction cause neurodegeneration or represent a compensatory response to primary pathology?')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()
9. Score dimension heatmap (top 10)¶
# Row labels, one per hypothesis, matching the rows of `matrix`.
labels = [
    'Temporal TFEB Modulation Therapy',
    'TFEB-Independent Autophagy Bypass',
    'Cell-Type Specific TFEB Modulation',
    'Selective TFEB Cofactor Enhancement',
    'Lysosomal pH Restoration Upstream of TFE',
    'YWHAG-Mediated TFEB Subcellular Targetin',
    'Mitochondrial-Lysosomal Coupling Enhance',
]
# One row per hypothesis, one column per entry of `dims`.
matrix = np.array([
    [0.8, 0.2, 0.7, 0.4, 0, 0.4, 0.3, 0.2, 0.3],
    [0.6, 0.9, 0.8, 0.8, 0, 0.8, 0.8, 0.9, 0.8],
    [0.9, 0.6, 0.8, 0.8, 0, 0.6, 0.7, 0.5, 0.7],
    [0.8, 0.3, 0.7, 0.6, 0, 0.5, 0.4, 0.2, 0.5],
    [0.7, 0.6, 0.7, 0.8, 0, 0.7, 0.7, 0.5, 0.5],
    [0.8, 0.3, 0.6, 0.5, 0, 0.4, 0.3, 0.3, 0.4],
    [0.9, 0.3, 0.8, 0.7, 0, 0.4, 0.5, 0.2, 0.4],
])
dims = [
    'novelty_score',
    'feasibility_score',
    'impact_score',
    'mechanistic_plausibility_score',
    'clinical_relevance_score',
    'data_availability_score',
    'reproducibility_score',
    'druggability_score',
    'safety_profile_score',
]
if not matrix.size:
    print('No score data available')
else:
    # Turn 'safety_profile_score' into 'Safety Profile' for the axis.
    dim_labels = [d.replace('_score','').replace('_',' ').title() for d in dims]
    fig, ax = plt.subplots(figsize=(10, 5))
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(dim_labels, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    plt.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()
10. PubMed evidence per hypothesis¶
Hypothesis 1: Temporal TFEB Modulation Therapy¶
Target genes: TFEB · Composite score: 0.553
Temporal TFEB Modulation Therapy¶
Mechanistic Hypothesis Overview¶
This hypothesis proposes a disease-modifying strategy centered on Temporal TFEB Modulation Therapy as a mechanistic intervention point in neurodegeneration. The core claim is that the biological process represented by temporal TFEB modulation therapy is not a passive disease byproduct, but a functional bottleneck that shapes how quickly neurons lose homeostasis under chronic stress. In this framing, pathology progresses … [excerpt truncated]
# PubMed evidence table for this hypothesis, newest papers first.
hid = 'h-1775578a'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column edits below act on an independent frame.
        lit = lit[cols].copy()
        # Any of the four fields may be absent from the cached records, so
        # guard each one instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields are present: show the raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
Hypothesis 2: TFEB-Independent Autophagy Bypass¶
Target genes: ULK1 · Composite score: 0.551
TFEB-Independent Autophagy Bypass¶
Mechanistic Hypothesis Overview¶
This hypothesis proposes a disease-modifying strategy centered on TFEB-Independent Autophagy Bypass as a mechanistic intervention point in neurodegeneration. The core claim is that the biological process represented by TFEB-independent autophagy bypass is not a passive disease byproduct, but a functional bottleneck that shapes how quickly neurons lose homeostasis under chronic stress. In this framing, pathology progresses … [excerpt truncated]
# PubMed evidence table for this hypothesis, newest papers first.
hid = 'h-1e4bba56'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column edits below act on an independent frame.
        lit = lit[cols].copy()
        # Any of the four fields may be absent from the cached records, so
        # guard each one instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields are present: show the raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
Hypothesis 3: Cell-Type Specific TFEB Modulation¶
Target genes: TFEB · Composite score: 0.547
Cell-Type Specific TFEB Modulation¶
Mechanistic Hypothesis Overview¶
The "Cell-Type Specific TFEB Modulation" hypothesis proposes that the transcription factor EB (TFEB) — the master regulator of autophagy and lysosomal biogenesis — is a high-value therapeutic target for Alzheimer's disease, and that cell-type specific TFEB activation can simultaneously enhance Aβ clearance, tau turnover, and mitochondrial quality control without the toxicity associated with non-selective TFEB activation.
# PubMed evidence table for this hypothesis, newest papers first.
hid = 'h-b9acf0c9'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column edits below act on an independent frame.
        lit = lit[cols].copy()
        # Any of the four fields may be absent from the cached records, so
        # guard each one instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields are present: show the raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
Hypothesis 4: Selective TFEB Cofactor Enhancement¶
Target genes: TFE3 · Composite score: 0.54
Selective TFEB Cofactor Enhancement¶
Mechanistic Hypothesis Overview¶
This hypothesis proposes a disease-modifying strategy centered on Selective TFEB Cofactor Enhancement as a mechanistic intervention point in neurodegeneration. The core claim is that the biological process represented by selective TFEB cofactor enhancement is not a passive disease byproduct, but a functional bottleneck that shapes how quickly neurons lose homeostasis under chronic stress. In this framing, pathology … [excerpt truncated]
# PubMed evidence table for this hypothesis, newest papers first.
hid = 'h-6f30a803'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column edits below act on an independent frame.
        lit = lit[cols].copy()
        # Any of the four fields may be absent from the cached records, so
        # guard each one instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields are present: show the raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
Hypothesis 5: Lysosomal pH Restoration Upstream of TFEB¶
Target genes: ATP6V1A · Composite score: 0.518
Lysosomal pH Restoration Upstream of TFEB¶
Mechanistic Hypothesis Overview¶
This hypothesis proposes a disease-modifying strategy centered on Lysosomal pH Restoration Upstream of TFEB as a mechanistic intervention point in neurodegeneration. The core claim is that the biological process represented by lysosomal pH restoration upstream of TFEB is not a passive disease byproduct, but a functional bottleneck that shapes how quickly neurons lose homeostasis under chronic stress. In this framing, … [excerpt truncated]
# PubMed evidence table for this hypothesis, newest papers first.
hid = 'h-6b394be1'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column edits below act on an independent frame.
        lit = lit[cols].copy()
        # Any of the four fields may be absent from the cached records, so
        # guard each one instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields are present: show the raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
Hypothesis 6: YWHAG-Mediated TFEB Subcellular Targeting¶
Target genes: YWHAG · Composite score: 0.508
YWHAG-Mediated TFEB Subcellular Targeting¶
Mechanistic Hypothesis Overview¶
This hypothesis proposes a disease-modifying strategy centered on YWHAG-Mediated TFEB Subcellular Targeting as a mechanistic intervention point in neurodegeneration. The core claim is that the biological process represented by YWHAG-mediated TFEB subcellular targeting is not a passive disease byproduct, but a functional bottleneck that shapes how quickly neurons lose homeostasis under chronic stress. In this framing, … [excerpt truncated]
# PubMed evidence table for this hypothesis, newest papers first.
hid = 'h-3d2aa5a6'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column edits below act on an independent frame.
        lit = lit[cols].copy()
        # Any of the four fields may be absent from the cached records, so
        # guard each one instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields are present: show the raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
Hypothesis 7: Mitochondrial-Lysosomal Coupling Enhancer¶
Target genes: LAMTOR1 · Composite score: 0.495
Mitochondrial-Lysosomal Coupling Enhancer¶
Mechanistic Hypothesis Overview¶
This hypothesis proposes a disease-modifying strategy centered on Mitochondrial-Lysosomal Coupling Enhancer as a mechanistic intervention point in neurodegeneration. The core claim is that the biological process represented by mitochondrial-lysosomal coupling enhancer is not a passive disease byproduct, but a functional bottleneck that shapes how quickly neurons lose homeostasis under chronic stress. In this framing, … [excerpt truncated]
# PubMed evidence table for this hypothesis, newest papers first.
hid = 'h-e3a48208'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column edits below act on an independent frame.
        lit = lit[cols].copy()
        # Any of the four fields may be absent from the cached records, so
        # guard each one instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields are present: show the raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
11. Knowledge graph edges (47 total)¶
# Knowledge-graph edges embedded in the notebook (25 records), in the order
# they were written by the generator.
edge_data = [
    {'source': 'TFEB overexpression', 'relation': 'causes (early enhancement', 'target': 'neurodegeneration prevention', 'strength': 0.8},
    {'source': 'mitochondrial dysfunction', 'relation': 'causes (prevents energy n', 'target': 'TFEB upregulation failure', 'strength': 0.8},
    {'source': 'LAMTOR complex function', 'relation': 'causes (promotes contact ', 'target': 'mitochondrial-lysosomal coupli', 'strength': 0.8},
    {'source': 'lysosomal dysfunction', 'relation': 'causes (dysfunction prece', 'target': 'TFEB activation', 'strength': 0.75},
    {'source': 'lysosomal alkalization', 'relation': 'causes (prevents enzyme f', 'target': 'TFEB compensation failure', 'strength': 0.75},
    {'source': 'V-ATPase enhancement', 'relation': 'causes (enhances proton p', 'target': 'lysosomal pH restoration', 'strength': 0.75},
    {'source': 'ischemia', 'relation': 'causes (ischemic conditio', 'target': 'autophagy upregulation', 'strength': 0.7},
    {'source': 'excessive autophagy', 'relation': 'causes (causes cell death', 'target': 'neuronal death', 'strength': 0.7},
    {'source': 'autophagy upregulation', 'relation': 'causes (increased autopha', 'target': 'lysosomal storage dysfunction', 'strength': 0.7},
    {'source': 'trehalose', 'relation': 'causes (induces autophagy', 'target': 'autophagy induction', 'strength': 0.7},
    {'source': 'h-1e4bba56', 'relation': 'targets', 'target': 'ULK1', 'strength': 0.7},
    {'source': 'h-1e4bba56', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.7},
    {'source': 'h-b9acf0c9', 'relation': 'targets', 'target': 'TFEB', 'strength': 0.7},
    {'source': 'h-b9acf0c9', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.7},
    {'source': 'YWHAG-TFEB interactions', 'relation': 'causes (14-3-3 protein bi', 'target': 'TFEB subcellular targeting', 'strength': 0.65},
    {'source': 'celastrol', 'relation': 'causes (enhances TFEB act', 'target': 'TFEB-mediated tau clearance', 'strength': 0.6},
    {'source': 'h-6b394be1', 'relation': 'targets', 'target': 'ATP6V1A', 'strength': 0.6},
    {'source': 'h-6b394be1', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.6},
    {'source': 'TFEB', 'relation': 'regulated_by', 'target': 'YWHAG', 'strength': 0.5},
    {'source': 'ULK1', 'relation': 'initiates', 'target': 'autophagy', 'strength': 0.5},
    {'source': 'trehalose', 'relation': 'induces', 'target': 'autophagy', 'strength': 0.5},
    {'source': 'LAMTOR1', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.5},
    {'source': 'YWHAG', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.5},
    {'source': 'LAMTOR1', 'relation': 'regulates', 'target': 'mTOR', 'strength': 0.5},
    {'source': 'ATP6V1A', 'relation': 'enables', 'target': 'lysosomal_acidification', 'strength': 0.5},
]
edge_view = None  # stays None (renders nothing) when there are no edges
if edge_data:
    edge_view = pd.DataFrame(edge_data).head(25)
else:
    print('No KG edge data available')
# Jupyter only auto-renders a cell's final top-level expression; an expression
# nested inside the if-branch is evaluated and discarded.
edge_view
12. Caveats¶
This notebook uses real Forge tool calls cached from live APIs, but:
- Enrichment is against curated gene-set libraries, not genome-wide screens
- STRING/Reactome/HPA/MyGene reflect curated knowledge
- PubMed literature is search-relevance ranked, not systematic review
The cached evidence bundle is the minimum viable real-data analysis for this topic.