Metabolic reprogramming in neurodegenerative disease¶
Notebook ID: nb-SDA-2026-04-02-gap-v2-5d0e3052 · Analysis: SDA-2026-04-02-gap-v2-5d0e3052 · Generated: 2026-04-10
Research question¶
How does metabolic reprogramming (glucose metabolism shifts, brain insulin resistance, ketone body utilization) affect neuronal survival in neurodegenerative diseases? What metabolic interventions (ketogenic diet, GLP-1 agonists, metformin) show therapeutic promise?
Approach¶
This notebook is generated programmatically from real Forge tool calls and SciDEX debate data. Code cells load cached evidence bundles from data/forge_cache/seaad/*.json and query live data from scidex.db. Re-run python3 scripts/regenerate_notebooks.py --analysis SDA-2026-04-02-gap-v2-5d0e3052 --force to refresh.
3 hypotheses were generated and debated. The knowledge graph has 28 edges.
Debate Summary¶
Quality score: 0.58 · Rounds: 4 · Personas: Theorist, Skeptic, Domain_Expert, Synthesizer
1. Forge tool provenance¶
import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Notebook-wide figure defaults: 110 dpi rendering on a white figure background.
matplotlib.rcParams.update({
    'figure.dpi': 110,
    'figure.facecolor': 'white',
})
# Repository root is taken from the current working directory and put first on
# sys.path (presumably so project modules can be imported by later cells).
REPO = Path('.').resolve()
sys.path.insert(0, str(REPO))

# Cached evidence bundles are read from data/forge_cache/<CACHE_SUB>/.
CACHE_SUB = 'seaad'
CACHE = REPO / 'data' / 'forge_cache' / CACHE_SUB


def load(name, cache_dir=None):
    """Return the decoded JSON bundle ``<name>.json`` from the cache directory.

    Args:
        name: Bundle file name without the ``.json`` suffix.
        cache_dir: Directory to read from. Defaults to the module-level
            ``CACHE`` so existing ``load(name)`` calls behave as before.

    Returns:
        The decoded JSON value, or an empty dict when the file is missing,
        cannot be read, or does not contain valid JSON.
    """
    base = CACHE if cache_dir is None else Path(cache_dir)
    p = base / f'{name}.json'
    try:
        # JSON is UTF-8 by specification; do not depend on the locale encoding.
        return json.loads(p.read_text(encoding='utf-8'))
    except FileNotFoundError:
        # A missing bundle is the expected "no data" case; stay silent.
        return {}
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # Keep the notebook running, but say why the bundle was skipped.
        print(f'Cache bundle {p.name} unreadable: {exc}')
        return {}
# Forge tool provenance: aggregate the tool_calls table by skill and status
# for the last 30 days and show the 20 busiest aggregates.
db_path = Path('/home/ubuntu/scidex/scidex.db')
PROVENANCE_SQL = '''
SELECT skill_id, status, COUNT(*) AS n_calls,
ROUND(AVG(duration_ms),0) AS mean_ms
FROM tool_calls
WHERE created_at >= date('now','-30 days')
GROUP BY skill_id, status
ORDER BY n_calls DESC
'''
prov_view = None  # stays None when provenance cannot be read
try:
    db = sqlite3.connect(str(db_path))
    try:
        prov = pd.read_sql_query(PROVENANCE_SQL, db)
    finally:
        # Release the connection even when the query raises (e.g. on a
        # corrupt database); previously close() was skipped on failure.
        db.close()
    prov['tool'] = prov['skill_id'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    prov_view = prov[['tool','status','n_calls','mean_ms']].head(20)
except Exception as e:
    # Best-effort cell: provenance is informational, so report and move on.
    print(f'Provenance unavailable: {e}')
# Jupyter renders only a trailing top-level expression; a bare expression
# nested inside try/if is silently discarded, so surface the table here.
prov_view
Provenance unavailable: Execution failed on sql '
SELECT skill_id, status, COUNT(*) AS n_calls,
ROUND(AVG(duration_ms),0) AS mean_ms
FROM tool_calls
WHERE created_at >= date('now','-30 days')
GROUP BY skill_id, status
ORDER BY n_calls DESC
': database disk image is malformed
2. Target gene annotations¶
# One summary row per target gene, built from the cached `mygene_<gene>` and
# `hpa_<gene>` bundles.
def _brief(value, fallback):
    """Join the first two items of a list, else stringify; cap at 55 chars."""
    if isinstance(value, list):
        return ', '.join(value[:2])[:55]
    return str(value or fallback)[:55]


ann_rows = []
for g in ['TFEB']:
    mg = load(f'mygene_{g}')
    hpa = load(f'hpa_{g}')
    if mg or hpa:
        row = {
            'gene': g,
            'name': (mg.get('name') or '')[:55],
            'protein_class': _brief(hpa.get('protein_class'), '—'),
            'disease_involvement': _brief(hpa.get('disease_involvement'), ''),
        }
    else:
        # Neither bundle is cached for this gene: emit a placeholder row.
        row = {'gene': g, 'name': '—', 'protein_class': '—',
               'disease_involvement': '—'}
    ann_rows.append(row)
pd.DataFrame(ann_rows)
| | gene | name | protein_class | disease_involvement |
|---|---|---|---|---|
| 0 | TFEB | — | — | — |
3. GO Biological Process enrichment (Enrichr)¶
# Tabulate the first ten entries of the cached Enrichr GO Biological Process
# bundle: term, number of hit genes, p-value, odds ratio, and gene list.
go_bp = load('enrichr_GO_Biological_Process')
go_view = None  # nothing is rendered when no enrichment data is cached
if isinstance(go_bp, list) and go_bp:
    # .copy() detaches the column subset so the assignments below cannot
    # trigger pandas' SettingWithCopyWarning.
    go_df = pd.DataFrame(go_bp[:10])[['term','p_value','odds_ratio','genes']].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count hits before the gene lists are flattened into strings.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    go_view = go_df[['term','n_hits','p_value','odds_ratio','genes']]
else:
    print('No GO:BP enrichment data')
# Jupyter renders only a trailing top-level expression; a bare expression
# nested inside the `if` is discarded, so surface the table here.
go_view
# Visualize top GO BP enrichment
go_bp = load('enrichr_GO_Biological_Process')
if not (isinstance(go_bp, list) and go_bp):
    print('No GO:BP data to plot')
else:
    top = go_bp[:8]
    # barh draws the first entry at the bottom, so iterate in reverse to put
    # the first cached term at the top of the chart.
    terms = [t['term'][:45] for t in reversed(top)]
    # Floor p-values at 1e-300 so log10 never receives zero.
    neglogp = [-np.log10(max(t['p_value'], 1e-300)) for t in reversed(top)]
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()
4. KEGG pathway enrichment¶
# Tabulate the first ten entries of the cached Enrichr KEGG pathway bundle.
kegg = load('enrichr_KEGG_Pathways')
kegg_view = None  # nothing is rendered when no enrichment data is cached
if isinstance(kegg, list) and kegg:
    # .copy() detaches the column subset so the assignments below cannot
    # trigger pandas' SettingWithCopyWarning.
    kegg_df = pd.DataFrame(kegg[:10])[['term','p_value','odds_ratio','genes']].copy()
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    kegg_view = kegg_df
else:
    print('No KEGG enrichment data')
# Jupyter renders only a trailing top-level expression; a bare expression
# nested inside the `if` is discarded, so surface the table here.
kegg_view
No KEGG enrichment data
5. STRING protein interaction network¶
# List the cached STRING interaction edges, highest score first (top 20).
ppi = load('string_network')
ppi_view = None  # nothing is rendered when no edges are cached
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi)
    # The display columns are filtered by presence, so guard the sort key the
    # same way instead of raising KeyError when 'score' is absent.
    if 'score' in ppi_df.columns:
        ppi_df = ppi_df.sort_values('score', ascending=False)
    display_cols = [c for c in ['protein1','protein2','score','escore','tscore'] if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    ppi_view = ppi_df[display_cols].head(20)
else:
    print('No STRING edges returned')
# Jupyter renders only a trailing top-level expression; a bare expression
# nested inside the `if` is discarded, so surface the table here.
ppi_view
11 STRING edges
# Network figure
ppi = load('string_network')
if not (isinstance(ppi, list) and ppi):
    print('No STRING data to visualize')
else:
    import math
    # Every protein that appears on either end of an edge, in sorted order.
    nodes = sorted({p for e in ppi for p in (e['protein1'], e['protein2'])})
    n = len(nodes)
    # Place the nodes evenly around the unit circle.
    angles = [2*math.pi*i/n for i in range(n)]
    pos = {n_: (math.cos(a), math.sin(a)) for n_, a in zip(nodes, angles)}
    fig, ax = plt.subplots(figsize=(7, 7))
    for e in ppi:
        (x1, y1), (x2, y2) = pos[e['protein1']], pos[e['protein2']]
        # Edge opacity and width both grow with the edge score.
        ax.plot([x1, x2], [y1, y2], color='#888',
                alpha=0.3+0.5*e['score'],
                linewidth=0.5+2*e['score'])
    for name, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(name, (x, y), ha='center', va='center',
                    fontsize=8, fontweight='bold', zorder=4)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout()
    plt.show()
6. Reactome pathway footprint¶
# Per-gene Reactome summary: pathway count plus the name of the first
# pathway in the cached `reactome_<gene>` bundle.
pw_rows = []
for g in ['TFEB']:
    pws = load(f'reactome_{g}')
    if not isinstance(pws, list):
        # Missing or unexpected bundle shape: report zero pathways.
        pw_rows.append({'gene': g, 'n_pathways': 0, 'top_pathway': '—'})
        continue
    first_name = pws[0]['name'] if pws else '—'
    pw_rows.append({'gene': g, 'n_pathways': len(pws),
                    'top_pathway': first_name[:70]})
pd.DataFrame(pw_rows).sort_values('n_pathways', ascending=False)
| | gene | n_pathways | top_pathway |
|---|---|---|---|
| 0 | TFEB | 0 | — |
7. Allen Brain Atlas ISH regional expression¶
# Per-gene Allen Brain Atlas ISH summary: region count plus the structure
# name and expression energy of the first region in the cached bundle.
ish_rows = []
for g in ['TFEB']:
    ish = load(f'allen_ish_{g}')
    regions = []
    if isinstance(ish, dict):
        regions = ish.get('regions') or []
    if regions:
        first = regions[0]
        top_region = first.get('structure', '')
        top_energy = round(first.get('expression_energy', 0), 2)
    else:
        top_region, top_energy = '—', None
    ish_rows.append({
        'gene': g,
        'n_ish_regions': len(regions),
        'top_region': top_region[:45],
        'top_energy': top_energy,
    })
pd.DataFrame(ish_rows)
| | gene | n_ish_regions | top_region | top_energy |
|---|---|---|---|---|
| 0 | TFEB | 0 | — | — |
8. Hypothesis ranking (3 hypotheses)¶
# Horizontal bar chart of hypothesis composite scores.
hyp_data = [('The Mitochondrial-Lysosomal Metabolic Coupling Dysfunct', 0.594), ('Brain Insulin Resistance with Glucose Transporter Dysfu', 0.553), ('The Glial Ketone Metabolic Shunt Hypothesis', 0.544)]


def _score_color(s):
    """Map a composite score to its bar colour (>= 0.6, >= 0.5, otherwise)."""
    if s >= 0.6:
        return '#ef5350'
    if s >= 0.5:
        return '#ffa726'
    return '#66bb6a'


# barh draws the first entry at the bottom, so reverse to keep the first
# listed hypothesis at the top.
titles = [title for title, _ in hyp_data[::-1]]
scores = [score for _, score in hyp_data[::-1]]
fig, ax = plt.subplots(figsize=(10, max(8, len(titles)*0.4)))
colors = [_score_color(s) for s in scores]
ax.barh(range(len(titles)), scores, color=colors)
ax.set_yticks(range(len(titles)))
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('Metabolic reprogramming in neurodegenerative disease')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()
9. Score dimension heatmap (top 10)¶
# Heatmap of per-dimension scores: one row per hypothesis label, one column
# per entry in `dims`.
labels = ['The Mitochondrial-Lysosomal Metabolic Co', 'Brain Insulin Resistance with Glucose Tr', 'The Glial Ketone Metabolic Shunt Hypothe']
matrix = np.array([[0.5, 0.5, 0.5, 0.5, 0.339, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5, 0.09, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5, 0.13, 0.5, 0.5, 0.5, 0.5]])
dims = ['novelty_score', 'feasibility_score', 'impact_score', 'mechanistic_plausibility_score', 'clinical_relevance_score', 'data_availability_score', 'reproducibility_score', 'druggability_score', 'safety_profile_score']
if matrix.size == 0:
    print('No score data available')
else:
    # Turn e.g. 'clinical_relevance_score' into 'Clinical Relevance'.
    dim_labels = [d.replace('_score','').replace('_',' ').title() for d in dims]
    fig, ax = plt.subplots(figsize=(10, 5))
    # Colour scale is pinned to the 0..1 range.
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(dim_labels, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    plt.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()
10. PubMed evidence per hypothesis¶
Hypothesis 1: The Mitochondrial-Lysosomal Metabolic Coupling Dysfunction¶
Target genes: TFEB · Composite score: 0.594
Background and Rationale
The cellular quality control system represents one of the most critical determinants of neuronal survival and longevity. Among the key players in this system, the transcription factor EB (TFEB) has emerged as a master regulator of lysosomal biogenesis and autophagy, orchestrating what is increasingly recognized as the mitochondrial-lysosomal axis. TFEB belongs to the microphthalmia-associated transcription factor (MiTF) family and serves as the principal coordinator…
# PubMed evidence table for hypothesis h-e3e8407c, read from the cached
# `pubmed_<hid>` bundle and sorted newest first when a year is available.
hid = 'h-e3e8407c'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() detaches the column subset so the edits below cannot
        # trigger pandas' SettingWithCopyWarning.
        lit = lit[cols].copy()
        # Any of these fields may be absent from the cached records, so guard
        # each access (previously only 'journal' was guarded and a missing
        # 'title' or 'year' raised KeyError).
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 2: Brain Insulin Resistance with Glucose Transporter Dysfunction¶
Target genes: GLUT3/GLUT4 · Composite score: 0.553
Brain Insulin Resistance with Glucose Transporter Dysfunction proposes that neuronal insulin signaling failure — a central metabolic feature of Alzheimer's disease often called "type 3 diabetes" — drives neurodegeneration through impaired glucose transporter (GLUT3/GLUT4) trafficking, energy crisis, and compensatory metabolic shifts that exacerbate tau phosphorylation and amyloid pathology.
Background and Rationale
The brain consumes approximately 20% of the body's total glucose despite co…
# PubMed evidence table for hypothesis h-075f1f02, read from the cached
# `pubmed_<hid>` bundle and sorted newest first when a year is available.
hid = 'h-075f1f02'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() detaches the column subset so the edits below cannot
        # trigger pandas' SettingWithCopyWarning.
        lit = lit[cols].copy()
        # Any of these fields may be absent from the cached records, so guard
        # each access (previously only 'journal' was guarded and a missing
        # 'title' or 'year' raised KeyError).
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 3: The Glial Ketone Metabolic Shunt Hypothesis¶
Target genes: HMGCS2 · Composite score: 0.544
The Glial Ketone Metabolic Shunt Hypothesis proposes that reactive astrocytes in neurodegenerative disease aberrantly upregulate ketone body synthesis (ketogenesis), creating a metabolic steal syndrome that depletes shared glucose and lipid substrates from neurons while producing ketone bodies that failing neurons cannot efficiently metabolize — a paradoxical "rescue attempt" that worsens energy crisis.
Background and Rationale
Brain energy metabolism represents one of the most tightly reg…
# PubMed evidence table for hypothesis h-4b517512, read from the cached
# `pubmed_<hid>` bundle and sorted newest first when a year is available.
hid = 'h-4b517512'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() detaches the column subset so the edits below cannot
        # trigger pandas' SettingWithCopyWarning.
        lit = lit[cols].copy()
        # Any of these fields may be absent from the cached records, so guard
        # each access (previously only 'journal' was guarded and a missing
        # 'title' or 'year' raised KeyError).
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
11. Knowledge graph edges (28 total)¶
# Knowledge-graph edges embedded at notebook generation time; each record has
# a source, a relation, a target and a strength.
# NOTE(review): the section heading reports 28 edges but only 25 are embedded
# here — confirm whether the generator truncates the list.
edge_data = [
    {'source': 'h-e3e8407c', 'relation': 'targets', 'target': 'TFEB', 'strength': 0.5},
    {'source': 'GLUT3', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.5},
    {'source': 'GLUT3', 'relation': 'interacts_with', 'target': 'GLUT4', 'strength': 0.5},
    {'source': 'GLUT4', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.5},
    {'source': 'GLUT4', 'relation': 'interacts_with', 'target': 'GLUT3', 'strength': 0.5},
    {'source': 'HMGCS2', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.5},
    {'source': 'NAMPT', 'relation': 'co_discussed', 'target': 'CLOCK', 'strength': 0.4},
    {'source': 'NAMPT', 'relation': 'co_discussed', 'target': 'PRKAA1', 'strength': 0.4},
    {'source': 'TFEB', 'relation': 'co_discussed', 'target': 'PRKAA1', 'strength': 0.4},
    {'source': 'TFEB', 'relation': 'co_discussed', 'target': 'NAMPT', 'strength': 0.4},
    {'source': 'PRKAA1', 'relation': 'co_discussed', 'target': 'NAMPT', 'strength': 0.4},
    {'source': 'CLOCK', 'relation': 'co_discussed', 'target': 'NAMPT', 'strength': 0.4},
    {'source': 'NAMPT', 'relation': 'co_discussed', 'target': 'TFEB', 'strength': 0.4},
    {'source': 'PRKAA1', 'relation': 'co_discussed', 'target': 'TFEB', 'strength': 0.4},
    {'source': 'GLUT3', 'relation': 'co_discussed', 'target': 'HMGCS2', 'strength': 0.4},
    {'source': 'GLUT3', 'relation': 'co_discussed', 'target': 'TFEB', 'strength': 0.4},
    {'source': 'GLUT3', 'relation': 'co_discussed', 'target': 'PRKAA1', 'strength': 0.4},
    {'source': 'GLUT3', 'relation': 'co_discussed', 'target': 'NAMPT', 'strength': 0.4},
    {'source': 'GLUT3', 'relation': 'co_discussed', 'target': 'CLOCK', 'strength': 0.4},
    {'source': 'HMGCS2', 'relation': 'co_discussed', 'target': 'TFEB', 'strength': 0.4},
    {'source': 'HMGCS2', 'relation': 'co_discussed', 'target': 'PRKAA1', 'strength': 0.4},
    {'source': 'HMGCS2', 'relation': 'co_discussed', 'target': 'NAMPT', 'strength': 0.4},
    {'source': 'HMGCS2', 'relation': 'co_discussed', 'target': 'CLOCK', 'strength': 0.4},
    {'source': 'GLUT3', 'relation': 'co_discussed', 'target': 'GLUT4', 'strength': 0.4},
    {'source': 'HMGCS2', 'relation': 'co_discussed', 'target': 'GLUT4', 'strength': 0.4},
]
edge_view = None  # nothing is rendered when no edges are embedded
if edge_data:
    edge_view = pd.DataFrame(edge_data).head(25)
else:
    print('No KG edge data available')
# Jupyter renders only a trailing top-level expression; a bare expression
# nested inside the `if` is discarded, so surface the table here.
edge_view
12. Caveats¶
This notebook uses real Forge tool calls cached from live APIs, but:
- Enrichment is against curated gene-set libraries, not genome-wide screens
- STRING/Reactome/HPA/MyGene reflect curated knowledge
- PubMed literature is search-relevance ranked, not systematic review
The cached evidence bundle is the minimum viable real-data analysis for this topic.