import json, sqlite3, sys
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
# Figure defaults shared by every plot below, followed by selection of the
# Agg backend.
matplotlib.rcParams.update({
    'figure.dpi': 110,
    'figure.facecolor': 'white',
})
matplotlib.use('Agg')

# Identifier of the analysis this notebook reports on, and the fixed
# location of the SQLite database queried further down.
ANALYSIS_ID = 'SDA-2026-04-04-gap-20260404-microglial-priming-early-ad'
DB_PATH = Path('/home/ubuntu/scidex/scidex.db')

# The repository root is the (resolved) current working directory; the
# Forge cache for this analysis lives underneath it.
REPO = Path('.').resolve()
CACHE = REPO.joinpath('data', 'forge_cache', 'microglial_priming_ad')

def load_cache(name):
    """Load one cached JSON payload from the Forge cache directory.

    Args:
        name: Cache entry name; the file read is ``CACHE / f'{name}.json'``.

    Returns:
        The decoded JSON value, or an empty dict when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    p = CACHE / f'{name}.json'
    try:
        # Decode explicitly as UTF-8 instead of relying on the locale's
        # default encoding, and read directly rather than checking
        # existence first (avoids a check-then-read race).
        return json.loads(p.read_text(encoding='utf-8'))
    except FileNotFoundError:
        return {}

# Echo the resolved locations so the outputs below can be traced back to
# the checkout and cache directory that produced them.
print(f"Repo: {REPO}")
cache_found = CACHE.exists()
print(f"Cache dir: {CACHE} (exists={cache_found})")
print(f"Analysis: {ANALYSIS_ID}")

Repo: /home/ubuntu/scidex/.claude/worktrees/task-25702d84-ffc9-461f-ba96-3f7d3c97cd2b
Cache dir: /home/ubuntu/scidex/.claude/worktrees/task-25702d84-ffc9-461f-ba96-3f7d3c97cd2b/data/forge_cache/microglial_priming_ad (exists=True)
Analysis: SDA-2026-04-04-gap-20260404-microglial-priming-early-ad

# Hypothesis data from DB (top 20 by composite score)
hyp_data = [
  {
    "title": "Epigenetic Reprogramming of Microglial Memory",
    "target_gene": "DNMT3A, HDAC1/2",
    "composite_score": 0.5003,
    "confidence_score": 0.6,
    "novelty_score": 0.8,
    "feasibility_score": 0.8,
    "impact_score": 0.7
  },
  {
    "title": "Microbiota-Microglia Axis Modulation",
    "target_gene": "Multiple",
    "composite_score": 0.4926,
    "confidence_score": 0.3,
    "novelty_score": 0.6,
    "feasibility_score": 0.6,
    "impact_score": 0.5
  },
  {
    "title": "Synaptic Pruning Precision Therapy",
    "target_gene": "C1QA, C3, CX3CR1, CX3CL1",
    "composite_score": 0.4654,
    "confidence_score": 0.7,
    "novelty_score": 0.7,
    "feasibility_score": 0.6,
    "impact_score": 0.8
  },
  {
    "title": "Cardiovascular-Neuroinflammatory Dual Targeting",
    "target_gene": "TNF/IL6",
    "composite_score": 0.4623,
    "confidence_score": 0.5,
    "novelty_score": 0.4,
    "feasibility_score": 0.8,
    "impact_score": 0.6
  },
  {
    "title": "IGFBPL1-Mediated Homeostatic Restoration",
    "target_gene": "IGFBPL1",
    "composite_score": 0.446,
    "confidence_score": 0.8,
    "novelty_score": 0.9,
    "feasibility_score": 0.3,
    "impact_score": 0.8
  },
  {
    "title": "Cardiovascular-Neuroinflammation Crosstalk Interruption",
    "target_gene": "IL1B, TNFA, NLRP3",
    "composite_score": 0.4366,
    "confidence_score": 0.5,
    "novelty_score": 0.5,
    "feasibility_score": 0.8,
    "impact_score": 0.7
  },
  {
    "title": "APOE4-Lipid Metabolism Correction",
    "target_gene": "APOE",
    "composite_score": 0.4363,
    "confidence_score": 0.4,
    "novelty_score": 0.7,
    "feasibility_score": 0.4,
    "impact_score": 0.6
  },
  {
    "title": "Perinatal Immune Challenge Prevention",
    "target_gene": "Multiple",
    "composite_score": 0.4323,
    "confidence_score": 0.2,
    "novelty_score": 0.9,
    "feasibility_score": 0.1,
    "impact_score": 0.4
  },
  {
    "title": "Gut-Brain Axis Microbiome Modulation",
    "target_gene": "GPR43, GPR109A",
    "composite_score": 0.4208,
    "confidence_score": 0.4,
    "novelty_score": 0.8,
    "feasibility_score": 0.4,
    "impact_score": 0.6
  },
  {
    "title": "IGFBPL1-Mediated Microglial Reprogramming",
    "target_gene": "IGFBPL1",
    "composite_score": 0.4143,
    "confidence_score": 0.4,
    "novelty_score": 0.9,
    "feasibility_score": 0.3,
    "impact_score": 0.8
  },
  {
    "title": "Complement-Mediated Synaptic Protection",
    "target_gene": "C1QA",
    "composite_score": 0.4103,
    "confidence_score": 0.4,
    "novelty_score": 0.6,
    "feasibility_score": 0.5,
    "impact_score": 0.7
  },
  {
    "title": "Temporal Gating of Microglial Responses",
    "target_gene": "CLOCK, ARNTL",
    "composite_score": 0.3888,
    "confidence_score": 0.2,
    "novelty_score": 0.9,
    "feasibility_score": 0.3,
    "impact_score": 0.4
  },
  {
    "title": "Perinatal Hypoxia-Primed Microglia Targeting",
    "target_gene": "HIF1A, NFKB1",
    "composite_score": 0.385,
    "confidence_score": 0.3,
    "novelty_score": 0.7,
    "feasibility_score": 0.2,
    "impact_score": 0.5
  },
  {
    "title": "TREM2-P2RY12 Balance Restoration Therapy",
    "target_gene": "TREM2",
    "composite_score": 0.3706,
    "confidence_score": 0.3,
    "novelty_score": 0.8,
    "feasibility_score": 0.2,
    "impact_score": 0.6
  }
]

# Row cap named in the banner ("top 20").
HYP_LIMIT = 20

df = pd.DataFrame(hyp_data)

# Derive the "showing top N" figure from the data instead of hard-coding the
# cap: with fewer rows than the cap the banner used to read
# "showing top 20 of 14 total", which cannot be true.
# NOTE(review): "total" is the number of embedded rows; if the DB ever holds
# more than HYP_LIMIT hypotheses the true total is not known in this cell.
n_hyp = len(df)
n_shown = min(HYP_LIMIT, n_hyp)
print(f"{n_hyp} hypotheses (showing top {n_shown} of {n_hyp} total)\n")
print(df[['title', 'target_gene', 'composite_score', 'confidence_score',
          'novelty_score', 'feasibility_score', 'impact_score']].to_string(index=False))

14 hypotheses (showing top 20 of 14 total)

                                                  title              target_gene  composite_score  confidence_score  novelty_score  feasibility_score  impact_score
          Epigenetic Reprogramming of Microglial Memory          DNMT3A, HDAC1/2           0.5003               0.6            0.8                0.8           0.7
                   Microbiota-Microglia Axis Modulation                 Multiple           0.4926               0.3            0.6                0.6           0.5
                     Synaptic Pruning Precision Therapy C1QA, C3, CX3CR1, CX3CL1           0.4654               0.7            0.7                0.6           0.8
        Cardiovascular-Neuroinflammatory Dual Targeting                  TNF/IL6           0.4623               0.5            0.4                0.8           0.6
               IGFBPL1-Mediated Homeostatic Restoration                  IGFBPL1           0.4460               0.8            0.9                0.3           0.8
Cardiovascular-Neuroinflammation Crosstalk Interruption        IL1B, TNFA, NLRP3           0.4366               0.5            0.5                0.8           0.7
                      APOE4-Lipid Metabolism Correction                     APOE           0.4363               0.4            0.7                0.4           0.6
                  Perinatal Immune Challenge Prevention                 Multiple           0.4323               0.2            0.9                0.1           0.4
                   Gut-Brain Axis Microbiome Modulation           GPR43, GPR109A           0.4208               0.4            0.8                0.4           0.6
              IGFBPL1-Mediated Microglial Reprogramming                  IGFBPL1           0.4143               0.4            0.9                0.3           0.8
                Complement-Mediated Synaptic Protection                     C1QA           0.4103               0.4            0.6                0.5           0.7
                Temporal Gating of Microglial Responses             CLOCK, ARNTL           0.3888               0.2            0.9                0.3           0.4
           Perinatal Hypoxia-Primed Microglia Targeting             HIF1A, NFKB1           0.3850               0.3            0.7                0.2           0.5
               TREM2-P2RY12 Balance Restoration Therapy                    TREM2           0.3706               0.3            0.8                0.2           0.6

# Score distribution plot: composite-score ranking (left) and the four
# scoring dimensions per hypothesis (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
n_hyp = len(df)

# Left: composite scores bar chart.
# Colour bands: green for >= 0.5, amber for >= 0.35, red otherwise.
colors = ['#3fb950' if s >= 0.5 else '#d29922' if s >= 0.35 else '#f85149'
          for s in df['composite_score']]
axes[0].barh(range(n_hyp), df['composite_score'], color=colors, edgecolor='#333')
axes[0].set_yticks(range(n_hyp))
# Long titles are cut to 40 characters so the tick labels stay readable.
axes[0].set_yticklabels([t[:40] + '...' if len(t) > 40 else t for t in df['title']],
                         fontsize=7)
axes[0].set_xlabel('Composite Score')
axes[0].set_title('Hypothesis Ranking by Composite Score')
axes[0].axvline(0.5, color='#3fb950', linestyle='--', alpha=0.5, label='0.5 threshold')
axes[0].legend(fontsize=8)
# First row of df is drawn at the top.
axes[0].invert_yaxis()

# Right: grouped bars, one group per scoring dimension and one bar per
# hypothesis within each group.
dims = ['confidence_score', 'novelty_score', 'feasibility_score', 'impact_score']
dim_labels = ['Confidence', 'Novelty', 'Feasibility', 'Impact']
x = np.arange(len(dims))
n_slots = max(n_hyp, 1)  # guards the divisions below when df is empty
width = 0.6 / n_slots
# Enumerate positionally so the bar offsets do not depend on df's index
# labels (they would be wrong after filtering or sorting without a reset).
for pos, (_, row) in enumerate(df.iterrows()):
    vals = [row[d] for d in dims]
    axes[1].bar(x + pos * width, vals, width, alpha=0.6,
                color=plt.cm.viridis(pos / n_slots))
# Bars are centre-aligned at x + pos*width for pos in 0..n-1, so the middle
# of each group sits at x + (n-1)*width/2.
axes[1].set_xticks(x + (n_slots - 1) * width / 2)
axes[1].set_xticklabels(dim_labels)
axes[1].set_ylabel('Score')
axes[1].set_title('Scoring Dimensions (all hypotheses)')
axes[1].set_ylim(0, 1.1)

plt.tight_layout()
plt.savefig('/tmp/hyp_scores.png', dpi=100, bbox_inches='tight')
plt.show()
print("Score distributions plotted")

Score distributions plotted

# PubMed results from Forge cache
pubmed_results = [
  {
    "title": "The temporal and stimuli-specific effects of LPS and IFN\u03b3 on microglial activation.",
    "year": "2026",
    "pmid": "41695273",
    "abstract": ""
  }
]


def format_paper(index, paper):
    """Return the display lines for one PubMed record.

    Args:
        index: 1-based position shown in front of the entry.
        paper: Mapping that may carry 'title', 'year' or 'pubdate',
            'pmid' or 'pubmed_id', and 'abstract'.

    Returns:
        A list of strings: the title line, then a PMID line and an abstract
        line when those fields are non-empty.
    """
    year = paper.get('year') or paper.get('pubdate', '')
    pmid = paper.get('pmid') or paper.get('pubmed_id', '')
    # `or` (not a .get default) so an explicit None/empty title also falls
    # back to 'N/A' instead of failing on the slice below.
    title = paper.get('title') or 'N/A'
    lines = [f"{index:2d}. [{year}] {title[:100]}"]
    if pmid:
        lines.append(f"     PMID: {pmid}")
    abstract = paper.get('abstract') or ''
    if abstract:
        # Only mark the abstract as cut when it really was truncated.
        ellipsis = '...' if len(abstract) > 200 else ''
        lines.append(f"     {abstract[:200]}{ellipsis}")
    return lines


print(f"Retrieved {len(pubmed_results)} PubMed articles\n")
# At most the first ten records are listed, each followed by a blank line.
for i, paper in enumerate(pubmed_results[:10], 1):
    print("\n".join(format_paper(i, paper)))
    print()

Retrieved 1 PubMed articles

 1. [2026] The temporal and stimuli-specific effects of LPS and IFNγ on microglial activation.
     PMID: 41695273

# Annotation records for the focal genes: symbol, full name, an (empty)
# entrez field and a summary string.
gene_annotations = [
  {
    "symbol": "TREM2",
    "name": "triggering receptor expressed on myeloid cells 2",
    "entrez": "",
    "summary": "This gene encodes a membrane protein that forms a receptor signaling complex with the TYRO protein tyrosine kinase binding protein. The encoded protein functions in immune response and may be involved"
  },
  {
    "symbol": "TYROBP",
    "name": "transmembrane immune signaling adaptor TYROBP",
    "entrez": "",
    "summary": "This gene encodes a transmembrane signaling polypeptide which contains an immunoreceptor tyrosine-based activation motif (ITAM) in its cytoplasmic domain. The encoded protein may associate with the ki"
  },
  {
    "symbol": "CST7",
    "name": "cystatin F",
    "entrez": "",
    "summary": "The cystatin superfamily encompasses proteins that contain multiple cystatin-like sequences. Some of the members are active cysteine protease inhibitors, while others have lost or perhaps never acquir"
  },
  {
    "symbol": "CD33",
    "name": "CD33 molecule",
    "entrez": "",
    "summary": "Enables protein phosphatase binding activity and sialic acid binding activity. Involved in several processes, including negative regulation of cytokine production; negative regulation of monocyte acti"
  },
  {
    "symbol": "SPI1",
    "name": "Spi-1 proto-oncogene",
    "entrez": "",
    "summary": "This gene encodes an ETS-domain transcription factor that activates gene expression during myeloid and B-lymphoid cell development. The nuclear protein binds to a purine-rich sequence known as the PU-"
  },
  {
    "symbol": "BIN1",
    "name": "bridging integrator 1",
    "entrez": "",
    "summary": "This gene encodes several isoforms of a nucleocytoplasmic adaptor protein, one of which was initially identified as a MYC-interacting protein with features of a tumor suppressor. Isoforms that are exp"
  }
]

n_genes = len(gene_annotations)
print(f"Gene annotations for {n_genes} focal genes:\n")
for gene in gene_annotations:
    # Header line first, then up to 180 characters of summary unless the
    # summary is the '—' placeholder; a blank line separates entries.
    print(f"  {gene['symbol']} — {gene['name']}")
    summary = gene['summary']
    if summary != '—':
        print(f"    {summary[:180]}")
    print()

Gene annotations for 6 focal genes:

  TREM2 — triggering receptor expressed on myeloid cells 2
    This gene encodes a membrane protein that forms a receptor signaling complex with the TYRO protein tyrosine kinase binding protein. The encoded protein functions in immune response

  TYROBP — transmembrane immune signaling adaptor TYROBP
    This gene encodes a transmembrane signaling polypeptide which contains an immunoreceptor tyrosine-based activation motif (ITAM) in its cytoplasmic domain. The encoded protein may a

  CST7 — cystatin F
    The cystatin superfamily encompasses proteins that contain multiple cystatin-like sequences. Some of the members are active cysteine protease inhibitors, while others have lost or 

  CD33 — CD33 molecule
    Enables protein phosphatase binding activity and sialic acid binding activity. Involved in several processes, including negative regulation of cytokine production; negative regulat

  SPI1 — Spi-1 proto-oncogene
    This gene encodes an ETS-domain transcription factor that activates gene expression during myeloid and B-lymphoid cell development. The nuclear protein binds to a purine-rich seque

  BIN1 — bridging integrator 1
    This gene encodes several isoforms of a nucleocytoplasmic adaptor protein, one of which was initially identified as a MYC-interacting protein with features of a tumor suppressor. I

# STRING interaction records as stored in the Forge cache. Endpoints are
# keyed 'protein1'/'protein2'; 'score' is the value used as the edge weight.
string_interactions = [
  {
    "protein1": "CD33",
    "protein2": "TYROBP",
    "score": 0.717,
    "nscore": 0,
    "fscore": 0,
    "pscore": 0,
    "ascore": 0,
    "escore": 0,
    "dscore": 0,
    "tscore": 0.717
  },
  {
    "protein1": "CD33",
    "protein2": "TREM2",
    "score": 0.812,
    "nscore": 0,
    "fscore": 0,
    "pscore": 0,
    "ascore": 0,
    "escore": 0,
    "dscore": 0,
    "tscore": 0.812
  },
  {
    "protein1": "TYROBP",
    "protein2": "INPP5D",
    "score": 0.773,
    "nscore": 0,
    "fscore": 0,
    "pscore": 0,
    "ascore": 0,
    "escore": 0,
    "dscore": 0,
    "tscore": 0.773
  },
  {
    "protein1": "TYROBP",
    "protein2": "TREM2",
    "score": 0.998,
    "nscore": 0,
    "fscore": 0,
    "pscore": 0,
    "ascore": 0,
    "escore": 0.526,
    "dscore": 0.8,
    "tscore": 0.982
  }
]


def string_edges(interactions):
    """Extract ``(node_a, node_b, score)`` tuples from interaction records.

    Endpoint names are looked up under 'preferredName_A'/'preferredName_B',
    then 'protein1'/'protein2' (the keys used by the records above), then
    'stringId_A'/'stringId_B'. The score comes from 'score', falling back to
    'combined_score' and finally 0.5.

    Args:
        interactions: Iterable of records; entries that are not dicts are
            ignored.

    Returns:
        A list of ``(a, b, score)`` tuples, skipping records with a missing
        endpoint or with identical endpoints.

    Raises:
        ValueError, TypeError: If a score cannot be converted to float.
    """
    edges = []
    for edge in interactions:
        if not isinstance(edge, dict):
            continue
        a = (edge.get('preferredName_A') or edge.get('protein1')
             or edge.get('stringId_A', ''))
        b = (edge.get('preferredName_B') or edge.get('protein2')
             or edge.get('stringId_B', ''))
        score = float(edge.get('score', edge.get('combined_score', 0.5)))
        if a and b and a != b:
            edges.append((a, b, score))
    return edges


if string_interactions:
    try:
        import networkx as nx
        G = nx.Graph()
        # Previously only the preferredName_*/stringId_* keys were read, so
        # the protein1/protein2 records above never produced an edge.
        for a, b, score in string_edges(string_interactions):
            G.add_edge(a, b, weight=score)

        if G.number_of_nodes() > 0:
            fig, ax = plt.subplots(figsize=(10, 8))
            # Fixed seed keeps the layout reproducible between runs.
            pos = nx.spring_layout(G, seed=42, k=2.5)
            degrees = dict(G.degree())
            # Node area grows with degree so hubs stand out.
            node_sizes = [300 + degrees.get(n, 1) * 200 for n in G.nodes()]
            target_genes = ['TREM2', 'TYROBP', 'CST7', 'CD33', 'SPI1', 'BIN1', 'INPP5D', 'MEF2C']
            node_colors = ['#4fc3f7' if n in target_genes else '#58a6ff' for n in G.nodes()]

            nx.draw_networkx_edges(G, pos, alpha=0.3, edge_color='#8b949e', ax=ax)
            nx.draw_networkx_nodes(G, pos, node_size=node_sizes,
                                   node_color=node_colors, alpha=0.85, ax=ax)
            nx.draw_networkx_labels(G, pos, font_size=8, font_color='white', ax=ax)
            ax.set_title(f'STRING Protein Interaction Network ({G.number_of_nodes()} genes, {G.number_of_edges()} edges)')
            ax.axis('off')
            plt.tight_layout()
            plt.savefig('/tmp/string_network.png', dpi=100, bbox_inches='tight')
            plt.show()
            print(f"Network: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
            top5 = sorted(degrees.items(), key=lambda x: x[1], reverse=True)[:5]
            print(f"Hub genes by degree: {top5}")
        else:
            print("No edges with valid gene names found in STRING data")
    except ImportError:
        print("networkx not installed — skipping network visualization")
    except Exception as e:
        # Best-effort cell: a plotting failure is reported, not raised, so
        # the rest of the notebook still runs.
        print(f"Network plot error: {e}")
else:
    print("No STRING interaction data available (Forge cache empty or API unavailable)")
    print("Run with --force to fetch fresh data from STRING DB")

No edges with valid gene names found in STRING data

# Pull debate data from DB
# Best-effort: any failure (missing file, missing table, ...) is reported
# and the notebook carries on.
try:
    db = sqlite3.connect(str(DB_PATH))
    try:
        debate_df = pd.read_sql_query('''
        SELECT id, question, num_rounds, quality_score, personas_used, created_at
        FROM debate_sessions WHERE analysis_id = ?
    ''', db, params=(ANALYSIS_ID,))
    finally:
        # Close the connection even when the query raises; previously a
        # failing query skipped db.close() and leaked the handle.
        db.close()
    print(f"Debate sessions for {ANALYSIS_ID}:")
    print(debate_df.to_string(index=False))
except Exception as e:
    print(f"DB query skipped: {e}")

Debate sessions for SDA-2026-04-04-gap-20260404-microglial-priming-early-ad:
                                                          id                                                              question  num_rounds  quality_score                                           personas_used                 created_at
sess_SDA-2026-04-04-gap-20260404-microglial-priming-early-ad Neuroinflammation and microglial priming in early Alzheimer's Disease           4           0.63 ["theorist", "skeptic", "domain_expert", "synthesizer"] 2026-04-04T06:29:35.776865

# Statistical summary of hypothesis scores: describe() over the five score
# columns, then a short profile of the five highest composite scores.
if len(df):
    score_columns = ['composite_score', 'confidence_score', 'novelty_score',
                     'feasibility_score', 'impact_score']
    print("=== Score Distribution Statistics ===")
    summary_table = df[score_columns].describe().round(3)
    print(summary_table)

    print("\n=== Top 5 Hypotheses by Composite Score ===")
    top5 = df.nlargest(5, 'composite_score')
    for rank, row in enumerate(top5.to_dict('records'), start=1):
        print(f"\n{rank}. {row['title']}")
        print(f"   Target: {row['target_gene']} | Composite: {row['composite_score']:.3f}")
        print(f"   Conf={row['confidence_score']:.2f}  Nov={row['novelty_score']:.2f}  "
              f"Feas={row['feasibility_score']:.2f}  Impact={row['impact_score']:.2f}")

=== Score Distribution Statistics ===
       composite_score  confidence_score  novelty_score  feasibility_score  \
count           14.000            14.000         14.000             14.000   
mean             0.433             0.429          0.729              0.450   
std              0.039             0.177          0.159              0.238   
min              0.371             0.200          0.400              0.100   
25%              0.411             0.300          0.625              0.300   
50%              0.434             0.400          0.750              0.400   
75%              0.458             0.500          0.875              0.600   
max              0.500             0.800          0.900              0.800   

       impact_score  
count        14.000  
mean          0.621  
std           0.137  
min           0.400  
25%           0.525  
50%           0.600  
75%           0.700  
max           0.800  

=== Top 5 Hypotheses by Composite Score ===

1. Epigenetic Reprogramming of Microglial Memory
   Target: DNMT3A, HDAC1/2 | Composite: 0.500
   Conf=0.60  Nov=0.80  Feas=0.80  Impact=0.70

2. Microbiota-Microglia Axis Modulation
   Target: Multiple | Composite: 0.493
   Conf=0.30  Nov=0.60  Feas=0.60  Impact=0.50

3. Synaptic Pruning Precision Therapy
   Target: C1QA, C3, CX3CR1, CX3CL1 | Composite: 0.465
   Conf=0.70  Nov=0.70  Feas=0.60  Impact=0.80

4. Cardiovascular-Neuroinflammatory Dual Targeting
   Target: TNF/IL6 | Composite: 0.462
   Conf=0.50  Nov=0.40  Feas=0.80  Impact=0.60

5. IGFBPL1-Mediated Homeostatic Restoration
   Target: IGFBPL1 | Composite: 0.446
   Conf=0.80  Nov=0.90  Feas=0.30  Impact=0.80

Neuroinflammation and microglial priming in early Alzheimer's Disease — Analysis Notebook

Neuroinflammation and Microglial Priming in Early Alzheimer's Disease

Research Question

Analysis Summary

1. Setup & Data Loading

2. Hypothesis Landscape

3. Literature Evidence (PubMed)

4. Target Gene Annotations (MyGene.info)

5. Protein Interaction Network (STRING DB)

6. Multi-Agent Debate Analysis

7. Score Statistics & Top Hypotheses

8. Conclusions & Research Directions

Key Findings

Top Research Directions

Next Steps