# Install dependencies (run once)
!pip install scipy matplotlib seaborn pandas numpy -q

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from scipy.special import gammaln
import warnings
warnings.filterwarnings('ignore')

# Style settings
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = [12, 8]
plt.rcParams['font.size'] = 11

print("✓ Libraries loaded successfully!")

✓ Libraries loaded successfully!

# =============================================================================
# OBSERVED DATA FROM OFFICIAL SOURCES
# =============================================================================

# National Park Service Data (highest quality)
NPS_DATA = {
    'cold_cases_total': 30,           # Total permanent disappearances (NPS cold cases)
    'years_observed': 60,             # Time span of data (~1960-2020)
    'annual_visits_millions': 312,    # Recent average annual visits
}
NPS_DATA['total_visits'] = NPS_DATA['annual_visits_millions'] * NPS_DATA['years_observed'] * 1e6

# All US Public Lands Combined
ALL_LANDS = {
    'name': 'All Public Lands',
    'annual_visits_millions': 1684,   # NPS + USFS + BLM + FWS + USACE + State Parks
    'breakdown': {
        'National Park Service': 312,
        'US Forest Service': 159,
        'Bureau of Land Management': 75,
        'Fish & Wildlife Service': 61,
        'Army Corps of Engineers': 270,
        'State Parks': 807
    }
}
ALL_LANDS['total_visits_60yr'] = ALL_LANDS['annual_visits_millions'] * 60 * 1e6

# Missing 411 Claims
M411_CLAIMS = {
    'total_cases': 1600,              # Paulides' claimed "unexplained" cases
    'years_span': 60,
    'annual_rate': 27,                # 1600 / 60 ≈ 27 per year
    'geographic_scope': 'All US public lands + some private + international'
}

# Display the data
print("="*60)
print("📊 DATA SUMMARY")
print("="*60)
print(f"\n🏞️  NPS Data (Best Quality):")
print(f"    Cold cases (permanent disappearances): {NPS_DATA['cold_cases_total']}")
print(f"    Over time period: {NPS_DATA['years_observed']} years")
print(f"    Annual visits: {NPS_DATA['annual_visits_millions']}M")
print(f"    Total visits: {NPS_DATA['total_visits']:.2e}")

print(f"\n🌲 All US Public Lands:")
for land_type, visits in ALL_LANDS['breakdown'].items():
    print(f"    {land_type}: {visits}M visits/year")
print(f"    TOTAL: {ALL_LANDS['annual_visits_millions']}M visits/year")
print(f"    60-year total: {ALL_LANDS['total_visits_60yr']:.2e} visits")

print(f"\n📕 Missing 411 Claims:")
print(f"    Total claimed 'unexplained' cases: {M411_CLAIMS['total_cases']}")
print(f"    Implied annual rate: ~{M411_CLAIMS['annual_rate']} per year")
print(f"    Scope: {M411_CLAIMS['geographic_scope']}")

============================================================
📊 DATA SUMMARY
============================================================

🏞️  NPS Data (Best Quality):
    Cold cases (permanent disappearances): 30
    Over time period: 60 years
    Annual visits: 312M
    Total visits: 1.87e+10

🌲 All US Public Lands:
    National Park Service: 312M visits/year
    US Forest Service: 159M visits/year
    Bureau of Land Management: 75M visits/year
    Fish & Wildlife Service: 61M visits/year
    Army Corps of Engineers: 270M visits/year
    State Parks: 807M visits/year
    TOTAL: 1684M visits/year
    60-year total: 1.01e+11 visits

📕 Missing 411 Claims:
    Total claimed 'unexplained' cases: 1600
    Implied annual rate: ~27 per year
    Scope: All US public lands + some private + international

# =============================================================================
# BAYESIAN MODEL 1: Poisson-Gamma Conjugate Analysis
# =============================================================================

def poisson_gamma_posterior(observed_cases, exposure, prior_alpha=1, prior_beta=1e9):
    """
    Compute posterior distribution for Poisson rate with Gamma prior.

    Args:
        observed_cases: Number of observed disappearances
        exposure: Total visits (denominator)
        prior_alpha: Gamma prior shape parameter
        prior_beta: Gamma prior rate parameter

    Returns:
        Dictionary with posterior parameters and statistics
    """
    # Posterior parameters (conjugate update)
    post_alpha = prior_alpha + observed_cases
    post_beta = prior_beta + exposure

    # Posterior statistics
    post_mean = post_alpha / post_beta
    post_var = post_alpha / (post_beta ** 2)
    post_sd = np.sqrt(post_var)

    # 95% Credible Interval
    ci_lower = stats.gamma.ppf(0.025, post_alpha, scale=1/post_beta)
    ci_upper = stats.gamma.ppf(0.975, post_alpha, scale=1/post_beta)

    return {
        'prior_alpha': prior_alpha,
        'prior_beta': prior_beta,
        'post_alpha': post_alpha,
        'post_beta': post_beta,
        'post_mean': post_mean,
        'post_sd': post_sd,
        'ci_95': (ci_lower, ci_upper)
    }

# Run the analysis
posterior = poisson_gamma_posterior(
    observed_cases=NPS_DATA['cold_cases_total'],
    exposure=NPS_DATA['total_visits']
)

# Calculate annual predictions
annual_visits = NPS_DATA['annual_visits_millions'] * 1e6
annual_expected = posterior['post_mean'] * annual_visits
annual_ci_lower = posterior['ci_95'][0] * annual_visits
annual_ci_upper = posterior['ci_95'][1] * annual_visits

print("="*60)
print("🎲 BAYESIAN MODEL 1: Poisson-Gamma Conjugate Analysis")
print("="*60)
print(f"\n📌 Prior: Gamma(α={posterior['prior_alpha']}, β={posterior['prior_beta']:.0e})")
print(f"   Prior mean rate: {posterior['prior_alpha']/posterior['prior_beta']:.2e} per visit")

print(f"\n📊 Data: {NPS_DATA['cold_cases_total']} cases over {NPS_DATA['total_visits']:.2e} visits")

print(f"\n📈 Posterior: Gamma(α={posterior['post_alpha']:.0f}, β={posterior['post_beta']:.2e})")
print(f"   Posterior mean rate: {posterior['post_mean']:.3e} per visit")
print(f"   Posterior SD: {posterior['post_sd']:.3e}")
print(f"   95% Credible Interval: [{posterior['ci_95'][0]:.3e}, {posterior['ci_95'][1]:.3e}]")

print(f"\n📅 Annual Prediction (NPS only, {NPS_DATA['annual_visits_millions']}M visits/yr):")
print(f"   Expected cases: {annual_expected:.2f} per year")
print(f"   95% CI: [{annual_ci_lower:.2f}, {annual_ci_upper:.2f}]")

print(f"\n⚡ Translation: About 1 permanent disappearance every {1/annual_expected:.1f} years")

============================================================
🎲 BAYESIAN MODEL 1: Poisson-Gamma Conjugate Analysis
============================================================

📌 Prior: Gamma(α=1, β=1e+09)
   Prior mean rate: 1.00e-09 per visit

📊 Data: 30 cases over 1.87e+10 visits

📈 Posterior: Gamma(α=31, β=1.97e+10)
   Posterior mean rate: 1.572e-09 per visit
   Posterior SD: 2.823e-10
   95% Credible Interval: [1.068e-09, 2.172e-09]

📅 Annual Prediction (NPS only, 312M visits/yr):
   Expected cases: 0.49 per year
   95% CI: [0.33, 0.68]

⚡ Translation: About 1 permanent disappearance every 2.0 years

# =============================================================================
# BAYESIAN MODEL 2: Posterior Predictive Analysis
# =============================================================================

def posterior_predictive(posterior_params, exposure, n_samples=100000):
    """
    Generate posterior predictive distribution.

    Args:
        posterior_params: Dictionary from poisson_gamma_posterior
        exposure: New exposure to predict for
        n_samples: Number of Monte Carlo samples

    Returns:
        Array of predicted case counts
    """
    # Sample rates from posterior
    lambda_samples = stats.gamma.rvs(
        posterior_params['post_alpha'],
        scale=1/posterior_params['post_beta'],
        size=n_samples
    )

    # Sample counts from Poisson with those rates
    predicted_counts = stats.poisson.rvs(lambda_samples * exposure)

    return predicted_counts

# Generate predictions for ALL public lands over 60 years
predicted_counts = posterior_predictive(posterior, ALL_LANDS['total_visits_60yr'])

# Calculate probability of observing M411's claimed count
p_at_least_m411 = np.mean(predicted_counts >= M411_CLAIMS['total_cases'])

print("="*60)
print("🔮 BAYESIAN MODEL 2: Posterior Predictive Analysis")
print("="*60)
print(f"\n📊 Predicting for: ALL public lands, 60 years")
print(f"   Total exposure: {ALL_LANDS['total_visits_60yr']:.2e} visits")

print(f"\n📈 Posterior Predictive Distribution:")
print(f"   Mean: {np.mean(predicted_counts):.1f} cases")
print(f"   Median: {np.median(predicted_counts):.1f} cases")
print(f"   Std Dev: {np.std(predicted_counts):.1f}")
print(f"   95% Predictive Interval: [{np.percentile(predicted_counts, 2.5):.0f}, {np.percentile(predicted_counts, 97.5):.0f}]")

print(f"\n❓ Missing 411 claims: {M411_CLAIMS['total_cases']} cases")
print(f"   P(predicted ≥ {M411_CLAIMS['total_cases']} | model) = {p_at_least_m411:.2e}")

if p_at_least_m411 < 1e-10:
    print(f"\n🚨 INTERPRETATION: The M411 count is ASTRONOMICALLY unlikely!")
    print(f"   This probability is essentially ZERO.")
    print(f"   M411 claims are {M411_CLAIMS['total_cases']/np.mean(predicted_counts):.0f}x higher than expected.")

============================================================
🔮 BAYESIAN MODEL 2: Posterior Predictive Analysis
============================================================

📊 Predicting for: ALL public lands, 60 years
   Total exposure: 1.01e+11 visits

📈 Posterior Predictive Distribution:
   Mean: 159.0 cases
   Median: 157.0 cases
   Std Dev: 31.1
   95% Predictive Interval: [103, 225]

❓ Missing 411 claims: 1600 cases
   P(predicted ≥ 1600 | model) = 0.00e+00

🚨 INTERPRETATION: The M411 count is ASTRONOMICALLY unlikely!
   This probability is essentially ZERO.
   M411 claims are 10x higher than expected.

# =============================================================================
# COMPREHENSIVE VISUALIZATION
# =============================================================================

fig, axes = plt.subplots(2, 2, figsize=(14, 12))
fig.suptitle('Bayesian Analysis of Wilderness Disappearances', fontsize=16, fontweight='bold', y=1.02)

# -----------------------------------------------------------------------------
# Panel 1: Posterior Distribution of Rate
# -----------------------------------------------------------------------------
ax1 = axes[0, 0]
x = np.linspace(0, 5e-9, 1000)
y = stats.gamma.pdf(x, posterior['post_alpha'], scale=1/posterior['post_beta'])

ax1.plot(x * 1e9, y / 1e9, 'b-', linewidth=2.5)
ax1.fill_between(x * 1e9, y / 1e9, alpha=0.3, color='steelblue')
ax1.axvline(posterior['post_mean'] * 1e9, color='red', linestyle='--', linewidth=2,
            label=f'Posterior Mean: {posterior["post_mean"]*1e9:.2f}×10⁻⁹')

# Add CI shading
ci_mask = (x >= posterior['ci_95'][0]) & (x <= posterior['ci_95'][1])
ax1.fill_between(x[ci_mask] * 1e9, y[ci_mask] / 1e9, alpha=0.5, color='green',
                 label='95% Credible Interval')

ax1.set_xlabel('Disappearance Rate (×10⁻⁹ per visit)', fontsize=11)
ax1.set_ylabel('Probability Density', fontsize=11)
ax1.set_title('Posterior Distribution of Disappearance Rate', fontsize=12, fontweight='bold')
ax1.legend(loc='upper right')

# -----------------------------------------------------------------------------
# Panel 2: Posterior Predictive Distribution
# -----------------------------------------------------------------------------
ax2 = axes[0, 1]

# Histogram of predictions
ax2.hist(predicted_counts, bins=50, density=True, alpha=0.7, color='steelblue',
         edgecolor='navy', label='Posterior Predictive')

# Mark the expected value
ax2.axvline(np.mean(predicted_counts), color='green', linewidth=2.5,
            label=f'Expected: {np.mean(predicted_counts):.0f} cases')

# Mark M411 claims (will be off chart!)
ax2.axvline(M411_CLAIMS['total_cases'], color='red', linewidth=2.5, linestyle='--',
            label=f'M411 Claims: {M411_CLAIMS["total_cases"]} (off chart!)')

ax2.set_xlabel('Number of Cases (60 years, all public lands)', fontsize=11)
ax2.set_ylabel('Probability Density', fontsize=11)
ax2.set_title('Posterior Predictive Distribution', fontsize=12, fontweight='bold')
ax2.set_xlim(0, 400)
ax2.legend(loc='upper right')

# Add annotation about M411
ax2.annotate(f'M411 claims {M411_CLAIMS["total_cases"]} cases\n→ Way off chart!\nP ≈ 0',
             xy=(350, 0.01), fontsize=10, color='red',
             bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.8))

# -----------------------------------------------------------------------------
# Panel 3: Annual Case Uncertainty
# -----------------------------------------------------------------------------
ax3 = axes[1, 0]

# Sample annual cases
rate_samples = stats.gamma.rvs(posterior['post_alpha'], scale=1/posterior['post_beta'], size=50000)
annual_samples = rate_samples * annual_visits

ax3.hist(annual_samples, bins=50, density=True, alpha=0.7, color='forestgreen',
         edgecolor='darkgreen')
ax3.axvline(np.mean(annual_samples), color='red', linewidth=2.5,
            label=f'Mean: {np.mean(annual_samples):.2f}/year')
ax3.axvline(np.percentile(annual_samples, 2.5), color='orange', linestyle='--', linewidth=2,
            label=f'95% CI: [{np.percentile(annual_samples, 2.5):.2f}, {np.percentile(annual_samples, 97.5):.2f}]')
ax3.axvline(np.percentile(annual_samples, 97.5), color='orange', linestyle='--', linewidth=2)

ax3.set_xlabel('Expected Annual Cases (NPS only)', fontsize=11)
ax3.set_ylabel('Probability Density', fontsize=11)
ax3.set_title('Uncertainty in Annual Prediction', fontsize=12, fontweight='bold')
ax3.legend(loc='upper right')

# -----------------------------------------------------------------------------
# Panel 4: Prior vs Posterior (Bayesian Learning)
# -----------------------------------------------------------------------------
ax4 = axes[1, 1]

# Extended x range to show M411 implied rate
x_wide = np.linspace(0, 2e-8, 1000)

# Prior
y_prior = stats.gamma.pdf(x_wide, posterior['prior_alpha'], scale=1/posterior['prior_beta'])
ax4.plot(x_wide * 1e9, y_prior / 1e9, 'gray', linewidth=2, linestyle='--',
         label='Prior (weakly informative)', alpha=0.7)

# Posterior
y_post = stats.gamma.pdf(x_wide, posterior['post_alpha'], scale=1/posterior['post_beta'])
ax4.plot(x_wide * 1e9, y_post / 1e9, 'b-', linewidth=2.5, label='Posterior (after NPS data)')
ax4.fill_between(x_wide * 1e9, y_post / 1e9, alpha=0.3, color='steelblue')

# M411 implied rate
m411_rate = M411_CLAIMS['annual_rate'] / (ALL_LANDS['annual_visits_millions'] * 1e6)
ax4.axvline(m411_rate * 1e9, color='red', linewidth=2.5, linestyle=':',
            label=f'M411 Implied Rate: {m411_rate*1e9:.1f}×10⁻⁹')

ax4.set_xlabel('Disappearance Rate (×10⁻⁹ per visit)', fontsize=11)
ax4.set_ylabel('Probability Density', fontsize=11)
ax4.set_title('Prior vs Posterior: Bayesian Learning', fontsize=12, fontweight='bold')
ax4.legend(loc='upper right')
ax4.set_xlim(0, 20)

plt.tight_layout()
plt.savefig('missing411_bayesian_analysis.png', dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
plt.show()

print("\n✓ Figure saved as 'missing411_bayesian_analysis.png'")

✓ Figure saved as 'missing411_bayesian_analysis.png'

# =============================================================================
# PROBABILITY COMPARISON CHART
# =============================================================================

fig, ax = plt.subplots(figsize=(12, 8))

# Calculate various probabilities for comparison
probabilities = [
    ('Rolling double 6s', 1/36, '#3498db'),
    ('Coin flip: 10 heads in row', 1/1024, '#2ecc71'),
    ('Winning $100 scratch ticket', 1/10000, '#9b59b6'),
    ('Royal flush in poker', 1/649740, '#e74c3c'),
    ('Lightning strike (annual)', 1/500000, '#f39c12'),
    ('Death in NPS park (per visit)', 1/2.6e6, '#1abc9c'),
    ('Shark attack (annual)', 1/3.75e6, '#34495e'),
    ('Permanent disappearance (per visit)', posterior['post_mean'], '#c0392b'),
    ('P(M411 claims | our model)', max(p_at_least_m411, 1e-300), '#8e44ad'),
]

# Sort by probability (descending)
probabilities = sorted(probabilities, key=lambda x: x[1], reverse=True)

names = [p[0] for p in probabilities]
probs = [p[1] for p in probabilities]
colors = [p[2] for p in probabilities]

# Create horizontal bar chart
y_pos = np.arange(len(names))
bars = ax.barh(y_pos, probs, color=colors, edgecolor='black', linewidth=0.5)

ax.set_yticks(y_pos)
ax.set_yticklabels(names, fontsize=11)
ax.set_xscale('log')
ax.set_xlabel('Probability (log scale)', fontsize=12)
ax.set_title('Probability Comparison: Where Does Missing 411 Fall?',
             fontsize=14, fontweight='bold')

# Add probability labels
for bar, prob, name in zip(bars, probs, names):
    if prob > 1e-15:
        label = f'{prob:.2e}'
    else:
        label = '≈ 0'
    ax.text(max(prob * 2, 1e-280), bar.get_y() + bar.get_height()/2,
           label, va='center', fontsize=9, fontweight='bold')

# Add a reference line
ax.axvline(1e-9, color='gray', linestyle=':', alpha=0.5, label='1 in a billion')

ax.set_xlim(1e-310, 1)
plt.tight_layout()
plt.savefig('missing411_probability_scale.png', dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
plt.show()

print("\n✓ Figure saved as 'missing411_probability_scale.png'")

✓ Figure saved as 'missing411_probability_scale.png'

# =============================================================================
# HIERARCHICAL PARK-LEVEL ANALYSIS
# =============================================================================

# Park-level data (SAR incidents as risk proxy)
parks = pd.DataFrame({
    'park': ['Grand Canyon', 'Yosemite', 'Yellowstone', 'Rocky Mountain',
             'Sequoia/Kings', 'Zion', 'Glacier', 'Olympic', 'Other Parks'],
    'sar_incidents_3yr': [785, 732, 371, 341, 503, 200, 180, 150, 1500],
    'cold_cases': [4, 5, 0, 2, 3, 1, 2, 1, 12],
    'annual_visits_millions': [6.4, 4.4, 4.9, 4.7, 2.0, 4.5, 3.1, 3.2, 279]
})

# Calculate rates
parks['visits_60yr'] = parks['annual_visits_millions'] * 60 * 1e6
parks['rate'] = parks['cold_cases'] / parks['visits_60yr']
parks['rate_per_billion'] = parks['rate'] * 1e9

# Handle zeros for log calculations
parks['rate_display'] = parks['rate'].replace(0, np.nan)

print("="*70)
print("🏔️ HIERARCHICAL PARK-LEVEL ANALYSIS")
print("="*70)
print("\n📊 Park-Level Data:")
print(parks[['park', 'cold_cases', 'annual_visits_millions', 'rate_per_billion']].to_string(index=False))

# Calculate variation
valid_rates = parks[parks['cold_cases'] > 0]['rate_per_billion']
print(f"\n📈 Inter-Park Variation (excluding zeros):")
print(f"   Min rate: {valid_rates.min():.2f}×10⁻⁹ per visit")
print(f"   Max rate: {valid_rates.max():.2f}×10⁻⁹ per visit")
print(f"   Ratio (max/min): {valid_rates.max()/valid_rates.min():.1f}x")
print(f"   Mean rate: {valid_rates.mean():.2f}×10⁻⁹ per visit")

# Visualize
fig, ax = plt.subplots(figsize=(12, 6))

colors = plt.cm.RdYlGn_r(np.linspace(0.2, 0.8, len(parks)))
bars = ax.barh(parks['park'], parks['rate_per_billion'], color=colors, edgecolor='black')

ax.axvline(posterior['post_mean'] * 1e9, color='blue', linestyle='--', linewidth=2,
           label=f'Overall Posterior Mean: {posterior["post_mean"]*1e9:.2f}')

ax.set_xlabel('Disappearance Rate (×10⁻⁹ per visit)', fontsize=11)
ax.set_title('Park-Level Disappearance Rates', fontsize=14, fontweight='bold')
ax.legend(loc='lower right')

# Add value labels
for bar, val, cases in zip(bars, parks['rate_per_billion'], parks['cold_cases']):
    if val > 0:
        ax.text(val + 0.3, bar.get_y() + bar.get_height()/2,
               f'{val:.1f} ({cases} cases)', va='center', fontsize=9)
    else:
        ax.text(0.3, bar.get_y() + bar.get_height()/2,
               f'0 cases', va='center', fontsize=9)

plt.tight_layout()
plt.savefig('missing411_park_rates.png', dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
plt.show()

print("\n✓ Figure saved as 'missing411_park_rates.png'")
print("\n💡 KEY INSIGHT: Even the 'riskiest' parks (Sequoia) have rates <25×10⁻⁹")
print("   This is still incredibly rare - about 1 in 40 million visits.")

======================================================================
🏔️ HIERARCHICAL PARK-LEVEL ANALYSIS
======================================================================

📊 Park-Level Data:
          park  cold_cases  annual_visits_millions  rate_per_billion
  Grand Canyon           4                     6.4         10.416667
      Yosemite           5                     4.4         18.939394
   Yellowstone           0                     4.9          0.000000
Rocky Mountain           2                     4.7          7.092199
 Sequoia/Kings           3                     2.0         25.000000
          Zion           1                     4.5          3.703704
       Glacier           2                     3.1         10.752688
       Olympic           1                     3.2          5.208333
   Other Parks          12                   279.0          0.716846

📈 Inter-Park Variation (excluding zeros):
   Min rate: 0.72×10⁻⁹ per visit
   Max rate: 25.00×10⁻⁹ per visit
   Ratio (max/min): 34.9x
   Mean rate: 10.23×10⁻⁹ per visit

✓ Figure saved as 'missing411_park_rates.png'

💡 KEY INSIGHT: Even the 'riskiest' parks (Sequoia) have rates <25×10⁻⁹
   This is still incredibly rare - about 1 in 40 million visits.

# =============================================================================
# EXPECTED VS CLAIMED: THE FINAL VISUALIZATION
# =============================================================================

fig, ax = plt.subplots(figsize=(10, 7))

categories = ['Expected\n(Bayesian Model)', 'Actual NPS\nCold Cases', 'Missing 411\nClaims']
values = [np.mean(predicted_counts) / (60/60),  # Normalize to 60 years of ALL lands
          NPS_DATA['cold_cases_total'] * (ALL_LANDS['annual_visits_millions'] / NPS_DATA['annual_visits_millions']),  # Scale up
          M411_CLAIMS['total_cases']]

# Simpler: just show the direct comparison
categories = ['Bayesian\nExpected\n(All Lands, 60yr)',
              'NPS Cold Cases\n(Actual, 60yr)',
              'Missing 411\nClaims']
values = [np.mean(predicted_counts),
          NPS_DATA['cold_cases_total'],
          M411_CLAIMS['total_cases']]
colors = ['#3498db', '#2ecc71', '#e74c3c']

bars = ax.bar(categories, values, color=colors, edgecolor='black', linewidth=2)

ax.set_ylabel('Total Cases', fontsize=12)
ax.set_title('Wilderness Disappearances: Expected vs. Actual vs. Claimed',
             fontsize=14, fontweight='bold')

# Add value labels
for bar, val in zip(bars, values):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 30,
           f'{val:.0f}', ha='center', va='bottom', fontsize=14, fontweight='bold')

# Add annotations
ax.annotate('Model prediction\nfor ALL public lands', xy=(0, 159), xytext=(-0.3, 400),
           fontsize=9, ha='center',
           arrowprops=dict(arrowstyle='->', color='gray'))

ax.annotate('NPS only\n(best data quality)', xy=(1, 30), xytext=(0.7, 300),
           fontsize=9, ha='center',
           arrowprops=dict(arrowstyle='->', color='gray'))

ax.annotate('10x higher\nthan expected!', xy=(2, 1600), xytext=(2.3, 1200),
           fontsize=10, ha='center', color='red', fontweight='bold',
           arrowprops=dict(arrowstyle='->', color='red'))

ax.set_ylim(0, 1800)

plt.tight_layout()
plt.savefig('missing411_comparison.png', dpi=150, bbox_inches='tight',
            facecolor='white', edgecolor='none')
plt.show()

print("\n✓ Figure saved as 'missing411_comparison.png'")

✓ Figure saved as 'missing411_comparison.png'

# =============================================================================
# FINAL SUMMARY
# =============================================================================

print("\n" + "="*70)
print("📋 BAYESIAN ANALYSIS SUMMARY")
print("="*70)

print("\n🎯 KEY FINDINGS:\n")

print("1️⃣  RATE ESTIMATE")
print(f"    Posterior mean: {posterior['post_mean']:.3e} per visit")
print(f"    95% Credible Interval: [{posterior['ci_95'][0]:.3e}, {posterior['ci_95'][1]:.3e}]")
print(f"    Translation: About 1 in {1/posterior['post_mean']:,.0f} visits")

print("\n2️⃣  ANNUAL PREDICTION (NPS, 312M visits)")
print(f"    Expected: {annual_expected:.2f} cases/year")
print(f"    95% CI: [{annual_ci_lower:.2f}, {annual_ci_upper:.2f}]")

print("\n3️⃣  60-YEAR PREDICTION (All Public Lands, 1.68B visits/yr)")
print(f"    Expected: {np.mean(predicted_counts):.0f} cases")
print(f"    95% Predictive Interval: [{np.percentile(predicted_counts, 2.5):.0f}, {np.percentile(predicted_counts, 97.5):.0f}]")

print("\n4️⃣  MISSING 411 PLAUSIBILITY")
print(f"    M411 claims: {M411_CLAIMS['total_cases']} cases")
print(f"    P(≥{M411_CLAIMS['total_cases']} | model) ≈ 0")
print(f"    Ratio (M411 / Expected): {M411_CLAIMS['total_cases']/np.mean(predicted_counts):.1f}x")

print("\n" + "="*70)
print("🔬 CONCLUSION")
print("="*70)
print("""
The Bayesian analysis provides strong quantitative evidence that:

  ✓ The NPS cold case count (~30) is EXACTLY what the model predicts
  ✓ Missing 411's 1,600 cases would require a rate ~10x higher than observed
  ✓ The probability of M411 being accurate (given NPS data) is essentially ZERO
  ✓ Park-to-park variation exists but stays within the same order of magnitude

The "mystery" of Missing 411 is not in the data—it's in the storytelling.
When you properly account for the MASSIVE denominator (billions of visits),
the rare tragedies become statistically expected, not mysterious.

THE WILDERNESS IS VAST. SOME PEOPLE ARE NEVER FOUND.
THAT'S STATISTICS, NOT MYSTERY. 🏔️
""")

======================================================================
📋 BAYESIAN ANALYSIS SUMMARY
======================================================================

🎯 KEY FINDINGS:

1️⃣  RATE ESTIMATE
    Posterior mean: 1.572e-09 per visit
    95% Credible Interval: [1.068e-09, 2.172e-09]
    Translation: About 1 in 636,129,032 visits

2️⃣  ANNUAL PREDICTION (NPS, 312M visits)
    Expected: 0.49 cases/year
    95% CI: [0.33, 0.68]

3️⃣  60-YEAR PREDICTION (All Public Lands, 1.68B visits/yr)
    Expected: 159 cases
    95% Predictive Interval: [103, 225]

4️⃣  MISSING 411 PLAUSIBILITY
    M411 claims: 1600 cases
    P(≥1600 | model) ≈ 0
    Ratio (M411 / Expected): 10.1x

======================================================================
🔬 CONCLUSION
======================================================================

The Bayesian analysis provides strong quantitative evidence that:

  ✓ The NPS cold case count (~30) is EXACTLY what the model predicts
  ✓ Missing 411's 1,600 cases would require a rate ~10x higher than observed
  ✓ The probability of M411 being accurate (given NPS data) is essentially ZERO
  ✓ Park-to-park variation exists but stays within the same order of magnitude

The "mystery" of Missing 411 is not in the data—it's in the storytelling.
When you properly account for the MASSIVE denominator (billions of visits),
the rare tragedies become statistically expected, not mysterious.

THE WILDERNESS IS VAST. SOME PEOPLE ARE NEVER FOUND.
THAT'S STATISTICS, NOT MYSTERY. 🏔️

🏔️ Missing 411 Bayesian Analysis¶

Are Wilderness Disappearances Statistically Anomalous?¶

🏔️ Missing 411 Bayesian Analysis¶

Are Wilderness Disappearances Statistically Anomalous?¶

The Claim¶

The Question¶

The Data¶

The Analysis¶

The "Underreporting" Defense¶

The Bottom Line¶

📊 The Data¶

🎲 Bayesian Model 1: Poisson-Gamma Conjugate Analysis¶

🔮 Bayesian Model 2: Posterior Predictive Analysis¶

📊 Visualization: The Evidence¶

📉 The Probability Scale: Just How Unlikely is M411?¶

🏔️ Hierarchical Analysis: Do Some Parks Have Higher Risk?¶

📊 Expected vs Claimed: The Final Comparison¶

📋 Summary Statistics¶

📚 Data Sources¶