Imitation training for NeuralNetPlayers and evaluation tournament sweep¶

This notebook:

  1. Loops over a grid of (field_size, comms_size) values.
  2. For each setting:
    • Builds a GameLayout and majority-teacher players.
    • Uses the imitation utilities to generate synthetic datasets for model_a and model_b, following the majority strategy.
    • Trains NeuralNetPlayers.model_a and NeuralNetPlayers.model_b separately via imitation.
    • Runs a tournament with MajorityPlayers and with NeuralNetPlayers.
  3. Collects all results in a pandas DataFrame and prints a compact summary table.

The notebook assumes that:

  • The qsb package is importable (or adjust the imports below to your local package layout).
  • The module neural_net_imitation_utils.py is available and implements the functions specified in the design document (especially generate_majority_imitation_datasets).
The final results of the sweep are summarized in the following table:
field_size comms_size maj nn maj_theory info_limit
4 1 0.5968 0.5924 0.5982 0.6461
4 2 0.6300 0.6290 0.6367 0.7051
4 4 0.6816 0.6852 0.6875 0.7855
4 8 0.7508 0.7526 0.7500 0.8900
8 1 0.5578 0.5568 0.5497 0.5735
8 2 0.5716 0.5710 0.5700 0.6037
8 4 0.6018 0.5884 0.5982 0.6461
8 8 0.6436 0.6284 0.6367 0.7051
16 1 0.5198 0.5332 0.5249 0.5368
16 2 0.5374 0.5414 0.5352 0.5520
16 4 0.5478 0.5550 0.5497 0.5735
16 8 0.5708 0.5524 0.5700 0.6037
32 1 0.5170 0.5160 0.5125 0.5184
32 2 0.5068 0.5214 0.5176 0.5260
32 4 0.5272 0.5364 0.5249 0.5368
32 8 0.5350 0.5318 0.5352 0.5520
64 1 0.5078 0.5088 0.5062 0.5092
64 2 0.5022 0.5066 0.5088 0.5130
64 4 0.5114 0.5160 0.5125 0.5184
In [ ]:
import numpy as np
import pandas as pd
import sys
# Make the local source tree importable (notebook lives next to ../src).
sys.path.append("../src")
import Q_Sea_Battle as qsb 
# Core QSeaBattle imports.
# Adjust these imports if your package layout is different.
from Q_Sea_Battle import GameLayout
from Q_Sea_Battle import GameEnv
from Q_Sea_Battle import Tournament
from Q_Sea_Battle import MajorityPlayers, NeuralNetPlayers

# Imitation utilities: must provide generate_majority_imitation_datasets.
from Q_Sea_Battle import neural_net_imitation_utilities as imitation_utils

print("qsb version loaded. Available symbols:", dir(qsb)[:20])

# Debug-friendly settings — use only during development.
import tensorflow as tf
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()

print("Debug mode enabled: eager execution + eager tf.data")
# NOTE(review): the two tf debug calls above are currently ACTIVE (not commented
# out). For large runs or sweeps, comment them out to restore full graph-mode
# performance — eager execution slows tf.function and tf.data significantly.
qsb version loaded. Available symbols: ['AssistedPlayers', 'Game', 'GameEnv', 'GameLayout', 'MajorityPlayers', 'NeuralNetPlayerA', 'NeuralNetPlayerB', 'NeuralNetPlayers', 'PlayerA', 'PlayerB', 'Players', 'SharedRandomness', 'SimplePlayers', 'Tournament', 'TournamentLog', 'TrainableAssistedPlayers', '__all__', '__builtins__', '__cached__', '__doc__']
Debug mode enabled: eager execution + eager tf.data
In [9]:
def inspect_imitation_datasets(layout, dataset_a, dataset_b, sample_size=50_000):
    """
    Sanity-check the synthetic imitation datasets against the MajorityPlayers
    teacher.

    Reports:
      A1. empirical fraction of ones in the sampled fields,
      A2. per-bit frequencies of the comm vectors in dataset A,
      B1. distribution of one-hot gun indices in dataset B,
      B2. fraction of positive shoot labels in dataset B,
      C1/C2. mismatch rate between recorded labels and what a freshly built
             MajorityPlayers teacher would decide for the same inputs.

    Parameters
    ----------
    layout : GameLayout
        Layout whose comms_size determines how many comm bits to report.
    dataset_a : pandas.DataFrame
        Columns ["field", "comm", ...] — model_a imitation data (field -> comm).
    dataset_b : pandas.DataFrame
        Columns ["gun", "comm", "shoot", ...] — model_b imitation data
        (gun + comm -> shoot).
    sample_size : int
        Number of rows to subsample from each dataset for the statistics
        (keeps the inspection fast on large datasets).
    """
    # Local imports keep the helper self-contained when pasted elsewhere.
    # (The previously unused `import pandas as pd` was removed.)
    import numpy as np
    import Q_Sea_Battle as qsb

    print("=== Imitation Dataset Inspection ===")

    # -----------------------------
    # Subsample for speed; fixed random_state keeps the report reproducible.
    # -----------------------------
    da = dataset_a.sample(sample_size, random_state=0) if len(dataset_a) > sample_size else dataset_a
    db = dataset_b.sample(sample_size, random_state=0) if len(dataset_b) > sample_size else dataset_b

    m = layout.comms_size

    # -----------------------------
    # A1 — Field distribution: overall mean of all field bits.
    # -----------------------------
    fields = np.stack(da["field"].to_numpy(), axis=0).astype(float)
    p_emp = fields.mean()
    print(f" A1: Field mean p_one (empirical): {p_emp:.4f}")

    # -----------------------------
    # A2 — Per-bit mean of the comm vectors produced by the teacher.
    # -----------------------------
    comms_a = np.stack(da["comm"].to_numpy(), axis=0)
    print(" A2: Comm bit frequencies (dataset A):")
    for j in range(m):
        print(f"  bit {j}: mean={comms_a[:, j].mean():.4f}")

    # -----------------------------
    # B1 — Gun index uniformity. The gun appears to be one-hot encoded, so
    # argmax recovers the index; a roughly uniform index distribution shows
    # a large standard deviation.
    # -----------------------------
    guns_b = np.stack(db["gun"].to_numpy(), axis=0)
    gun_idx = guns_b.argmax(axis=1)
    print(" B1: Gun index statistics (dataset B):")
    print(f"  min idx={gun_idx.min()}, max idx={gun_idx.max()}")
    print(f"  approx std(index) = {gun_idx.std():.1f} (uniform-ish if large)")

    # -----------------------------
    # B2 — Fraction of positive shoot labels.
    # -----------------------------
    shoots = db["shoot"].to_numpy(dtype=float)
    print(f" B2: Shoot distribution: mean(shoot)={shoots.mean():.4f}")

    # -----------------------------
    # C1 — Replay a small subset of dataset B through the real teacher and
    # count disagreements with the recorded shoot labels.
    # -----------------------------
    print(" C1: Cross-check dataset B vs MajorityPlayers.playerB ...")

    maj = qsb.MajorityPlayers(layout)
    player_a_maj, player_b_maj = maj.players()

    subset_b = db.sample(min(2000, len(db)), random_state=1)

    mismatches_b = sum(
        int(row["shoot"]) != int(player_b_maj.decide(gun=row["gun"], comm=row["comm"]))
        for _, row in subset_b.iterrows()
    )
    mismatch_rate_b = mismatches_b / len(subset_b)
    print(f"  Majority vs Dataset-B shoot mismatch rate: {mismatch_rate_b:.4f}")

    # -----------------------------
    # C2 — Same cross-check for dataset A (field -> comm).
    # -----------------------------
    print(" C2: Cross-check dataset A vs MajorityPlayers.playerA ...")

    subset_a = da.sample(min(2000, len(da)), random_state=2)

    mismatches_a = sum(
        not np.array_equal(row["comm"], player_a_maj.decide(row["field"]))
        for _, row in subset_a.iterrows()
    )
    mismatch_rate_a = mismatches_a / len(subset_a)
    print(f"  Majority vs Dataset-A comm mismatch rate: {mismatch_rate_a:.4f}")

    print(" === Inspection Completed ===")
In [10]:
# ----------------------------
# Global configuration
# ----------------------------

# Sweep grid (field sizes descending, comm sizes descending).
FIELD_SIZES = [64,32,16,8,4]
COMMS_SIZES = [8,4,2,1]

# Number of synthetic samples per imitation batch.
# We keep the *total* number of synthetic samples roughly comparable to before,
# but avoid a single huge DataFrame in memory.
NUM_SAMPLES_A = 10_000   # for model_a (field -> comm), per batch
NUM_SAMPLES_B = 10_000   # for model_b (gun + comm -> shoot), per batch

# How many independent imitation batches to generate and train on.
# Total synthetic samples seen per model = NUM_IM_BATCHES_* x NUM_SAMPLES_*.
NUM_IM_BATCHES_A = 25
NUM_IM_BATCHES_B = 25

# Training hyper-parameters for imitation (consumed by train_model_a).
TRAINING_SETTINGS_A = {
    "epochs": 25,
    "batch_size": 256,
    "learning_rate": 1e-3,
    "verbose": 0,  # 0 = silent training output
    "use_sample_weight": False,
}

# Training hyper-parameters for imitation (consumed by train_model_b).
TRAINING_SETTINGS_B = {
    "epochs": 25,
    "batch_size": 256,
    "learning_rate": 1e-3,
    "verbose": 0,  # 0 = silent training output
    "use_sample_weight": False,
}

# Tournament configuration: number of games per evaluation tournament.
NUM_GAMES_TOURNAMENT = 5_000

# Base seed for reproducibility of the synthetic imitation data.
BASE_SEED = 12345
In [11]:
 
def run_single_experiment(field_size: int, comms_size: int, seed: int = 0):
    """
    Run imitation training and evaluation tournaments for a single
    (field_size, comms_size) configuration.

    Parameters
    ----------
    field_size : int
        Linear size of the game field (the field has field_size**2 cells).
    comms_size : int
        Number of communication bits; must not exceed field_size**2.
    seed : int
        Experiment-level seed that offsets the per-batch data-generation
        seeds, so different experiments see different synthetic data.
        seed=0 reproduces the original fixed BASE_SEED schedule exactly.

    Returns
    -------
    dict
        Keys: field_size, comms_size, maj_mean, maj_stderr, nn_mean, nn_stderr.

    Raises
    ------
    ValueError
        If comms_size exceeds the number of field cells.
    """
    n2 = field_size ** 2
    if comms_size > n2:
        raise ValueError("comms_size cannot exceed field_size**2 in this setup.")

    # 1) Build layout and environment
    layout = GameLayout(
        field_size=field_size,
        comms_size=comms_size,
        number_of_games_in_tournament=NUM_GAMES_TOURNAMENT,
    )

    game_env = GameEnv(layout)

    # 2) Evaluate the MajorityPlayers teacher as the baseline
    majority_players = MajorityPlayers(layout)
    tournament_teacher = Tournament(game_env, majority_players, layout)
    log_teacher = tournament_teacher.tournament()
    maj_mean, maj_stderr = log_teacher.outcome()

    # 3) Build NeuralNetPlayers student
    nn_players = NeuralNetPlayers(layout)

    # 4) Imitation training with on-the-fly batch generation
    #
    # Instead of generating one *huge* imitation dataset and training on it
    # multiple epochs, we generate a fresh synthetic batch per step and run a
    # short training phase on it. Peak memory stays bounded by NUM_SAMPLES_A /
    # NUM_SAMPLES_B while the total number of synthetic samples seen is
    # NUM_IM_BATCHES_* x NUM_SAMPLES_*.
    #
    # BUGFIX: the `seed` argument was previously accepted but never used, so
    # every experiment reused the identical synthetic-data seed schedule. It
    # now offsets every batch seed; seed=0 matches the old behavior exactly.
    seed_offset = BASE_SEED + 10_000 * seed

    # 4a) Train model_a (field -> comm) via imitation on multiple small batches.
    # NOTE(review): the generator produces both datasets each call and one is
    # discarded here — wasteful, but kept to preserve the seed schedule.
    for k in range(NUM_IM_BATCHES_A):
        dataset_a, _ = imitation_utils.generate_majority_imitation_datasets(
            layout=layout,
            num_samples_a=NUM_SAMPLES_A,
            num_samples_b=NUM_SAMPLES_B,
            seed=seed_offset + k,
        )
        nn_players.train_model_a(dataset_a, TRAINING_SETTINGS_A)

    # 4b) Train model_b (gun + comm -> shoot); the +100 offset keeps these
    # batch seeds disjoint from model_a's (valid while NUM_IM_BATCHES_A <= 100).
    for k in range(NUM_IM_BATCHES_B):
        _, dataset_b = imitation_utils.generate_majority_imitation_datasets(
            layout=layout,
            num_samples_a=NUM_SAMPLES_A,
            num_samples_b=NUM_SAMPLES_B,
            seed=seed_offset + 100 + k,
        )
        nn_players.train_model_b(dataset_b, TRAINING_SETTINGS_B)

    # 5) Evaluate trained NeuralNetPlayers in a fresh tournament
    #    (re-use the same layout but reset the environment)
    game_env_nn = GameEnv(layout)
    tournament_student = Tournament(game_env_nn, nn_players, layout)
    log_student = tournament_student.tournament()
    nn_mean, nn_stderr = log_student.outcome()

    return {
        "field_size": field_size,
        "comms_size": comms_size,
        "maj_mean": maj_mean,
        "maj_stderr": maj_stderr,
        "nn_mean": nn_mean,
        "nn_stderr": nn_stderr,
    }
In [12]:
# Sweep every (field_size, comms_size) combination, skipping infeasible ones,
# and collect one summary dict per experiment.
results = []

for seed_idx, field_size in enumerate(FIELD_SIZES):
    for comms_size in COMMS_SIZES:
        # Only run configurations where comms_size is not trivially impossible
        if comms_size > field_size ** 2:
            continue

        print(f"\n=== Running experiment: field_size={field_size}, comms_size={comms_size}, sample_size={NUM_SAMPLES_A} ===")
        summary = run_single_experiment(field_size, comms_size, seed=seed_idx)
        results.append(summary)
        print(
            f"Majority: mean={summary['maj_mean']:.4f}, stderr={summary['maj_stderr']:.4f} | "
            f"Neural: mean={summary['nn_mean']:.4f}, stderr={summary['nn_stderr']:.4f}"
        )

# One DataFrame for the whole sweep, ordered for readable display.
results_df = (
    pd.DataFrame(results)
    .sort_values(["field_size", "comms_size"])
    .reset_index(drop=True)
)

results_df
=== Running experiment: field_size=64, comms_size=8, sample_size=10000 ===
Majority: mean=0.5122, stderr=0.0071 | Neural: mean=0.5172, stderr=0.0071

=== Running experiment: field_size=64, comms_size=4, sample_size=10000 ===
Majority: mean=0.5114, stderr=0.0071 | Neural: mean=0.5160, stderr=0.0071

=== Running experiment: field_size=64, comms_size=2, sample_size=10000 ===
Majority: mean=0.5022, stderr=0.0071 | Neural: mean=0.5066, stderr=0.0071

=== Running experiment: field_size=64, comms_size=1, sample_size=10000 ===
Majority: mean=0.5078, stderr=0.0071 | Neural: mean=0.5088, stderr=0.0071

=== Running experiment: field_size=32, comms_size=8, sample_size=10000 ===
Majority: mean=0.5350, stderr=0.0071 | Neural: mean=0.5318, stderr=0.0071

=== Running experiment: field_size=32, comms_size=4, sample_size=10000 ===
Majority: mean=0.5272, stderr=0.0071 | Neural: mean=0.5364, stderr=0.0071

=== Running experiment: field_size=32, comms_size=2, sample_size=10000 ===
Majority: mean=0.5068, stderr=0.0071 | Neural: mean=0.5214, stderr=0.0071

=== Running experiment: field_size=32, comms_size=1, sample_size=10000 ===
Majority: mean=0.5170, stderr=0.0071 | Neural: mean=0.5160, stderr=0.0071

=== Running experiment: field_size=16, comms_size=8, sample_size=10000 ===
Majority: mean=0.5708, stderr=0.0070 | Neural: mean=0.5524, stderr=0.0070

=== Running experiment: field_size=16, comms_size=4, sample_size=10000 ===
Majority: mean=0.5478, stderr=0.0070 | Neural: mean=0.5550, stderr=0.0070

=== Running experiment: field_size=16, comms_size=2, sample_size=10000 ===
Majority: mean=0.5374, stderr=0.0071 | Neural: mean=0.5414, stderr=0.0070

=== Running experiment: field_size=16, comms_size=1, sample_size=10000 ===
Majority: mean=0.5198, stderr=0.0071 | Neural: mean=0.5332, stderr=0.0071

=== Running experiment: field_size=8, comms_size=8, sample_size=10000 ===
Majority: mean=0.6436, stderr=0.0068 | Neural: mean=0.6284, stderr=0.0068

=== Running experiment: field_size=8, comms_size=4, sample_size=10000 ===
Majority: mean=0.6018, stderr=0.0069 | Neural: mean=0.5884, stderr=0.0070

=== Running experiment: field_size=8, comms_size=2, sample_size=10000 ===
Majority: mean=0.5716, stderr=0.0070 | Neural: mean=0.5710, stderr=0.0070

=== Running experiment: field_size=8, comms_size=1, sample_size=10000 ===
Majority: mean=0.5578, stderr=0.0070 | Neural: mean=0.5568, stderr=0.0070

=== Running experiment: field_size=4, comms_size=8, sample_size=10000 ===
Majority: mean=0.7508, stderr=0.0061 | Neural: mean=0.7526, stderr=0.0061

=== Running experiment: field_size=4, comms_size=4, sample_size=10000 ===
Majority: mean=0.6816, stderr=0.0066 | Neural: mean=0.6852, stderr=0.0066

=== Running experiment: field_size=4, comms_size=2, sample_size=10000 ===
Majority: mean=0.6300, stderr=0.0068 | Neural: mean=0.6290, stderr=0.0068

=== Running experiment: field_size=4, comms_size=1, sample_size=10000 ===
Majority: mean=0.5968, stderr=0.0069 | Neural: mean=0.5924, stderr=0.0069
Out[12]:
field_size comms_size maj_mean maj_stderr nn_mean nn_stderr
0 4 1 0.5968 0.006938 0.5924 0.006950
1 4 2 0.6300 0.006829 0.6290 0.006832
2 4 4 0.6816 0.006589 0.6852 0.006569
3 4 8 0.7508 0.006118 0.7526 0.006103
4 8 1 0.5578 0.007024 0.5568 0.007026
5 8 2 0.5716 0.006999 0.5710 0.007000
6 8 4 0.6018 0.006924 0.5884 0.006960
7 8 8 0.6436 0.006774 0.6284 0.006835
8 16 1 0.5198 0.007066 0.5332 0.007056
9 16 2 0.5374 0.007052 0.5414 0.007047
10 16 4 0.5478 0.007039 0.5550 0.007029
11 16 8 0.5708 0.007001 0.5524 0.007033
12 32 1 0.5170 0.007068 0.5160 0.007068
13 32 2 0.5068 0.007071 0.5214 0.007065
14 32 4 0.5272 0.007061 0.5364 0.007053
15 32 8 0.5350 0.007054 0.5318 0.007057
16 64 1 0.5078 0.007071 0.5088 0.007071
17 64 2 0.5022 0.007072 0.5066 0.007071
18 64 4 0.5114 0.007070 0.5160 0.007068
19 64 8 0.5122 0.007070 0.5172 0.007068

Summary table¶

In [13]:
from Q_Sea_Battle.reference_performance_utilities import (
    expected_win_rate_majority,
    limit_from_mutual_information,
)

# A default layout is used only to read enemy_probability.
# NOTE(review): this assumes enemy_probability does not depend on
# field_size/comms_size — confirm against GameLayout's defaults.
layout = GameLayout()

# Analytic prediction for the majority strategy (noise-free channel).
results_df["maj_theory"] = results_df.apply(
    lambda row: expected_win_rate_majority(
        field_size=int(row["field_size"]),
        comms_size=int(row["comms_size"]),
        enemy_probability=layout.enemy_probability,
        channel_noise=0.0,
    ),
    axis=1,
)

# Information-theoretic limit (mutual-information bound).
# CONSISTENCY FIX: cast to int as in the maj_theory computation above —
# DataFrame round-trips can yield numpy integer types for these columns.
results_df["info_limit"] = results_df.apply(
    lambda row: limit_from_mutual_information(
        field_size=int(row["field_size"]),
        comms_size=int(row["comms_size"]),
        channel_noise=0.0,
    ),
    axis=1,
)

# Format nicely: one shared 4-decimal formatter instead of repeated lambdas.
table = results_df.copy()
_fmt = "{:0.4f}".format

table["maj"]         = table["maj_mean"].map(_fmt)
table["nn"]          = table["nn_mean"].map(_fmt)
table["maj_theory"]  = table["maj_theory"].map(_fmt)
table["info_limit"]  = table["info_limit"].map(_fmt)

display_cols = ["field_size", "comms_size", "maj", "nn", "maj_theory", "info_limit"]
display(table[display_cols])
field_size comms_size maj nn maj_theory info_limit
0 4 1 0.5968 0.5924 0.5982 0.6461
1 4 2 0.6300 0.6290 0.6367 0.7051
2 4 4 0.6816 0.6852 0.6875 0.7855
3 4 8 0.7508 0.7526 0.7500 0.8900
4 8 1 0.5578 0.5568 0.5497 0.5735
5 8 2 0.5716 0.5710 0.5700 0.6037
6 8 4 0.6018 0.5884 0.5982 0.6461
7 8 8 0.6436 0.6284 0.6367 0.7051
8 16 1 0.5198 0.5332 0.5249 0.5368
9 16 2 0.5374 0.5414 0.5352 0.5520
10 16 4 0.5478 0.5550 0.5497 0.5735
11 16 8 0.5708 0.5524 0.5700 0.6037
12 32 1 0.5170 0.5160 0.5125 0.5184
13 32 2 0.5068 0.5214 0.5176 0.5260
14 32 4 0.5272 0.5364 0.5249 0.5368
15 32 8 0.5350 0.5318 0.5352 0.5520
16 64 1 0.5078 0.5088 0.5062 0.5092
17 64 2 0.5022 0.5066 0.5088 0.5130
18 64 4 0.5114 0.5160 0.5125 0.5184
19 64 8 0.5122 0.5172 0.5176 0.5260
field_size comms_size maj nn maj_theory info_limit
4 1 0.5968 0.5924 0.5982 0.6461
4 2 0.6300 0.6290 0.6367 0.7051
4 4 0.6816 0.6852 0.6875 0.7855
4 8 0.7508 0.7526 0.7500 0.8900
8 1 0.5578 0.5568 0.5497 0.5735
8 2 0.5716 0.5710 0.5700 0.6037
8 4 0.6018 0.5884 0.5982 0.6461
8 8 0.6436 0.6284 0.6367 0.7051
16 1 0.5198 0.5332 0.5249 0.5368
16 2 0.5374 0.5414 0.5352 0.5520
16 4 0.5478 0.5550 0.5497 0.5735
16 8 0.5708 0.5524 0.5700 0.6037
32 1 0.5170 0.5160 0.5125 0.5184
32 2 0.5068 0.5214 0.5176 0.5260
32 4 0.5272 0.5364 0.5249 0.5368
32 8 0.5350 0.5318 0.5352 0.5520
64 1 0.5078 0.5088 0.5062 0.5092
64 2 0.5022 0.5066 0.5088 0.5130
64 4 0.5114 0.5160 0.5125 0.5184
64 8 0.5122 0.5172 0.5176 0.5260