Imitation training for NeuralNetPlayers and evaluation tournament sweep¶

This notebook:

  1. Loops over a grid of (field_size, comms_size) values.
  2. For each setting:
    • Builds a GameLayout and majority-teacher players.
    • Uses the imitation utilities to generate synthetic datasets for model_a and model_b, following the majority strategy.
    • Trains NeuralNetPlayers.model_a and NeuralNetPlayers.model_b separately via imitation.
    • Runs a tournament with MajorityPlayers and with NeuralNetPlayers.
  3. Collects all results in a pandas DataFrame and prints a compact summary table.

The notebook assumes that:

  • The qsb package is importable (or adjust the imports below to your local package layout).
  • The module neural_net_imitation_utils.py is available and implements the functions specified in the design document (especially generate_majority_imitation_datasets).
The final results of the sweep are summarized in the following table:
field_size comms_size maj nn maj_theory info_limit
4 1 0.5968 0.5924 0.5982 0.6461
4 2 0.6300 0.6290 0.6367 0.7051
4 4 0.6816 0.6852 0.6875 0.7855
4 8 0.7508 0.7526 0.7500 0.8900
8 1 0.5578 0.5568 0.5497 0.5735
8 2 0.5716 0.5710 0.5700 0.6037
8 4 0.6018 0.5884 0.5982 0.6461
8 8 0.6436 0.6284 0.6367 0.7051
16 1 0.5198 0.5332 0.5249 0.5368
16 2 0.5374 0.5414 0.5352 0.5520
16 4 0.5478 0.5550 0.5497 0.5735
16 8 0.5708 0.5524 0.5700 0.6037
32 1 0.5170 0.5160 0.5125 0.5184
32 2 0.5068 0.5214 0.5176 0.5260
32 4 0.5272 0.5364 0.5249 0.5368
32 8 0.5350 0.5318 0.5352 0.5520
64 1 0.5078 0.5088 0.5062 0.5092
64 2 0.5022 0.5066 0.5088 0.5130
64 4 0.5114 0.5160 0.5125 0.5184
In [ ]:
import numpy as np
import pandas as pd
import sys
# Make the local source tree importable (notebook lives next to ../src).
sys.path.append("../src")
import Q_Sea_Battle as qsb 
# Core QSeaBattle imports.
# Adjust these imports if your package layout is different.
from Q_Sea_Battle import GameLayout
from Q_Sea_Battle import GameEnv
from Q_Sea_Battle import Tournament
from Q_Sea_Battle import MajorityPlayers, NeuralNetPlayers

# Imitation utilities: must provide generate_majority_imitation_datasets.
from Q_Sea_Battle import neural_net_imitation_utilities as imitation_utils

print("qsb version loaded. Available symbols:", dir(qsb)[:20])

# Debug-friendly settings — use only during development.
import tensorflow as tf
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()

print("Debug mode enabled: eager execution + eager tf.data")
# NOTE(review): the two tf debug calls above are currently ACTIVE (not commented
# out). For large runs or sweeps, comment them out to restore full graph-mode
# performance — eager execution slows tf.function and tf.data significantly.
qsb version loaded. Available symbols: ['AssistedPlayers', 'Game', 'GameEnv', 'GameLayout', 'MajorityPlayers', 'NeuralNetPlayerA', 'NeuralNetPlayerB', 'NeuralNetPlayers', 'PlayerA', 'PlayerB', 'Players', 'SharedRandomness', 'SimplePlayers', 'Tournament', 'TournamentLog', 'TrainableAssistedPlayers', '__all__', '__builtins__', '__cached__', '__doc__']
Debug mode enabled: eager execution + eager tf.data
In [9]:
def inspect_imitation_datasets(layout, dataset_a, dataset_b, sample_size=50_000):
    """
    Sanity-check the synthetic imitation datasets against the MajorityPlayers
    teacher.

    Reports:
      A1. empirical fraction of ones in the sampled fields,
      A2. per-bit frequencies of the comm vectors in dataset A,
      B1. distribution of one-hot gun indices in dataset B,
      B2. fraction of positive shoot labels in dataset B,
      C1/C2. mismatch rate between recorded labels and what a freshly built
             MajorityPlayers teacher would decide for the same inputs.

    Parameters
    ----------
    layout : GameLayout
        Layout whose comms_size determines how many comm bits to report.
    dataset_a : pandas.DataFrame
        Columns ["field", "comm", ...] — model_a imitation data (field -> comm).
    dataset_b : pandas.DataFrame
        Columns ["gun", "comm", "shoot", ...] — model_b imitation data
        (gun + comm -> shoot).
    sample_size : int
        Number of rows to subsample from each dataset for the statistics
        (keeps the inspection fast on large datasets).
    """
    # Local imports keep the helper self-contained when pasted elsewhere.
    # (The previously unused `import pandas as pd` was removed.)
    import numpy as np
    import Q_Sea_Battle as qsb

    print("=== Imitation Dataset Inspection ===")

    # -----------------------------
    # Subsample for speed; fixed random_state keeps the report reproducible.
    # -----------------------------
    da = dataset_a.sample(sample_size, random_state=0) if len(dataset_a) > sample_size else dataset_a
    db = dataset_b.sample(sample_size, random_state=0) if len(dataset_b) > sample_size else dataset_b

    m = layout.comms_size

    # -----------------------------
    # A1 — Field distribution: overall mean of all field bits.
    # -----------------------------
    fields = np.stack(da["field"].to_numpy(), axis=0).astype(float)
    p_emp = fields.mean()
    print(f" A1: Field mean p_one (empirical): {p_emp:.4f}")

    # -----------------------------
    # A2 — Per-bit mean of the comm vectors produced by the teacher.
    # -----------------------------
    comms_a = np.stack(da["comm"].to_numpy(), axis=0)
    print(" A2: Comm bit frequencies (dataset A):")
    for j in range(m):
        print(f"  bit {j}: mean={comms_a[:, j].mean():.4f}")

    # -----------------------------
    # B1 — Gun index uniformity. The gun appears to be one-hot encoded, so
    # argmax recovers the index; a roughly uniform index distribution shows
    # a large standard deviation.
    # -----------------------------
    guns_b = np.stack(db["gun"].to_numpy(), axis=0)
    gun_idx = guns_b.argmax(axis=1)
    print(" B1: Gun index statistics (dataset B):")
    print(f"  min idx={gun_idx.min()}, max idx={gun_idx.max()}")
    print(f"  approx std(index) = {gun_idx.std():.1f} (uniform-ish if large)")

    # -----------------------------
    # B2 — Fraction of positive shoot labels.
    # -----------------------------
    shoots = db["shoot"].to_numpy(dtype=float)
    print(f" B2: Shoot distribution: mean(shoot)={shoots.mean():.4f}")

    # -----------------------------
    # C1 — Replay a small subset of dataset B through the real teacher and
    # count disagreements with the recorded shoot labels.
    # -----------------------------
    print(" C1: Cross-check dataset B vs MajorityPlayers.playerB ...")

    maj = qsb.MajorityPlayers(layout)
    player_a_maj, player_b_maj = maj.players()

    subset_b = db.sample(min(2000, len(db)), random_state=1)

    mismatches_b = sum(
        int(row["shoot"]) != int(player_b_maj.decide(gun=row["gun"], comm=row["comm"]))
        for _, row in subset_b.iterrows()
    )
    mismatch_rate_b = mismatches_b / len(subset_b)
    print(f"  Majority vs Dataset-B shoot mismatch rate: {mismatch_rate_b:.4f}")

    # -----------------------------
    # C2 — Same cross-check for dataset A (field -> comm).
    # -----------------------------
    print(" C2: Cross-check dataset A vs MajorityPlayers.playerA ...")

    subset_a = da.sample(min(2000, len(da)), random_state=2)

    mismatches_a = sum(
        not np.array_equal(row["comm"], player_a_maj.decide(row["field"]))
        for _, row in subset_a.iterrows()
    )
    mismatch_rate_a = mismatches_a / len(subset_a)
    print(f"  Majority vs Dataset-A comm mismatch rate: {mismatch_rate_a:.4f}")

    print(" === Inspection Completed ===")
In [10]:
# ----------------------------
# Global configuration
# ----------------------------

# Sweep grid (field sizes descending, comm sizes descending).
FIELD_SIZES = [64,32,16,8,4]
COMMS_SIZES = [8,4,2,1]

# Number of synthetic samples per imitation batch.
# We keep the *total* number of synthetic samples roughly comparable to before,
# but avoid a single huge DataFrame in memory.
NUM_SAMPLES_A = 10_000   # for model_a (field -> comm), per batch
NUM_SAMPLES_B = 10_000   # for model_b (gun + comm -> shoot), per batch

# How many independent imitation batches to generate and train on.
# Total synthetic samples seen per model = NUM_IM_BATCHES_* x NUM_SAMPLES_*.
NUM_IM_BATCHES_A = 25
NUM_IM_BATCHES_B = 25

# Training hyper-parameters for imitation (consumed by train_model_a).
TRAINING_SETTINGS_A = {
    "epochs": 25,
    "batch_size": 256,
    "learning_rate": 1e-3,
    "verbose": 0,  # 0 = silent training output
    "use_sample_weight": False,
}

# Training hyper-parameters for imitation (consumed by train_model_b).
TRAINING_SETTINGS_B = {
    "epochs": 25,
    "batch_size": 256,
    "learning_rate": 1e-3,
    "verbose": 0,  # 0 = silent training output
    "use_sample_weight": False,
}

# Tournament configuration: number of games per evaluation tournament.
NUM_GAMES_TOURNAMENT = 5_000

# Base seed for reproducibility of the synthetic imitation data.
BASE_SEED = 12345
In [11]:
 
def run_single_experiment(field_size: int, comms_size: int, seed: int = 0):
    """
    Run imitation training and evaluation tournaments for a single
    (field_size, comms_size) configuration.

    Parameters
    ----------
    field_size : int
        Linear size of the game field (the field has field_size**2 cells).
    comms_size : int
        Number of communication bits; must not exceed field_size**2.
    seed : int
        Experiment-level seed that offsets the per-batch data-generation
        seeds, so different experiments see different synthetic data.
        seed=0 reproduces the original fixed BASE_SEED schedule exactly.

    Returns
    -------
    dict
        Keys: field_size, comms_size, maj_mean, maj_stderr, nn_mean, nn_stderr.

    Raises
    ------
    ValueError
        If comms_size exceeds the number of field cells.
    """
    n2 = field_size ** 2
    if comms_size > n2:
        raise ValueError("comms_size cannot exceed field_size**2 in this setup.")

    # 1) Build layout and environment
    layout = GameLayout(
        field_size=field_size,
        comms_size=comms_size,
        number_of_games_in_tournament=NUM_GAMES_TOURNAMENT,
    )

    game_env = GameEnv(layout)

    # 2) Evaluate the MajorityPlayers teacher as the baseline
    majority_players = MajorityPlayers(layout)
    tournament_teacher = Tournament(game_env, majority_players, layout)
    log_teacher = tournament_teacher.tournament()
    maj_mean, maj_stderr = log_teacher.outcome()

    # 3) Build NeuralNetPlayers student
    nn_players = NeuralNetPlayers(layout)

    # 4) Imitation training with on-the-fly batch generation
    #
    # Instead of generating one *huge* imitation dataset and training on it
    # multiple epochs, we generate a fresh synthetic batch per step and run a
    # short training phase on it. Peak memory stays bounded by NUM_SAMPLES_A /
    # NUM_SAMPLES_B while the total number of synthetic samples seen is
    # NUM_IM_BATCHES_* x NUM_SAMPLES_*.
    #
    # BUGFIX: the `seed` argument was previously accepted but never used, so
    # every experiment reused the identical synthetic-data seed schedule. It
    # now offsets every batch seed; seed=0 matches the old behavior exactly.
    seed_offset = BASE_SEED + 10_000 * seed

    # 4a) Train model_a (field -> comm) via imitation on multiple small batches.
    # NOTE(review): the generator produces both datasets each call and one is
    # discarded here — wasteful, but kept to preserve the seed schedule.
    for k in range(NUM_IM_BATCHES_A):
        dataset_a, _ = imitation_utils.generate_majority_imitation_datasets(
            layout=layout,
            num_samples_a=NUM_SAMPLES_A,
            num_samples_b=NUM_SAMPLES_B,
            seed=seed_offset + k,
        )
        nn_players.train_model_a(dataset_a, TRAINING_SETTINGS_A)

    # 4b) Train model_b (gun + comm -> shoot); the +100 offset keeps these
    # batch seeds disjoint from model_a's (valid while NUM_IM_BATCHES_A <= 100).
    for k in range(NUM_IM_BATCHES_B):
        _, dataset_b = imitation_utils.generate_majority_imitation_datasets(
            layout=layout,
            num_samples_a=NUM_SAMPLES_A,
            num_samples_b=NUM_SAMPLES_B,
            seed=seed_offset + 100 + k,
        )
        nn_players.train_model_b(dataset_b, TRAINING_SETTINGS_B)

    # 5) Evaluate trained NeuralNetPlayers in a fresh tournament
    #    (re-use the same layout but reset the environment)
    game_env_nn = GameEnv(layout)
    tournament_student = Tournament(game_env_nn, nn_players, layout)
    log_student = tournament_student.tournament()
    nn_mean, nn_stderr = log_student.outcome()

    return {
        "field_size": field_size,
        "comms_size": comms_size,
        "maj_mean": maj_mean,
        "maj_stderr": maj_stderr,
        "nn_mean": nn_mean,
        "nn_stderr": nn_stderr,
    }
In [12]:
# Sweep every (field_size, comms_size) combination, skipping infeasible ones,
# and collect one summary dict per experiment.
results = []

for seed_idx, field_size in enumerate(FIELD_SIZES):
    for comms_size in COMMS_SIZES:
        # Only run configurations where comms_size is not trivially impossible
        if comms_size > field_size ** 2:
            continue

        print(f"\n=== Running experiment: field_size={field_size}, comms_size={comms_size}, sample_size={NUM_SAMPLES_A} ===")
        summary = run_single_experiment(field_size, comms_size, seed=seed_idx)
        results.append(summary)
        print(
            f"Majority: mean={summary['maj_mean']:.4f}, stderr={summary['maj_stderr']:.4f} | "
            f"Neural: mean={summary['nn_mean']:.4f}, stderr={summary['nn_stderr']:.4f}"
        )

# One DataFrame for the whole sweep, ordered for readable display.
results_df = (
    pd.DataFrame(results)
    .sort_values(["field_size", "comms_size"])
    .reset_index(drop=True)
)

results_df
=== Running experiment: field_size=64, comms_size=8, sample_size=10000 ===
Majority: mean=0.5122, stderr=0.0071 | Neural: mean=0.5172, stderr=0.0071

=== Running experiment: field_size=64, comms_size=4, sample_size=10000 ===
Majority: mean=0.5114, stderr=0.0071 | Neural: mean=0.5160, stderr=0.0071

=== Running experiment: field_size=64, comms_size=2, sample_size=10000 ===
Majority: mean=0.5022, stderr=0.0071 | Neural: mean=0.5066, stderr=0.0071

=== Running experiment: field_size=64, comms_size=1, sample_size=10000 ===
Majority: mean=0.5078, stderr=0.0071 | Neural: mean=0.5088, stderr=0.0071

=== Running experiment: field_size=32, comms_size=8, sample_size=10000 ===
Majority: mean=0.5350, stderr=0.0071 | Neural: mean=0.5318, stderr=0.0071

=== Running experiment: field_size=32, comms_size=4, sample_size=10000 ===
Majority: mean=0.5272, stderr=0.0071 | Neural: mean=0.5364, stderr=0.0071

=== Running experiment: field_size=32, comms_size=2, sample_size=10000 ===
Majority: mean=0.5068, stderr=0.0071 | Neural: mean=0.5214, stderr=0.0071

=== Running experiment: field_size=32, comms_size=1, sample_size=10000 ===
Majority: mean=0.5170, stderr=0.0071 | Neural: mean=0.5160, stderr=0.0071

=== Running experiment: field_size=16, comms_size=8, sample_size=10000 ===
Majority: mean=0.5708, stderr=0.0070 | Neural: mean=0.5524, stderr=0.0070

=== Running experiment: field_size=16, comms_size=4, sample_size=10000 ===
Majority: mean=0.5478, stderr=0.0070 | Neural: mean=0.5550, stderr=0.0070

=== Running experiment: field_size=16, comms_size=2, sample_size=10000 ===
Majority: mean=0.5374, stderr=0.0071 | Neural: mean=0.5414, stderr=0.0070

=== Running experiment: field_size=16, comms_size=1, sample_size=10000 ===
Majority: mean=0.5198, stderr=0.0071 | Neural: mean=0.5332, stderr=0.0071

=== Running experiment: field_size=8, comms_size=8, sample_size=10000 ===
Majority: mean=0.6436, stderr=0.0068 | Neural: mean=0.6284, stderr=0.0068

=== Running experiment: field_size=8, comms_size=4, sample_size=10000 ===
Majority: mean=0.6018, stderr=0.0069 | Neural: mean=0.5884, stderr=0.0070

=== Running experiment: field_size=8, comms_size=2, sample_size=10000 ===
Majority: mean=0.5716, stderr=0.0070 | Neural: mean=0.5710, stderr=0.0070

=== Running experiment: field_size=8, comms_size=1, sample_size=10000 ===
Majority: mean=0.5578, stderr=0.0070 | Neural: mean=0.5568, stderr=0.0070

=== Running experiment: field_size=4, comms_size=8, sample_size=10000 ===
Majority: mean=0.7508, stderr=0.0061 | Neural: mean=0.7526, stderr=0.0061

=== Running experiment: field_size=4, comms_size=4, sample_size=10000 ===
Majority: mean=0.6816, stderr=0.0066 | Neural: mean=0.6852, stderr=0.0066

=== Running experiment: field_size=4, comms_size=2, sample_size=10000 ===
Majority: mean=0.6300, stderr=0.0068 | Neural: mean=0.6290, stderr=0.0068

=== Running experiment: field_size=4, comms_size=1, sample_size=10000 ===
Majority: mean=0.5968, stderr=0.0069 | Neural: mean=0.5924, stderr=0.0069
Out[12]:
field_size comms_size maj_mean maj_stderr nn_mean nn_stderr
0 4 1 0.5968 0.006938 0.5924 0.006950
1 4 2 0.6300 0.006829 0.6290 0.006832
2 4 4 0.6816 0.006589 0.6852 0.006569
3 4 8 0.7508 0.006118 0.7526 0.006103
4 8 1 0.5578 0.007024 0.5568 0.007026
5 8 2 0.5716 0.006999 0.5710 0.007000
6 8 4 0.6018 0.006924 0.5884 0.006960
7 8 8 0.6436 0.006774 0.6284 0.006835
8 16 1 0.5198 0.007066 0.5332 0.007056
9 16 2 0.5374 0.007052 0.5414 0.007047
10 16 4 0.5478 0.007039 0.5550 0.007029
11 16 8 0.5708 0.007001 0.5524 0.007033
12 32 1 0.5170 0.007068 0.5160 0.007068
13 32 2 0.5068 0.007071 0.5214 0.007065
14 32 4 0.5272 0.007061 0.5364 0.007053
15 32 8 0.5350 0.007054 0.5318 0.007057
16 64 1 0.5078 0.007071 0.5088 0.007071
17 64 2 0.5022 0.007072 0.5066 0.007071
18 64 4 0.5114 0.007070 0.5160 0.007068
19 64 8 0.5122 0.007070 0.5172 0.007068

Summary table¶

In [13]:
from Q_Sea_Battle.reference_performance_utilities import (
    expected_win_rate_majority,
    limit_from_mutual_information,
)

# A default layout is used only to read enemy_probability.
# NOTE(review): this assumes enemy_probability does not depend on
# field_size/comms_size — confirm against GameLayout's defaults.
layout = GameLayout()

# Analytic prediction for the majority strategy (noise-free channel).
results_df["maj_theory"] = results_df.apply(
    lambda row: expected_win_rate_majority(
        field_size=int(row["field_size"]),
        comms_size=int(row["comms_size"]),
        enemy_probability=layout.enemy_probability,
        channel_noise=0.0,
    ),
    axis=1,
)

# Information-theoretic limit (mutual-information bound).
# CONSISTENCY FIX: cast to int as in the maj_theory computation above —
# DataFrame round-trips can yield numpy integer types for these columns.
results_df["info_limit"] = results_df.apply(
    lambda row: limit_from_mutual_information(
        field_size=int(row["field_size"]),
        comms_size=int(row["comms_size"]),
        channel_noise=0.0,
    ),
    axis=1,
)

# Format nicely: one shared 4-decimal formatter instead of repeated lambdas.
table = results_df.copy()
_fmt = "{:0.4f}".format

table["maj"]         = table["maj_mean"].map(_fmt)
table["nn"]          = table["nn_mean"].map(_fmt)
table["maj_theory"]  = table["maj_theory"].map(_fmt)
table["info_limit"]  = table["info_limit"].map(_fmt)

display_cols = ["field_size", "comms_size", "maj", "nn", "maj_theory", "info_limit"]
display(table[display_cols])
field_size comms_size maj nn maj_theory info_limit
0 4 1 0.5968 0.5924 0.5982 0.6461
1 4 2 0.6300 0.6290 0.6367 0.7051
2 4 4 0.6816 0.6852 0.6875 0.7855
3 4 8 0.7508 0.7526 0.7500 0.8900
4 8 1 0.5578 0.5568 0.5497 0.5735
5 8 2 0.5716 0.5710 0.5700 0.6037
6 8 4 0.6018 0.5884 0.5982 0.6461
7 8 8 0.6436 0.6284 0.6367 0.7051
8 16 1 0.5198 0.5332 0.5249 0.5368
9 16 2 0.5374 0.5414 0.5352 0.5520
10 16 4 0.5478 0.5550 0.5497 0.5735
11 16 8 0.5708 0.5524 0.5700 0.6037
12 32 1 0.5170 0.5160 0.5125 0.5184
13 32 2 0.5068 0.5214 0.5176 0.5260
14 32 4 0.5272 0.5364 0.5249 0.5368
15 32 8 0.5350 0.5318 0.5352 0.5520
16 64 1 0.5078 0.5088 0.5062 0.5092
17 64 2 0.5022 0.5066 0.5088 0.5130
18 64 4 0.5114 0.5160 0.5125 0.5184
19 64 8 0.5122 0.5172 0.5176 0.5260
field_size comms_size maj nn maj_theory info_limit
4 1 0.5968 0.5924 0.5982 0.6461
4 2 0.6300 0.6290 0.6367 0.7051
4 4 0.6816 0.6852 0.6875 0.7855
4 8 0.7508 0.7526 0.7500 0.8900
8 1 0.5578 0.5568 0.5497 0.5735
8 2 0.5716 0.5710 0.5700 0.6037
8 4 0.6018 0.5884 0.5982 0.6461
8 8 0.6436 0.6284 0.6367 0.7051
16 1 0.5198 0.5332 0.5249 0.5368
16 2 0.5374 0.5414 0.5352 0.5520
16 4 0.5478 0.5550 0.5497 0.5735
16 8 0.5708 0.5524 0.5700 0.6037
32 1 0.5170 0.5160 0.5125 0.5184
32 2 0.5068 0.5214 0.5176 0.5260
32 4 0.5272 0.5364 0.5249 0.5368
32 8 0.5350 0.5318 0.5352 0.5520
64 1 0.5078 0.5088 0.5062 0.5092
64 2 0.5022 0.5066 0.5088 0.5130
64 4 0.5114 0.5160 0.5125 0.5184
64 8 0.5122 0.5172 0.5176 0.5260