Imitation training for NeuralNetPlayers and evaluation tournament sweep¶
This notebook:
- Loops over a grid of `(field_size, comms_size)` values.
- For each setting:
  - Builds a `GameLayout` and majority-teacher players.
  - Uses the imitation utilities to generate synthetic datasets for `model_a` and `model_b`, following the majority strategy.
  - Trains `NeuralNetPlayers.model_a` and `NeuralNetPlayers.model_b` separately via imitation.
  - Runs a tournament with `MajorityPlayers` and with `NeuralNetPlayers`.
- Collects all results in a pandas `DataFrame` and prints a compact summary table.
The notebook assumes that:
- The `qsb` package is importable (or adjust the imports below to your local package layout).
- The module `neural_net_imitation_utils.py` is available and implements the functions specified in the design document (especially `generate_majority_imitation_datasets`).
- The result is summarized in a table
| field_size | comms_size | maj | nn | maj_theory | info_limit |
|---|---|---|---|---|---|
| 4 | 1 | 0.5968 | 0.5924 | 0.5982 | 0.6461 |
| 4 | 2 | 0.6300 | 0.6290 | 0.6367 | 0.7051 |
| 4 | 4 | 0.6816 | 0.6852 | 0.6875 | 0.7855 |
| 4 | 8 | 0.7508 | 0.7526 | 0.7500 | 0.8900 |
| 8 | 1 | 0.5578 | 0.5568 | 0.5497 | 0.5735 |
| 8 | 2 | 0.5716 | 0.5710 | 0.5700 | 0.6037 |
| 8 | 4 | 0.6018 | 0.5884 | 0.5982 | 0.6461 |
| 8 | 8 | 0.6436 | 0.6284 | 0.6367 | 0.7051 |
| 16 | 1 | 0.5198 | 0.5332 | 0.5249 | 0.5368 |
| 16 | 2 | 0.5374 | 0.5414 | 0.5352 | 0.5520 |
| 16 | 4 | 0.5478 | 0.5550 | 0.5497 | 0.5735 |
| 16 | 8 | 0.5708 | 0.5524 | 0.5700 | 0.6037 |
| 32 | 1 | 0.5170 | 0.5160 | 0.5125 | 0.5184 |
| 32 | 2 | 0.5068 | 0.5214 | 0.5176 | 0.5260 |
| 32 | 4 | 0.5272 | 0.5364 | 0.5249 | 0.5368 |
| 32 | 8 | 0.5350 | 0.5318 | 0.5352 | 0.5520 |
| 64 | 1 | 0.5078 | 0.5088 | 0.5062 | 0.5092 |
| 64 | 2 | 0.5022 | 0.5066 | 0.5088 | 0.5130 |
| 64 | 4 | 0.5114 | 0.5160 | 0.5125 | 0.5184 |
In [ ]:
import numpy as np
import pandas as pd
import sys
# Make the local source tree importable before the package imports below.
sys.path.append("../src")
import Q_Sea_Battle as qsb
# Core QSeaBattle imports.
# Adjust these imports if your package layout is different.
from Q_Sea_Battle import GameLayout
from Q_Sea_Battle import GameEnv
from Q_Sea_Battle import Tournament
from Q_Sea_Battle import MajorityPlayers, NeuralNetPlayers
from Q_Sea_Battle import neural_net_imitation_utilities as imitation_utils
# Quick sanity check that the package loaded and exposes the expected symbols.
print("qsb version loaded. Available symbols:", dir(qsb)[:20])
# Debug-friendly settings — use only during development.
import tensorflow as tf
# NOTE(review): the two calls below are currently ACTIVE, not commented out.
# They force eager execution and eager tf.data, which makes TF much slower.
# Comment them out for large runs or sweeps to restore full performance.
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()
print("Debug mode enabled: eager execution + eager tf.data")
qsb version loaded. Available symbols: ['AssistedPlayers', 'Game', 'GameEnv', 'GameLayout', 'MajorityPlayers', 'NeuralNetPlayerA', 'NeuralNetPlayerB', 'NeuralNetPlayers', 'PlayerA', 'PlayerB', 'Players', 'SharedRandomness', 'SimplePlayers', 'Tournament', 'TournamentLog', 'TrainableAssistedPlayers', '__all__', '__builtins__', '__cached__', '__doc__'] Debug mode enabled: eager execution + eager tf.data
In [9]:
def inspect_imitation_datasets(layout, dataset_a, dataset_b, sample_size=50_000):
    """
    Inspect dataset_a (field -> comm) and dataset_b (gun+comm -> shoot)
    to confirm that the imitation data matches the intended distribution
    and the MajorityPlayers logic.

    Parameters
    ----------
    layout : GameLayout
        Game configuration; only ``comms_size`` is read here.
    dataset_a : pandas.DataFrame
        Columns ["field", "comm", ...].
    dataset_b : pandas.DataFrame
        Columns ["field", "gun", "comm", "shoot", ...].
    sample_size : int
        Number of rows to subsample for stats (for speed).
    """
    # Local imports keep the function self-contained when pasted elsewhere.
    # (The previously unused `import pandas as pd` and the unused local
    # `n2 = layout.field_size ** 2` were removed.)
    import numpy as np
    import Q_Sea_Battle as qsb
    print("=== Imitation Dataset Inspection ===")
    # -----------------------------
    # Subsample for speed (fixed random_state keeps the report deterministic)
    # -----------------------------
    da = dataset_a.sample(sample_size, random_state=0) if len(dataset_a) > sample_size else dataset_a
    db = dataset_b.sample(sample_size, random_state=0) if len(dataset_b) > sample_size else dataset_b
    m = layout.comms_size
    # -----------------------------
    # A1 — Field distribution p_one
    # -----------------------------
    fields = np.stack(da["field"].to_numpy(), axis=0).astype(float)
    p_emp = fields.mean()
    print(f" A1: Field mean p_one (empirical): {p_emp:.4f}")
    # -----------------------------
    # A2 — Comm bit frequencies
    # -----------------------------
    comms_a = np.stack(da["comm"].to_numpy(), axis=0)
    print(" A2: Comm bit frequencies (dataset A):")
    for j in range(m):
        print(f" bit {j}: mean={comms_a[:, j].mean():.4f}")
    # -----------------------------
    # B1 — Gun index uniformity
    # -----------------------------
    guns_b = np.stack(db["gun"].to_numpy(), axis=0)
    gun_idx = guns_b.argmax(axis=1)  # one-hot gun -> integer index
    print(" B1: Gun index statistics (dataset B):")
    print(f" min idx={gun_idx.min()}, max idx={gun_idx.max()}")
    print(f" approx std(index) = {gun_idx.std():.1f} (uniform-ish if large)")
    # -----------------------------
    # B2 — Shoot distribution
    # -----------------------------
    shoots = np.array(db["shoot"].to_numpy(), dtype=float)
    print(f" B2: Shoot distribution: mean(shoot)={shoots.mean():.4f}")
    # -----------------------------
    # C1 — Cross-check B vs MajorityPlayers
    # -----------------------------
    print(" C1: Cross-check dataset B vs MajorityPlayers.playerB ...")
    maj = qsb.MajorityPlayers(layout)
    player_a_maj, player_b_maj = maj.players()
    subset_b = db.sample(min(2000, len(db)), random_state=1)
    # Count rows where the dataset's shoot label disagrees with the teacher.
    mismatches_b = sum(
        int(row["shoot"]) != int(player_b_maj.decide(gun=row["gun"], comm=row["comm"]))
        for _, row in subset_b.iterrows()
    )
    mismatch_rate_b = mismatches_b / len(subset_b)
    print(f" Majority vs Dataset-B shoot mismatch rate: {mismatch_rate_b:.4f}")
    # -----------------------------
    # C2 — Cross-check A vs MajorityPlayers
    # -----------------------------
    print(" C2: Cross-check dataset A vs MajorityPlayers.playerA ...")
    subset_a = da.sample(min(2000, len(da)), random_state=2)
    # Count rows where the dataset's comm vector disagrees with the teacher.
    mismatches_a = sum(
        not np.array_equal(row["comm"], player_a_maj.decide(row["field"]))
        for _, row in subset_a.iterrows()
    )
    mismatch_rate_a = mismatches_a / len(subset_a)
    print(f" Majority vs Dataset-A comm mismatch rate: {mismatch_rate_a:.4f}")
    print(" === Inspection Completed ===")
In [10]:
# ----------------------------
# Global configuration
# ----------------------------
# Grid of game sizes to sweep (largest field first).
FIELD_SIZES = [64,32,16,8,4]
COMMS_SIZES = [8,4,2,1]
# Number of synthetic samples per imitation batch.
# We keep the *total* number of synthetic samples roughly comparable to before,
# but avoid a single huge DataFrame in memory.
NUM_SAMPLES_A = 10_000 # for model_a (field -> comm), per batch
NUM_SAMPLES_B = 10_000 # for model_b (gun + comm -> shoot), per batch
# How many independent imitation batches to generate and train on.
# Total samples seen per model = NUM_IM_BATCHES_* x NUM_SAMPLES_*.
NUM_IM_BATCHES_A = 25
NUM_IM_BATCHES_B = 25
# Training hyper-parameters for imitation (passed to nn_players.train_model_*).
TRAINING_SETTINGS_A = {
    "epochs": 25,
    "batch_size": 256,
    "learning_rate": 1e-3,
    "verbose": 0,
    "use_sample_weight": False,
}
TRAINING_SETTINGS_B = {
    "epochs": 25,
    "batch_size": 256,
    "learning_rate": 1e-3,
    "verbose": 0,
    "use_sample_weight": False,
}
# Tournament configuration: games played per (field_size, comms_size) setting.
NUM_GAMES_TOURNAMENT = 5_000
# Base seed for reproducibility of the synthetic imitation datasets.
BASE_SEED = 12345
In [11]:
def run_single_experiment(field_size: int, comms_size: int, seed: int = 0):
    """
    Run imitation-training and tournaments for a single (field_size, comms_size).

    Parameters
    ----------
    field_size : int
        Linear size of the game field (the field has field_size**2 cells).
    comms_size : int
        Number of communication bits; must not exceed field_size**2.
    seed : int
        Experiment-level seed, folded into the per-batch dataset seeds so that
        different sweep iterations train on different synthetic data.
        ``seed=0`` reproduces the original (pre-fix) seeding exactly.

    Returns
    -------
    dict
        Summary statistics: field/comms sizes plus mean and stderr of the
        tournament win rate for the Majority teacher and the NN student.

    Raises
    ------
    ValueError
        If comms_size exceeds field_size**2.
    """
    n2 = field_size ** 2
    if comms_size > n2:
        raise ValueError("comms_size cannot exceed field_size**2 in this setup.")
    # 1) Build layout and environment
    layout = GameLayout(
        field_size=field_size,
        comms_size=comms_size,
        number_of_games_in_tournament=NUM_GAMES_TOURNAMENT,
    )
    game_env = GameEnv(layout)
    # 2) Evaluate MajorityPlayers teacher
    majority_players = MajorityPlayers(layout)
    tournament_teacher = Tournament(game_env, majority_players, layout)
    log_teacher = tournament_teacher.tournament()
    maj_mean, maj_stderr = log_teacher.outcome()
    # 3) Build NeuralNetPlayers student
    nn_players = NeuralNetPlayers(layout)
    # BUGFIX: `seed` was previously accepted but never used, so every sweep
    # iteration trained on identical synthetic data. Fold it into the batch
    # seeds. The multiplier (1000) exceeds the largest per-batch offset
    # (100 + NUM_IM_BATCHES_B), so distinct `seed` values cannot collide.
    seed_base = BASE_SEED + 1000 * seed
    # 4) Imitation training with on-the-fly batch generation
    #
    # Instead of generating one *huge* imitation dataset and training on it
    # multiple epochs, we:
    #   - generate a fresh synthetic imitation batch for each step, and
    #   - run a short training phase on that batch.
    #
    # This keeps peak memory bounded by NUM_SAMPLES_A / NUM_SAMPLES_B, while
    # the *total* number of synthetic samples seen during training is
    # NUM_IM_BATCHES_* x NUM_SAMPLES_*.
    # 4a) Train model_a (field -> comm) via imitation on multiple small batches
    for k in range(NUM_IM_BATCHES_A):
        dataset_a, _ = imitation_utils.generate_majority_imitation_datasets(
            layout=layout,
            num_samples_a=NUM_SAMPLES_A,
            num_samples_b=NUM_SAMPLES_B,
            seed=seed_base + k,
        )
        nn_players.train_model_a(dataset_a, TRAINING_SETTINGS_A)
    # 4b) Train model_b (gun + comm -> shoot) on independent batches; the +100
    # offset keeps these seeds disjoint from the model_a ones (batches < 100).
    for k in range(NUM_IM_BATCHES_B):
        _, dataset_b = imitation_utils.generate_majority_imitation_datasets(
            layout=layout,
            num_samples_a=NUM_SAMPLES_A,
            num_samples_b=NUM_SAMPLES_B,
            seed=seed_base + 100 + k,
        )
        nn_players.train_model_b(dataset_b, TRAINING_SETTINGS_B)
    # 5) Evaluate trained NeuralNetPlayers in a fresh tournament
    #    (re-use the same layout but reset the environment)
    game_env_nn = GameEnv(layout)
    tournament_student = Tournament(game_env_nn, nn_players, layout)
    log_student = tournament_student.tournament()
    nn_mean, nn_stderr = log_student.outcome()
    return {
        "field_size": field_size,
        "comms_size": comms_size,
        "maj_mean": maj_mean,
        "maj_stderr": maj_stderr,
        "nn_mean": nn_mean,
        "nn_stderr": nn_stderr,
    }
In [12]:
# Sweep every (field_size, comms_size) pair and collect per-setting summaries.
results = []
for idx, field_size in enumerate(FIELD_SIZES):
    for comms_size in COMMS_SIZES:
        # Run only configurations where comms_size is not trivially impossible.
        if comms_size <= field_size ** 2:
            print(f"\n=== Running experiment: field_size={field_size}, comms_size={comms_size}, sample_size={NUM_SAMPLES_A} ===")
            record = run_single_experiment(field_size, comms_size, seed=idx)
            results.append(record)
            print(
                f"Majority: mean={record['maj_mean']:.4f}, stderr={record['maj_stderr']:.4f} | "
                f"Neural: mean={record['nn_mean']:.4f}, stderr={record['nn_stderr']:.4f}"
            )
# Stable ordering for the summary table, regardless of sweep order.
results_df = (
    pd.DataFrame(results)
    .sort_values(["field_size", "comms_size"])
    .reset_index(drop=True)
)
results_df
=== Running experiment: field_size=64, comms_size=8, sample_size=10000 === Majority: mean=0.5122, stderr=0.0071 | Neural: mean=0.5172, stderr=0.0071 === Running experiment: field_size=64, comms_size=4, sample_size=10000 === Majority: mean=0.5114, stderr=0.0071 | Neural: mean=0.5160, stderr=0.0071 === Running experiment: field_size=64, comms_size=2, sample_size=10000 === Majority: mean=0.5022, stderr=0.0071 | Neural: mean=0.5066, stderr=0.0071 === Running experiment: field_size=64, comms_size=1, sample_size=10000 === Majority: mean=0.5078, stderr=0.0071 | Neural: mean=0.5088, stderr=0.0071 === Running experiment: field_size=32, comms_size=8, sample_size=10000 === Majority: mean=0.5350, stderr=0.0071 | Neural: mean=0.5318, stderr=0.0071 === Running experiment: field_size=32, comms_size=4, sample_size=10000 === Majority: mean=0.5272, stderr=0.0071 | Neural: mean=0.5364, stderr=0.0071 === Running experiment: field_size=32, comms_size=2, sample_size=10000 === Majority: mean=0.5068, stderr=0.0071 | Neural: mean=0.5214, stderr=0.0071 === Running experiment: field_size=32, comms_size=1, sample_size=10000 === Majority: mean=0.5170, stderr=0.0071 | Neural: mean=0.5160, stderr=0.0071 === Running experiment: field_size=16, comms_size=8, sample_size=10000 === Majority: mean=0.5708, stderr=0.0070 | Neural: mean=0.5524, stderr=0.0070 === Running experiment: field_size=16, comms_size=4, sample_size=10000 === Majority: mean=0.5478, stderr=0.0070 | Neural: mean=0.5550, stderr=0.0070 === Running experiment: field_size=16, comms_size=2, sample_size=10000 === Majority: mean=0.5374, stderr=0.0071 | Neural: mean=0.5414, stderr=0.0070 === Running experiment: field_size=16, comms_size=1, sample_size=10000 === Majority: mean=0.5198, stderr=0.0071 | Neural: mean=0.5332, stderr=0.0071 === Running experiment: field_size=8, comms_size=8, sample_size=10000 === Majority: mean=0.6436, stderr=0.0068 | Neural: mean=0.6284, stderr=0.0068 === Running experiment: field_size=8, comms_size=4, 
sample_size=10000 === Majority: mean=0.6018, stderr=0.0069 | Neural: mean=0.5884, stderr=0.0070 === Running experiment: field_size=8, comms_size=2, sample_size=10000 === Majority: mean=0.5716, stderr=0.0070 | Neural: mean=0.5710, stderr=0.0070 === Running experiment: field_size=8, comms_size=1, sample_size=10000 === Majority: mean=0.5578, stderr=0.0070 | Neural: mean=0.5568, stderr=0.0070 === Running experiment: field_size=4, comms_size=8, sample_size=10000 === Majority: mean=0.7508, stderr=0.0061 | Neural: mean=0.7526, stderr=0.0061 === Running experiment: field_size=4, comms_size=4, sample_size=10000 === Majority: mean=0.6816, stderr=0.0066 | Neural: mean=0.6852, stderr=0.0066 === Running experiment: field_size=4, comms_size=2, sample_size=10000 === Majority: mean=0.6300, stderr=0.0068 | Neural: mean=0.6290, stderr=0.0068 === Running experiment: field_size=4, comms_size=1, sample_size=10000 === Majority: mean=0.5968, stderr=0.0069 | Neural: mean=0.5924, stderr=0.0069
Out[12]:
| field_size | comms_size | maj_mean | maj_stderr | nn_mean | nn_stderr | |
|---|---|---|---|---|---|---|
| 0 | 4 | 1 | 0.5968 | 0.006938 | 0.5924 | 0.006950 |
| 1 | 4 | 2 | 0.6300 | 0.006829 | 0.6290 | 0.006832 |
| 2 | 4 | 4 | 0.6816 | 0.006589 | 0.6852 | 0.006569 |
| 3 | 4 | 8 | 0.7508 | 0.006118 | 0.7526 | 0.006103 |
| 4 | 8 | 1 | 0.5578 | 0.007024 | 0.5568 | 0.007026 |
| 5 | 8 | 2 | 0.5716 | 0.006999 | 0.5710 | 0.007000 |
| 6 | 8 | 4 | 0.6018 | 0.006924 | 0.5884 | 0.006960 |
| 7 | 8 | 8 | 0.6436 | 0.006774 | 0.6284 | 0.006835 |
| 8 | 16 | 1 | 0.5198 | 0.007066 | 0.5332 | 0.007056 |
| 9 | 16 | 2 | 0.5374 | 0.007052 | 0.5414 | 0.007047 |
| 10 | 16 | 4 | 0.5478 | 0.007039 | 0.5550 | 0.007029 |
| 11 | 16 | 8 | 0.5708 | 0.007001 | 0.5524 | 0.007033 |
| 12 | 32 | 1 | 0.5170 | 0.007068 | 0.5160 | 0.007068 |
| 13 | 32 | 2 | 0.5068 | 0.007071 | 0.5214 | 0.007065 |
| 14 | 32 | 4 | 0.5272 | 0.007061 | 0.5364 | 0.007053 |
| 15 | 32 | 8 | 0.5350 | 0.007054 | 0.5318 | 0.007057 |
| 16 | 64 | 1 | 0.5078 | 0.007071 | 0.5088 | 0.007071 |
| 17 | 64 | 2 | 0.5022 | 0.007072 | 0.5066 | 0.007071 |
| 18 | 64 | 4 | 0.5114 | 0.007070 | 0.5160 | 0.007068 |
| 19 | 64 | 8 | 0.5122 | 0.007070 | 0.5172 | 0.007068 |
Summary table¶
In [13]:
from Q_Sea_Battle.reference_performance_utilities import (
    expected_win_rate_majority,
    limit_from_mutual_information,
)
# A default layout is built only to read its enemy_probability; the field and
# comms sizes come from each result row.
layout = GameLayout()
# Add analytic majority prediction (optional)
results_df["maj_theory"] = results_df.apply(
    lambda row: expected_win_rate_majority(
        field_size=int(row["field_size"]),
        comms_size=int(row["comms_size"]),
        enemy_probability=layout.enemy_probability,
        channel_noise=0.0,
    ),
    axis=1,
)
# Add information-theoretic limit (mutual information bound).
# CONSISTENCY FIX: cast row values to int here as well — the maj_theory apply
# above already did, and pandas may hand back numpy scalar types otherwise.
results_df["info_limit"] = results_df.apply(
    lambda row: limit_from_mutual_information(
        field_size=int(row["field_size"]),
        comms_size=int(row["comms_size"]),
        channel_noise=0.0,
    ),
    axis=1,
)
# Format nicely: fixed 4-decimal strings for the displayed columns.
table = results_df.copy()
table["maj"] = table["maj_mean"].map(lambda x: f"{x:0.4f}")
table["nn"] = table["nn_mean"].map(lambda x: f"{x:0.4f}")
table["maj_theory"] = table["maj_theory"].map(lambda x: f"{x:0.4f}")
table["info_limit"] = table["info_limit"].map(lambda x: f"{x:0.4f}")
display_cols = ["field_size", "comms_size", "maj", "nn", "maj_theory", "info_limit"]
display(table[display_cols])
| field_size | comms_size | maj | nn | maj_theory | info_limit | |
|---|---|---|---|---|---|---|
| 0 | 4 | 1 | 0.5968 | 0.5924 | 0.5982 | 0.6461 |
| 1 | 4 | 2 | 0.6300 | 0.6290 | 0.6367 | 0.7051 |
| 2 | 4 | 4 | 0.6816 | 0.6852 | 0.6875 | 0.7855 |
| 3 | 4 | 8 | 0.7508 | 0.7526 | 0.7500 | 0.8900 |
| 4 | 8 | 1 | 0.5578 | 0.5568 | 0.5497 | 0.5735 |
| 5 | 8 | 2 | 0.5716 | 0.5710 | 0.5700 | 0.6037 |
| 6 | 8 | 4 | 0.6018 | 0.5884 | 0.5982 | 0.6461 |
| 7 | 8 | 8 | 0.6436 | 0.6284 | 0.6367 | 0.7051 |
| 8 | 16 | 1 | 0.5198 | 0.5332 | 0.5249 | 0.5368 |
| 9 | 16 | 2 | 0.5374 | 0.5414 | 0.5352 | 0.5520 |
| 10 | 16 | 4 | 0.5478 | 0.5550 | 0.5497 | 0.5735 |
| 11 | 16 | 8 | 0.5708 | 0.5524 | 0.5700 | 0.6037 |
| 12 | 32 | 1 | 0.5170 | 0.5160 | 0.5125 | 0.5184 |
| 13 | 32 | 2 | 0.5068 | 0.5214 | 0.5176 | 0.5260 |
| 14 | 32 | 4 | 0.5272 | 0.5364 | 0.5249 | 0.5368 |
| 15 | 32 | 8 | 0.5350 | 0.5318 | 0.5352 | 0.5520 |
| 16 | 64 | 1 | 0.5078 | 0.5088 | 0.5062 | 0.5092 |
| 17 | 64 | 2 | 0.5022 | 0.5066 | 0.5088 | 0.5130 |
| 18 | 64 | 4 | 0.5114 | 0.5160 | 0.5125 | 0.5184 |
| 19 | 64 | 8 | 0.5122 | 0.5172 | 0.5176 | 0.5260 |
| field_size | comms_size | maj | nn | maj_theory | info_limit |
|---|---|---|---|---|---|
| 4 | 1 | 0.5968 | 0.5924 | 0.5982 | 0.6461 |
| 4 | 2 | 0.6300 | 0.6290 | 0.6367 | 0.7051 |
| 4 | 4 | 0.6816 | 0.6852 | 0.6875 | 0.7855 |
| 4 | 8 | 0.7508 | 0.7526 | 0.7500 | 0.8900 |
| 8 | 1 | 0.5578 | 0.5568 | 0.5497 | 0.5735 |
| 8 | 2 | 0.5716 | 0.5710 | 0.5700 | 0.6037 |
| 8 | 4 | 0.6018 | 0.5884 | 0.5982 | 0.6461 |
| 8 | 8 | 0.6436 | 0.6284 | 0.6367 | 0.7051 |
| 16 | 1 | 0.5198 | 0.5332 | 0.5249 | 0.5368 |
| 16 | 2 | 0.5374 | 0.5414 | 0.5352 | 0.5520 |
| 16 | 4 | 0.5478 | 0.5550 | 0.5497 | 0.5735 |
| 16 | 8 | 0.5708 | 0.5524 | 0.5700 | 0.6037 |
| 32 | 1 | 0.5170 | 0.5160 | 0.5125 | 0.5184 |
| 32 | 2 | 0.5068 | 0.5214 | 0.5176 | 0.5260 |
| 32 | 4 | 0.5272 | 0.5364 | 0.5249 | 0.5368 |
| 32 | 8 | 0.5350 | 0.5318 | 0.5352 | 0.5520 |
| 64 | 1 | 0.5078 | 0.5088 | 0.5062 | 0.5092 |
| 64 | 2 | 0.5022 | 0.5066 | 0.5088 | 0.5130 |
| 64 | 4 | 0.5114 | 0.5160 | 0.5125 | 0.5184 |
| 64 | 8 | 0.5122 | 0.5172 | 0.5176 | 0.5260 |