Hands-on Examples
From basic data loading to training ISAC baselines.
Usage & Examples
LAMBDA follows Sionna's Right-Handed Coordinate System.
- Basic: Load CSI
- Task 1: Localization
- Task 2: Beam Prediction
This example demonstrates how to load and inspect the compressed .npz multipath files.
import numpy as np
# Load the compressed CSI file
data = np.load("path/to/csi_xxxxxx.npz")
# Access Multipath Components
a_real = data['a_real']
a_imag = data['a_imag']
delays = data['tau']
doppler = data['doppler']
# Reconstruct Complex Gain
complex_gain = a_real + 1j * a_imag
# Access Angles (AoD / AoA)
theta_t, phi_t = data['theta_t'], data['phi_t']
theta_r, phi_r = data['theta_r'], data['phi_r']
print(f"Detected {len(delays)} paths.")
print(f"Max Doppler Shift: {np.max(np.abs(doppler)):.2f} Hz")
This example demonstrates an end-to-end pipeline for UAV localization. It defines a custom PyTorch Dataset that loads RGB images, depth maps, and position labels derived from the multipath files, then trains a regression network (a modified MobileNetV2) to predict the UAV's 3D coordinates.
Step 1: Dataset Preparation
Define a Dataset class to handle multi-modal inputs.
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import os
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
# Camera Position: X, Y, Z, Pitch, Yaw, Roll
CAMERA_POSE = np.array([-8.10, -157, -35.7, 40, -180, 0], dtype=np.float32)
# Paths
CSV_PATH = r"d:\exp\beam_labels.csv"
RGB_DIR = r"E:\Datasets1\San Francisco\Scene 1\roof_bs_01\cam"
DEPTH_DIR = r"E:\Datasets1\San Francisco\Scene 1\roof_bs_01\depth"
CSI_DIR = r"D:\multi_path_npz" # Path to csi_xxxx.npz files containing uav_pos
BATCH_SIZE = 16
EPOCHS = 20
LEARNING_RATE = 0.001
# --- 1. Dataset ---
class UAVLocDataset(Dataset):
def __init__(self, csv_file, rgb_dir, depth_dir, csi_dir, transform=None):
self.df = pd.read_csv(csv_file)
self.rgb_dir = rgb_dir
self.depth_dir = depth_dir
self.csi_dir = csi_dir
# Static camera pose repeated for each sample
self.camera_pose = torch.tensor(CAMERA_POSE, dtype=torch.float32)
if transform is None:
self.transform = transforms.Compose([
transforms.Resize((512, 512)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
else:
self.transform = transform
# Depth transform (resize and to tensor)
self.depth_transform = transforms.Compose([
transforms.Resize((512, 512)),
transforms.ToTensor()
])
def __len__(self):
return len(self.df)
def __getitem__(self, idx):
# 1. Filenames
row = self.df.iloc[idx]
npz_filename = row['filename'] # csi_xxxxxx.npz
file_id = npz_filename.replace('csi_', '').replace('.npz', '')
rgb_name = f"img_{file_id}.png"
depth_name = f"depth_{file_id}.npz"
rgb_path = os.path.join(self.rgb_dir, rgb_name)
depth_path = os.path.join(self.depth_dir, depth_name)
csi_path = os.path.join(self.csi_dir, npz_filename)
# 2. Load Inputs (RGB + Depth)
try:
# RGB
rgb_img = Image.open(rgb_path).convert('RGB')
rgb_tensor = self.transform(rgb_img)
# Depth
with np.load(depth_path) as data:
key = data.files[0]
depth_arr = data[key]
depth_img = Image.fromarray(depth_arr, mode='F')
depth_tensor = self.depth_transform(depth_img) # (1, 512, 512)
# Early Fusion: Concatenate RGB and Depth -> (4, 512, 512)
input_img = torch.cat([rgb_tensor, depth_tensor], dim=0)
except Exception as e:
print(f"Error loading images for {file_id}: {e}")5
input_img = torch.zeros(4, 512, 512)
# 3. Load Label (UAV Position)
try:
with np.load(csi_path) as data:
                # 'uav_pos' is stored as [x, z, y]: z sits at index 1 and y at index 2,
                # and the stored y must be negated to obtain the target y
raw_pos = data['uav_pos'] # shape expected (3,)
# Coordinate transformation
# Target: (x, y, z)
# Ensure 1D and convert to float to avoid shape mismatches (e.g. (3,1) vs (3,))
raw_pos = raw_pos.flatten()
t_x = float(raw_pos[0])
t_z = float(raw_pos[1]) # z is index 1
t_y = -float(raw_pos[2]) # y is index 2, needs negation
# Final order: x, y, z
target_pos = torch.tensor([t_x, t_y, t_z], dtype=torch.float32)
except Exception as e:
print(f"Error loading label for {file_id}: {e}")
target_pos = torch.zeros(3, dtype=torch.float32)
return input_img, self.camera_pose, target_pos
Step 2: Model Architecture
Modify MobileNetV2 to accept 4-channel inputs (RGB+D). The classifier is replaced with a regression head that fuses image features with camera pose to output the (x, y, z) coordinates.
# --- 2. Network Model ---
class LocRegressionNet(nn.Module):
def __init__(self, pose_dim=6, output_dim=3):
super(LocRegressionNet, self).__init__()
# Backbone (MobileNetV2)
# Modified input to 4 channels (RGBD)
self.backbone = models.mobilenet_v2(weights=None)
original_first_layer = self.backbone.features[0][0]
self.backbone.features[0][0] = nn.Conv2d(
in_channels=4,
out_channels=original_first_layer.out_channels,
kernel_size=original_first_layer.kernel_size,
stride=original_first_layer.stride,
padding=original_first_layer.padding,
bias=False
)
# Remove classifier, use features only
# MobileNetV2 features output: (1280, 7, 7) for 224x224 input
# For 512x512 input, spatial dim will be larger (16x16)
# We use Global Average Pooling to get (1280,)
self.gap = nn.AdaptiveAvgPool2d(1)
self.feature_extractor = self.backbone.features
feature_dim = 1280 # MobileNetV2 last channel
# Regression Head
# Concatenate Image Features (1280) + Camera Pose (6) = 1286
self.regressor = nn.Sequential(
nn.Linear(feature_dim + pose_dim, 512),
nn.ReLU(),
nn.Dropout(0.2),
nn.Linear(512, 128),
nn.ReLU(),
nn.Linear(128, output_dim) # Output: x, y, z
)
def forward(self, x_img, x_pose):
# Image Features
x = self.feature_extractor(x_img) # (B, 1280, H, W)
x = self.gap(x) # (B, 1280, 1, 1)
x = torch.flatten(x, 1) # (B, 1280)
# Fusion
combined = torch.cat((x, x_pose), dim=1) # (B, 1286)
# Regression
output = self.regressor(combined)
return output
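Before wiring the model into the training loop, a quick shape check (illustrative only, using random tensors and the CAMERA_POSE constant from Step 1) confirms that the 4-channel input and pose fusion produce a (B, 3) output:
# Sanity check with random tensors (batch of 2)
model = LocRegressionNet()
dummy_img = torch.randn(2, 4, 512, 512)                            # RGB + depth
dummy_pose = torch.tensor(CAMERA_POSE).unsqueeze(0).repeat(2, 1)   # (2, 6) static camera pose
with torch.no_grad():
    out = model(dummy_img, dummy_pose)
print(out.shape)  # expected: torch.Size([2, 3]) -> (x, y, z)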
Step 3: Training & Evaluation
Train with MSE loss on the 3D position targets; test MSE and RMSE are reported after every epoch, and the best model is checkpointed.
def main():
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Data
print("Preparing Data...")
if not os.path.exists(CSV_PATH):
print(f"Error: CSV file not found at {CSV_PATH}")
return
full_df = pd.read_csv(CSV_PATH)
train_df, test_df = train_test_split(full_df, test_size=0.2, random_state=42)
# Save temp CSVs
train_df.to_csv('temp_loc_train.csv', index=False)
test_df.to_csv('temp_loc_test.csv', index=False)
train_dataset = UAVLocDataset('temp_loc_train.csv', RGB_DIR, DEPTH_DIR, CSI_DIR)
test_dataset = UAVLocDataset('temp_loc_test.csv', RGB_DIR, DEPTH_DIR, CSI_DIR)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
# Model
model = LocRegressionNet().to(device)
# Loss: MSE for regression
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
print("Starting Training (Regression Task)...")
best_loss = float('inf')
for epoch in range(EPOCHS):
model.train()
running_loss = 0.0
loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
for imgs, poses, targets in loop:
imgs = imgs.to(device)
poses = poses.to(device)
targets = targets.to(device)
optimizer.zero_grad()
outputs = model(imgs, poses)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
running_loss += loss.item()
loop.set_postfix(mse_loss=loss.item())
avg_train_loss = running_loss / len(train_loader)
# Evaluation
model.eval()
test_loss = 0.0
with torch.no_grad():
for imgs, poses, targets in test_loader:
imgs = imgs.to(device)
poses = poses.to(device)
targets = targets.to(device)
outputs = model(imgs, poses)
loss = criterion(outputs, targets)
test_loss += loss.item()
avg_test_loss = test_loss / len(test_loader)
print(f"Epoch {epoch+1}: Train MSE={avg_train_loss:.4f}, Test MSE={avg_test_loss:.4f}, Test RMSE={np.sqrt(avg_test_loss):.4f}")
if avg_test_loss < best_loss:
best_loss = avg_test_loss
torch.save(model.state_dict(), "best_uav_loc_model.pth")
print(" [Saved Best Model]")
print(f"Training Finished! Best Test MSE: {best_loss:.4f}")
# Clean up
if os.path.exists('temp_loc_train.csv'): os.remove('temp_loc_train.csv')
if os.path.exists('temp_loc_test.csv'): os.remove('temp_loc_test.csv')
if __name__ == "__main__":
main()
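Note that nn.MSELoss averages over all three coordinates, so the RMSE printed above is a per-axis figure rather than a 3D distance. If a Euclidean localization error is wanted instead, a post-training helper along these lines could be used; it is a sketch only, assuming positions are in meters and taking the checkpoint path, test DataLoader, and device built in main() as arguments.
def evaluate_euclidean_error(model_path, test_loader, device):
    """Mean/median 3D Euclidean error over the test split (illustrative helper)."""
    model = LocRegressionNet().to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    dists = []
    with torch.no_grad():
        for imgs, poses, targets in test_loader:
            preds = model(imgs.to(device), poses.to(device))
            # Per-sample 3D distance between predicted and true positions
            dists.append(torch.linalg.norm(preds - targets.to(device), dim=1).cpu())
    dists = torch.cat(dists)
    print(f"Mean Euclidean error: {dists.mean():.2f} m | Median: {dists.median():.2f} m")
    return dists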
Step 4: Experimental Results
The model achieves a localization error of approximately 4.37 meters (RMSE) after 20 training epochs on the San Francisco (Scene 3) dataset.
Full training logs:
Preparing Data...
Starting Training (Regression Task)...
Epoch 1/20: 100%|██████████| 2003/2003 [1:16:44<00:00, 2.30s/it, mse_loss=39.4]
Epoch 1: Train MSE=213.2896, Test MSE=91.6826, Test RMSE=9.5751
[Saved Best Model]
Epoch 2/20: 100%|██████████| 2003/2003 [1:16:25<00:00, 2.29s/it, mse_loss=103]
Epoch 2: Train MSE=81.4697, Test MSE=91.0072, Test RMSE=9.5398
[Saved Best Model]
Epoch 3/20: 100%|██████████| 2003/2003 [1:16:29<00:00, 2.29s/it, mse_loss=77.1]
Epoch 3: Train MSE=65.3953, Test MSE=248.1088, Test RMSE=15.7515
Epoch 4/20: 100%|██████████| 2003/2003 [1:18:45<00:00, 2.36s/it, mse_loss=45.4]
Epoch 4: Train MSE=56.0888, Test MSE=34.6644, Test RMSE=5.8877
[Saved Best Model]
Epoch 5/20: 100%|██████████| 2003/2003 [1:16:41<00:00, 2.30s/it, mse_loss=37.3]
Epoch 5: Train MSE=52.3550, Test MSE=24.6334, Test RMSE=4.9632
[Saved Best Model]
Epoch 6/20: 100%|██████████| 2003/2003 [1:19:00<00:00, 2.37s/it, mse_loss=58.8]
Epoch 6: Train MSE=50.6629, Test MSE=50.1175, Test RMSE=7.0794
Epoch 7/20: 100%|██████████| 2003/2003 [1:17:14<00:00, 2.31s/it, mse_loss=16.3]
Epoch 7: Train MSE=48.5903, Test MSE=38.5171, Test RMSE=6.2062
Epoch 8/20: 100%|██████████| 2003/2003 [1:19:27<00:00, 2.38s/it, mse_loss=18.1]
Epoch 8: Train MSE=46.0435, Test MSE=1913.6239, Test RMSE=43.7450
Epoch 9/20: 100%|██████████| 2003/2003 [1:19:23<00:00, 2.38s/it, mse_loss=12.8]
Epoch 9: Train MSE=45.0446, Test MSE=59.3143, Test RMSE=7.7016
Epoch 10/20: 100%|██████████| 2003/2003 [1:17:08<00:00, 2.31s/it, mse_loss=46.3]
Epoch 10: Train MSE=43.3956, Test MSE=24.0482, Test RMSE=4.9039
[Saved Best Model]
Epoch 11/20: 100%|██████████| 2003/2003 [1:17:10<00:00, 2.31s/it, mse_loss=40.5]
Epoch 11: Train MSE=43.5014, Test MSE=21.3252, Test RMSE=4.6179
 [Saved Best Model]
Epoch 12/20: 100%|██████████| 2003/2003 [1:17:37<00:00, 2.33s/it, mse_loss=23.3]
Epoch 12: Train MSE=41.2209, Test MSE=26.2099, Test RMSE=5.1196
Epoch 13/20: 100%|██████████| 2003/2003 [1:17:12<00:00, 2.31s/it, mse_loss=56.4]
Epoch 13: Train MSE=41.5635, Test MSE=21.1861, Test RMSE=4.6028
[Saved Best Model]
Epoch 14/20: 100%|██████████| 2003/2003 [1:17:29<00:00, 2.32s/it, mse_loss=83.5]
Epoch 14: Train MSE=40.3168, Test MSE=22.0331, Test RMSE=4.6939
Epoch 15/20: 100%|██████████| 2003/2003 [1:17:26<00:00, 2.32s/it, mse_loss=19.2]
Epoch 15: Train MSE=39.6902, Test MSE=23.4658, Test RMSE=4.8442
Epoch 16/20: 100%|██████████| 2003/2003 [1:16:59<00:00, 2.31s/it, mse_loss=29.5]
Epoch 16: Train MSE=40.3435, Test MSE=20.3338, Test RMSE=4.5093
[Saved Best Model]
Epoch 17/20: 100%|██████████| 2003/2003 [1:17:07<00:00, 2.31s/it, mse_loss=61.2]
Epoch 17: Train MSE=39.6885, Test MSE=41.2090, Test RMSE=6.4194
Epoch 18/20: 100%|██████████| 2003/2003 [1:18:01<00:00, 2.34s/it, mse_loss=65.1]
Epoch 18: Train MSE=38.5666, Test MSE=21.0614, Test RMSE=4.5893
Epoch 19/20: 100%|██████████| 2003/2003 [1:18:52<00:00, 2.36s/it, mse_loss=55.3]
Epoch 19: Train MSE=38.0966, Test MSE=19.1094, Test RMSE=4.3714
[Saved Best Model]
Epoch 20/20: 100%|██████████| 2003/2003 [1:16:42<00:00, 2.30s/it, mse_loss=53.4]
Epoch 20: Train MSE=38.0472, Test MSE=20.9054, Test RMSE=4.5722
Training Finished! Best Test MSE: 19.1094
This task demonstrates ISAC beam prediction with an oversampled 256-beam DFT codebook. It involves two steps: label generation and model training.
Step 1: Label Generation
Use the multipath components to compute the optimal beam index for an 8x8 UPA with a 16x16 (256-beam) oversampled DFT codebook.
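In equation form (notation mine, matching the code below): the script synthesizes a narrowband MISO channel from the paths and labels each sample with the codeword of highest beamforming gain,

$$\mathbf{h} = \sum_{p} \alpha_p \, e^{-j 2\pi f_c \tau_p} \, \mathbf{a}_t(\theta_{t,p}, \phi_{t,p}), \qquad b^{\star} = \arg\max_{b} \left| \mathbf{w}_b^{H} \mathbf{h} \right|^{2}$$

where alpha_p, tau_p, and (theta_t, phi_t) are the gain, delay, and departure angles of path p, and w_b is the b-th column of the codebook.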
import numpy as np
import os
import glob
import pandas as pd
# --- Configuration ---
FC = 4.9e9 # Carrier frequency 4.9 GHz
C = 299792458 # Speed of light
WAVELENGTH = C / FC
ANTENNA_SPACING = WAVELENGTH / 2 # Half-wavelength spacing
# BS Antenna Config (8x8 UPA)
Nx, Ny = 8, 8
NUM_ANTENNAS = Nx * Ny
# Beam Config (Oversampled)
Mx, My = 16, 16 # 16x16 = 256 beams
NUM_BEAMS = Mx * My
def get_upa_steering_vector(theta, phi, N_x, N_y):
"""
Generate UPA steering vector.
theta: Elevation angle (0-180, 0 is zenith)
phi: Azimuth angle (-180-180)
"""
# Radian input
theta_rad = theta
phi_rad = phi
# Antenna index grid
# Array in XY plane
m = np.arange(N_x)
n = np.arange(N_y)
# Spatial frequencies u, v
# u: x-axis phase change
# v: y-axis phase change
u = np.sin(theta_rad) * np.cos(phi_rad)
v = np.sin(theta_rad) * np.sin(phi_rad)
    # Per-axis steering vectors with half-wavelength spacing (d / lambda = 0.5)
    # a_x shape (Nx,), a_y shape (Ny,)
a_x = np.exp(1j * 2 * np.pi * 0.5 * m * u)
a_y = np.exp(1j * 2 * np.pi * 0.5 * n * v)
# Kronecker product for full array response
# output shape: (Nx*Ny, )
steering_vector = np.kron(a_x, a_y)
return steering_vector
def build_channel_from_multipath(npz_path):
"""
Read multipath data from .npz and synthesize MISO channel vector h.
"""
data = np.load(npz_path)
# 1. Extract multipath components
# Flatten to avoid shape mismatch
# Complex gains
alphas = (data['a_real'] + 1j * data['a_imag']).flatten()
# Angles (AoD for BS)
thetas_t = data['theta_t'].flatten()
phis_t = data['phi_t'].flatten()
# Delays
taus = data['tau'].flatten()
# 2. Init channel vector h (64,)
h = np.zeros(NUM_ANTENNAS, dtype=complex)
# 3. Sum of Paths
# Assume Rx steering vector is 1 (MISO)
num_paths = len(alphas)
for i in range(num_paths):
# Phase shift (Delay Term)
phase_shift = np.exp(-1j * 2 * np.pi * FC * taus[i])
# Path coefficient
path_coeff = alphas[i] * phase_shift
# Tx Steering Vector
a_t = get_upa_steering_vector(thetas_t[i], phis_t[i], Nx, Ny)
# Accumulate
h += path_coeff * a_t
return h
def create_oversampled_dft_codebook(N, M):
"""
Create Oversampled DFT codebook.
N: Number of antennas
M: Number of beams
Returns matrix of shape (N, M)
"""
n = np.arange(N)
k = np.arange(M)
# DFT vectors: exp(j * 2 * pi * n * k / M)
# Normalized by sqrt(N) to maintain unit power (per antenna element scaling)
# W[n, k]
W = np.exp(1j * 2 * np.pi * np.outer(n, k) / M) / np.sqrt(N)
return W
def create_codebook(N_x, N_y, M_x, M_y):
"""
Create 2D Oversampled DFT codebook.
Codebook size = (Nx * Ny, Mx * My)
"""
# X-dim DFT matrix (Nx antennas, Mx beams)
Fx = create_oversampled_dft_codebook(N_x, M_x)
# Y-dim DFT matrix (Ny antennas, My beams)
Fy = create_oversampled_dft_codebook(N_y, M_y)
# 2D-DFT Codebook via Kronecker product
# Codebook shape: (Nx*Ny, Mx*My)
# Each column is a beam codeword
Codebook = np.kron(Fx, Fy)
return Codebook
def get_optimal_beam_label(h, codebook):
"""
Calculate optimal beam index.
h: Channel vector (Num_Antennas, )
codebook: Codebook matrix (Num_Antennas, Num_Beams)
"""
# 1. Beamforming: Calculate signal strength
# Transpose conjugate
# received_signals shape: (Num_Beams, )
received_signals = codebook.conj().T @ h
# 2. Calculate power
powers = np.abs(received_signals) ** 2
# 3. Find index of max power
best_beam_idx = np.argmax(powers)
return best_beam_idx, powers
# --- Batch Processing ---
if __name__ == "__main__":
# Paths
INPUT_DIR = r"D:\multi_path_npz"
# Save results to script dir
OUTPUT_CSV = os.path.join(os.path.dirname(os.path.abspath(__file__)), "beam_labels.csv")
# 1. Generate Codebook
print(f"Generating Oversampled DFT Codebook ({Mx}x{My} = {NUM_BEAMS} beams)...")
try:
W = create_codebook(Nx, Ny, Mx, My)
print(f"Codebook shape: {W.shape}")
# 2. Get all .npz files
pattern = os.path.join(INPUT_DIR, "csi_*.npz")
files = sorted(glob.glob(pattern))
print(f"Found {len(files)} data files.")
results = []
# 3. Batch processing
print("Starting batch processing...")
for idx, file_path in enumerate(files):
try:
# Parse filename
filename = os.path.basename(file_path)
# Generate channel
h_channel = build_channel_from_multipath(file_path)
# Get label
label, power_vectors = get_optimal_beam_label(h_channel, W)
max_power = power_vectors[label]
results.append({
'filename': filename,
'beam_index': label,
'received_power': max_power
})
# Log progress every 100 files
if (idx + 1) % 100 == 0:
print(f"Processed {idx + 1}/{len(files)} files...")
except Exception as e:
print(f"Error processing {filename}: {e}")
# 4. Save results
if results:
df = pd.DataFrame(results)
df.to_csv(OUTPUT_CSV, index=False)
print(f"\nFinished! Results saved to: {OUTPUT_CSV}")
print("First 5 rows:")
print(df.head())
else:
print("No results generated. Please check the paths.")
except Exception as e:
print(f"An error occurred: {e}")
Step 2: Ablation Study (RGB vs Depth vs Fusion)
Define an ISAC_MobileNet classifier that predicts the optimal beam index (0-255). The script trains three models in parallel to compare RGB-only, depth-only, and RGB-D fusion inputs.
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import os
import numpy as np
# --- 1. Network Model Design (Based on MobileNetV2) ---
class ISAC_MobileNet(nn.Module):
def __init__(self, num_classes=256, in_channels=4, pretrained=False):
super(ISAC_MobileNet, self).__init__()
# Load MobileNetV2
# pretrained=False: Random initialization, no ImageNet weights
self.backbone = models.mobilenet_v2(weights=None)
# --- Modify Input Layer ---
# Modify first layer to accept custom channels (RGB+D=4, RGB=3, Depth=1)
original_first_layer = self.backbone.features[0][0]
# Create new conv layer with specified input channels
new_first_layer = nn.Conv2d(
in_channels=in_channels,
out_channels=original_first_layer.out_channels,
kernel_size=original_first_layer.kernel_size,
stride=original_first_layer.stride,
padding=original_first_layer.padding,
bias=False
)
# Replace the first layer
self.backbone.features[0][0] = new_first_layer
# --- Modify Output Layer (Classifier) ---
# MobileNetV2 classifier is classifier[1]
in_features = self.backbone.classifier[1].in_features
self.backbone.classifier[1] = nn.Linear(in_features, num_classes)
def forward(self, x):
return self.backbone(x)
# --- 2. Data Loader Design ---
class RGBDBeamDataset(Dataset):
def __init__(self, csv_file, rgb_dir, depth_dir, transform=None):
"""
csv_file: path to beam_labels.csv generated by generateH.py
rgb_dir: path to RGB image directory
depth_dir: path to depth map directory
"""
self.df = pd.read_csv(csv_file)
self.rgb_dir = rgb_dir
self.depth_dir = depth_dir
# Default preprocessing
if transform is None:
self.transform = transforms.Compose([
transforms.Resize((512, 512)), # Resize to 512x512
transforms.ToTensor(), # ToTensor (normalizes to [0,1])
])
else:
self.transform = transform
# RGB Normalization (ImageNet stats)
self.rgb_normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
def __len__(self):
return len(self.df)
def __getitem__(self, idx):
# 1. Get filename and label
row = self.df.iloc[idx]
npz_name = row['filename'] # e.g., csi_000000.npz
label = int(row['beam_index'])
# 2. Construct filenames
# e.g., csi_000000.npz -> 000000
file_id = npz_name.replace('csi_', '').replace('.npz', '')
# Modify filename format
rgb_name = f"img_{file_id}.png"
depth_name = f"depth_{file_id}.npz" # Depth in .npz format
rgb_path = os.path.join(self.rgb_dir, rgb_name)
depth_path = os.path.join(self.depth_dir, depth_name)
# 3. Load images
try:
# Load RGB (PIL default)
rgb_img = Image.open(rgb_path).convert('RGB')
# Load Depth (.npz)
with np.load(depth_path) as data:
# Assume single array or unknown key
key = data.files[0]
depth_arr = data[key] # Read as numpy array
# Assume float depth
depth_img = Image.fromarray(depth_arr, mode='F') # mode='F' (32-bit float)
except Exception as e:
print(f"Error reading image: {rgb_path} or {depth_path} : {e}")
# Return dummy data on error
return torch.zeros(4, 512, 512), label
# 4. Preprocessing
# Ensure consistent resize for RGB and Depth
rgb_tensor = self.transform(rgb_img) # (3, 512, 512), range [0, 1]
        depth_tensor = self.transform(depth_img)  # (1, 512, 512); float (mode 'F') depth is passed through by ToTensor, not rescaled to [0, 1]
# 5. Normalization
rgb_tensor = self.rgb_normalize(rgb_tensor) # Normalize RGB
# 6. Early Fusion
# Concat channel dim -> (4, 512, 512)
input_tensor = torch.cat([rgb_tensor, depth_tensor], dim=0)
return input_tensor, label
# --- 3. Training/Testing Example ---
if __name__ == "__main__":
from sklearn.model_selection import train_test_split
from tqdm import tqdm
# --- 1. Settings ---
csv_path = r"d:\exp\beam_labels.csv"
rgb_path = r"E:\Datasets1\San Francisco\Scene 1\roof_bs_01\cam"
depth_path = r"E:\Datasets1\San Francisco\Scene 1\roof_bs_01\depth"
BATCH_SIZE = 16
EPOCHS = 10
LEARNING_RATE = 0.001
# Auto device selection
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# --- 2. Data Splitting ---
# Load full data
full_df = pd.read_csv(csv_path)
# Split train (80%) and test (20%)
train_df, test_df = train_test_split(full_df, test_size=0.2, random_state=42, shuffle=True)
# Save temp files for Dataset
train_df.to_csv('temp_train.csv', index=False)
test_df.to_csv('temp_test.csv', index=False)
train_dataset = RGBDBeamDataset('temp_train.csv', rgb_path, depth_path)
test_dataset = RGBDBeamDataset('temp_test.csv', rgb_path, depth_path)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
# --- 3. Model, Loss, Optimizer ---
# Instantiate 3 models for ablation study
print("Initializing models for Ablation Study...")
# 1. RGB+Depth (4 channels)
model_all = ISAC_MobileNet(num_classes=256, in_channels=4, pretrained=False).to(device)
opt_all = torch.optim.Adam(model_all.parameters(), lr=LEARNING_RATE)
# 2. RGB Only (3 channels)
model_rgb = ISAC_MobileNet(num_classes=256, in_channels=3, pretrained=False).to(device)
opt_rgb = torch.optim.Adam(model_rgb.parameters(), lr=LEARNING_RATE)
# 3. Depth Only (1 channel)
model_depth = ISAC_MobileNet(num_classes=256, in_channels=1, pretrained=False).to(device)
opt_depth = torch.optim.Adam(model_depth.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()
# --- 4. Training Loop ---
print("\nStarting Parallel Training for 3 Models...")
best_acc_all = 0.0
best_acc_rgb = 0.0
best_acc_depth = 0.0
for epoch in range(EPOCHS):
model_all.train()
model_rgb.train()
model_depth.train()
# Statistics for 3 models
stats = {
'all': {'loss': 0.0, 'correct': 0},
'rgb': {'loss': 0.0, 'correct': 0},
'depth': {'loss': 0.0, 'correct': 0},
'total': 0
}
loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
for inputs, labels in loop:
# Prepare inputs on GPU
inputs = inputs.to(device)
labels = labels.to(device)
# Split inputs
# inputs is [B, 4, H, W] -> RGB=[B, :3, ...], Depth=[B, 3:, ...]
input_rgb = inputs[:, :3, :, :]
input_depth = inputs[:, 3:, :, :]
# --- Train Model 1 (RGB+D) ---
opt_all.zero_grad()
out_all = model_all(inputs)
loss_all = criterion(out_all, labels)
loss_all.backward()
opt_all.step()
# --- Train Model 2 (RGB) ---
opt_rgb.zero_grad()
out_rgb = model_rgb(input_rgb)
loss_rgb = criterion(out_rgb, labels)
loss_rgb.backward()
opt_rgb.step()
# --- Train Model 3 (Depth) ---
opt_depth.zero_grad()
out_depth = model_depth(input_depth)
loss_depth = criterion(out_depth, labels)
loss_depth.backward()
opt_depth.step()
# --- Stats Update ---
stats['total'] += labels.size(0)
# RGB+D
stats['all']['loss'] += loss_all.item()
_, pred_all = torch.max(out_all.data, 1)
stats['all']['correct'] += (pred_all == labels).sum().item()
# RGB
stats['rgb']['loss'] += loss_rgb.item()
_, pred_rgb = torch.max(out_rgb.data, 1)
stats['rgb']['correct'] += (pred_rgb == labels).sum().item()
# Depth
stats['depth']['loss'] += loss_depth.item()
_, pred_depth = torch.max(out_depth.data, 1)
stats['depth']['correct'] += (pred_depth == labels).sum().item()
loop.set_postfix({
'L_All': f"{loss_all.item():.2f}",
'L_RGB': f"{loss_rgb.item():.2f}",
'L_Dep': f"{loss_depth.item():.2f}"
})
# Calculate epoch metrics
num_batches = len(train_loader)
total_samples = stats['total']
train_res = {}
for k in ['all', 'rgb', 'depth']:
train_res[k] = {
'loss': stats[k]['loss'] / num_batches,
'acc': stats[k]['correct'] / total_samples
}
# --- 5. Testing/Validation ---
model_all.eval()
model_rgb.eval()
model_depth.eval()
test_stats = {
'all': {'correct': 0, 'top5': 0},
'rgb': {'correct': 0, 'top5': 0},
'depth': {'correct': 0, 'top5': 0},
'total': 0
}
with torch.no_grad():
for inputs, labels in test_loader:
inputs = inputs.to(device)
labels = labels.to(device)
input_rgb = inputs[:, :3, :, :]
input_depth = inputs[:, 3:, :, :]
test_stats['total'] += labels.size(0)
# Helper function for eval
def eval_batch(model, x, key):
out = model(x)
# Top 1
_, pred = torch.max(out.data, 1)
test_stats[key]['correct'] += (pred == labels).sum().item()
# Top 5
_, top5 = out.topk(5, 1, True, True)
top5 = top5.t()
correct_row = top5.eq(labels.view(1, -1).expand_as(top5))
test_stats[key]['top5'] += correct_row.reshape(-1).float().sum().item()
eval_batch(model_all, inputs, 'all')
eval_batch(model_rgb, input_rgb, 'rgb')
eval_batch(model_depth, input_depth, 'depth')
# Print Results
print(f"\nEpoch {epoch+1} Results:")
print(f" [RGB+D] Train Loss: {train_res['all']['loss']:.4f}, Acc: {train_res['all']['acc']:.4f} | "
f"Test Acc: {test_stats['all']['correct']/test_stats['total']:.4f}, Top5: {test_stats['all']['top5']/test_stats['total']:.4f}")
print(f" [RGB ] Train Loss: {train_res['rgb']['loss']:.4f}, Acc: {train_res['rgb']['acc']:.4f} | "
f"Test Acc: {test_stats['rgb']['correct']/test_stats['total']:.4f}, Top5: {test_stats['rgb']['top5']/test_stats['total']:.4f}")
print(f" [Depth] Train Loss: {train_res['depth']['loss']:.4f}, Acc: {train_res['depth']['acc']:.4f} | "
f"Test Acc: {test_stats['depth']['correct']/test_stats['total']:.4f}, Top5: {test_stats['depth']['top5']/test_stats['total']:.4f}")
# Save Checkpoints
current_acc_all = test_stats['all']['correct']/test_stats['total']
current_acc_rgb = test_stats['rgb']['correct']/test_stats['total']
current_acc_depth = test_stats['depth']['correct']/test_stats['total']
if current_acc_all > best_acc_all:
best_acc_all = current_acc_all
torch.save(model_all.state_dict(), "best_model_rgbd.pth")
if current_acc_rgb > best_acc_rgb:
best_acc_rgb = current_acc_rgb
torch.save(model_rgb.state_dict(), "best_model_rgb.pth")
if current_acc_depth > best_acc_depth:
best_acc_depth = current_acc_depth
torch.save(model_depth.state_dict(), "best_model_depth.pth")
print(f"\nTraining Finished!")
print(f"Best Acc - RGB+D: {best_acc_all:.4f}, RGB: {best_acc_rgb:.4f}, Depth: {best_acc_depth:.4f}")
# Clean temp files
if os.path.exists('temp_train.csv'): os.remove('temp_train.csv')
if os.path.exists('temp_test.csv'): os.remove('temp_test.csv')
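For completeness, a minimal single-image inference sketch using the depth-only checkpoint saved by the script above. It reuses the imports and the dataset's depth preprocessing (resize + ToTensor, no normalization); the depth file path is a placeholder.
def predict_beam(depth_npz_path, checkpoint="best_model_depth.pth", device="cpu"):
    """Predict the beam index (0-255) from a single depth map (illustrative only)."""
    model = ISAC_MobileNet(num_classes=256, in_channels=1).to(device)
    model.load_state_dict(torch.load(checkpoint, map_location=device))
    model.eval()
    # Load the depth .npz and convert to a 32-bit float PIL image, as in RGBDBeamDataset
    with np.load(depth_npz_path) as data:
        depth_arr = data[data.files[0]]
    depth_img = Image.fromarray(depth_arr, mode='F')
    tf = transforms.Compose([transforms.Resize((512, 512)), transforms.ToTensor()])
    x = tf(depth_img).unsqueeze(0).to(device)   # (1, 1, 512, 512)
    with torch.no_grad():
        logits = model(x)
    return int(logits.argmax(dim=1).item())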
Step 3: Experimental Results
After a 10-epoch parallel-training ablation study on the San Francisco (Scene 3) dataset, the depth-only model reaches about 84.7% Top-1 test accuracy versus 47.5% for RGB-only (the RGB-D fusion model peaks at 72.1% but its test accuracy fluctuates strongly across epochs). This indicates that depth maps, which encode geometric structure, are substantially more informative than RGB images for beam prediction in low-altitude base-station-to-UAV communication.
Full training logs:
Starting Parallel Training for 3 Models...
Epoch 1/10: 100%|██████████| 2003/2003 [1:27:43<00:00, 2.63s/it, L_All=1.44, L_RGB=2.33, L_Dep=1.23]
Epoch 1 Results:
[RGB+D] Train Loss: 1.5566, Acc: 0.4908 | Test Acc: 0.1020, Top5: 0.3739
[RGB ] Train Loss: 2.5704, Acc: 0.2164 | Test Acc: 0.2200, Top5: 0.6700
[Depth] Train Loss: 1.5120, Acc: 0.5069 | Test Acc: 0.1503, Top5: 0.5386
Epoch 2/10: 100%|██████████| 2003/2003 [1:16:58<00:00, 2.31s/it, L_All=0.67, L_RGB=2.21, L_Dep=1.09]
Epoch 2 Results:
[RGB+D] Train Loss: 0.7116, Acc: 0.7756 | Test Acc: 0.7205, Top5: 0.9236
[RGB ] Train Loss: 2.3355, Acc: 0.2583 | Test Acc: 0.2953, Top5: 0.7357
[Depth] Train Loss: 0.8193, Acc: 0.7398 | Test Acc: 0.7637, Top5: 0.9032
Epoch 3/10: 100%|██████████| 2003/2003 [1:15:57<00:00, 2.28s/it, L_All=0.64, L_RGB=2.34, L_Dep=0.29]
Epoch 3 Results:
[RGB+D] Train Loss: 0.4532, Acc: 0.8547 | Test Acc: 0.1600, Top5: 0.2278
[RGB ] Train Loss: 1.9412, Acc: 0.3443 | Test Acc: 0.3920, Top5: 0.9171
[Depth] Train Loss: 0.6296, Acc: 0.8060 | Test Acc: 0.8263, Top5: 0.9532
Epoch 4/10: 100%|██████████| 2003/2003 [1:15:56<00:00, 2.27s/it, L_All=0.06, L_RGB=1.11, L_Dep=0.50]
Epoch 4 Results:
[RGB+D] Train Loss: 0.3539, Acc: 0.8837 | Test Acc: 0.5800, Top5: 0.8875
[RGB ] Train Loss: 1.5842, Acc: 0.4114 | Test Acc: 0.4221, Top5: 0.9574
[Depth] Train Loss: 0.5568, Acc: 0.8250 | Test Acc: 0.5573, Top5: 0.8552
Epoch 5/10: 100%|██████████| 2003/2003 [1:15:57<00:00, 2.28s/it, L_All=0.38, L_RGB=0.76, L_Dep=0.59]
Epoch 5 Results:
[RGB+D] Train Loss: 0.2961, Acc: 0.9043 | Test Acc: 0.2723, Top5: 0.6395
[RGB ] Train Loss: 1.3952, Acc: 0.4430 | Test Acc: 0.4412, Top5: 0.9687
[Depth] Train Loss: 0.5185, Acc: 0.8366 | Test Acc: 0.8388, Top5: 0.9563
Epoch 6/10: 100%|██████████| 2003/2003 [1:15:59<00:00, 2.28s/it, L_All=0.17, L_RGB=1.26, L_Dep=0.45]
Epoch 6 Results:
[RGB+D] Train Loss: 0.2500, Acc: 0.9234 | Test Acc: 0.5200, Top5: 0.8578
[RGB ] Train Loss: 1.2897, Acc: 0.4591 | Test Acc: 0.4424, Top5: 0.9829
[Depth] Train Loss: 0.4953, Acc: 0.8416 | Test Acc: 0.8236, Top5: 0.9562
Epoch 7/10: 100%|██████████| 2003/2003 [1:16:05<00:00, 2.28s/it, L_All=0.18, L_RGB=1.28, L_Dep=0.38]
Epoch 7 Results:
[RGB+D] Train Loss: 0.2032, Acc: 0.9385 | Test Acc: 0.6484, Top5: 0.9045
[RGB ] Train Loss: 1.2166, Acc: 0.4632 | Test Acc: 0.4584, Top5: 0.9853
[Depth] Train Loss: 0.4778, Acc: 0.8476 | Test Acc: 0.8471, Top5: 0.9579
Epoch 8/10: 100%|██████████| 2003/2003 [1:15:47<00:00, 2.27s/it, L_All=0.67, L_RGB=2.25, L_Dep=1.68]
Epoch 8 Results:
[RGB+D] Train Loss: 0.1874, Acc: 0.9436 | Test Acc: 0.2829, Top5: 0.5870
[RGB ] Train Loss: 1.1534, Acc: 0.4729 | Test Acc: 0.4563, Top5: 0.9878
[Depth] Train Loss: 0.4628, Acc: 0.8509 | Test Acc: 0.8395, Top5: 0.9574
Epoch 9/10: 100%|██████████| 2003/2003 [1:24:08<00:00, 2.52s/it, L_All=0.07, L_RGB=1.52, L_Dep=0.14]
Epoch 9 Results:
[RGB+D] Train Loss: 0.1677, Acc: 0.9481 | Test Acc: 0.1099, Top5: 0.3260
[RGB ] Train Loss: 1.1056, Acc: 0.4796 | Test Acc: 0.4567, Top5: 0.9916
[Depth] Train Loss: 0.4489, Acc: 0.8524 | Test Acc: 0.7974, Top5: 0.9559
Epoch 10/10: 100%|██████████| 2003/2003 [1:56:59<00:00, 3.50s/it, L_All=0.27, L_RGB=0.69, L_Dep=0.40]
Epoch 10 Results:
[RGB+D] Train Loss: 0.1534, Acc: 0.9517 | Test Acc: 0.3906, Top5: 0.7149
[RGB ] Train Loss: 1.0673, Acc: 0.4847 | Test Acc: 0.4745, Top5: 0.9920
[Depth] Train Loss: 0.4380, Acc: 0.8561 | Test Acc: 0.8468, Top5: 0.9598
Training Finished!
Best Acc - RGB+D: 0.7205, RGB: 0.4745, Depth: 0.8471