Source code for bioneuralnet.network.pysmccnet.math_helpers

import numpy as np
import torch
import warnings
from typing import Union, List, Dict, Any

[docs] def l2n(vec: Union[np.ndarray, list]) -> float: """Computes the L2 norm of a vector; returns 0.05 if norm is zero. Args: vec (np.ndarray | list): Input vector. Returns: float: The L2 norm or 0.05 if zero. """ vec = np.array(vec) a = np.sqrt(np.sum(vec**2)) if a == 0: a = 0.05 return a
[docs] def soft(x: Union[np.ndarray, list, float], d: float) -> Union[np.ndarray, float]: """Applies soft thresholding to input x with threshold d. Args: x (np.ndarray | list | float): Input data. d (float): Threshold value. Returns: np.ndarray | float: Thresholded output; sign(x) * max(0, abs(x) - d). """ x = np.array(x) return np.sign(x) * np.maximum(0, np.abs(x) - d)
[docs] def r_vec_mult_sum(v1: Union[torch.Tensor, np.ndarray, list], v2: Union[torch.Tensor, np.ndarray, list]) -> float: """Computes element-wise multiplication and sum with vector recycling. Args: v1 (torch.Tensor | np.ndarray | list): First input vector. v2 (torch.Tensor | np.ndarray | list): Second input vector. Returns: float: Sum of element-wise product after recycling to matching lengths. """ # ensure 1d tensors if not isinstance(v1, torch.Tensor): v1 = torch.tensor(np.array(v1).flatten(), dtype=torch.float64) else: v1 = v1.flatten() if not isinstance(v2, torch.Tensor): v2 = torch.tensor(np.array(v2).flatten(), dtype=torch.float64) else: v2 = v2.flatten() # device alignment device = v1.device v2 = v2.to(device) n1 = v1.numel() n2 = v2.numel() if n1 == 0 or n2 == 0: return 0.0 target_len = max(n1, n2) # recycling via repeat if n1 < target_len: repeats = (target_len // n1) + 1 v1 = v1.repeat(repeats)[:target_len] if n2 < target_len: repeats = (target_len // n2) + 1 v2 = v2.repeat(repeats)[:target_len] return torch.sum(v1 * v2).item()
[docs] def r_scale_torch(x: Union[torch.Tensor, np.ndarray, list]) -> torch.Tensor: """Pytorch scaling using sample standard deviation. Args: x (torch.Tensor | np.ndarray | list): Input data; converted to float32 tensor if not already. Returns: torch.Tensor: Centered and scaled data tensor. """ # convert to tensor if not isinstance(x, torch.Tensor): x = torch.tensor(x, dtype=torch.float32) # handle 1d arrays if x.ndim == 1: x = x.view(-1, 1) # calculate mean mean = torch.mean(x, dim=0) # calculate std std = torch.std(x, dim=0, unbiased=True) # handle constant columns std[std == 0] = 1.0 return (x - mean) / std
[docs] def r_scale(x: Union[np.ndarray, list]) -> np.ndarray: """Numpy scaling using sample standard deviation. Args: x (np.ndarray | list): Input data matrix. Returns: np.ndarray: Scaled data where each column has mean 0 and sample std 1. """ # convert to float numpy x = np.array(x, dtype=float) # handle 1d arrays if x.ndim == 1: x = x.reshape(-1, 1) # calculate mean mean = np.mean(x, axis=0) # calculate std std = np.std(x, axis=0, ddof=1) # handle constant columns std[std == 0] = 1.0 return (x - mean) / std
def _splsda(x: np.ndarray, y: Union[np.ndarray, list], K: int = 3, eta: float = 0.5, kappa: float = 0.5, scale_x: bool = False) -> Dict[str, Union[np.ndarray, List[int]]]: """Sparse PLS-DA implementation Args: x (np.ndarray): Predictor matrix of shape (n, p). y (np.ndarray | list): Binary response vector (0/1) of shape (n,). K (int): Number of latent components. eta (float): Sparsity parameter (0 to 1); higher values induce more sparsity. kappa (float): Ridge mixing parameter for direction estimation. scale_x (bool): If True, scale x internally before processing. Returns: Dict: Dictionary containing 'T' (scores), 'W' (weights), and 'A' (selected indices). """ n, p = x.shape y_vec = np.array(y).flatten().astype(float) if scale_x: x = r_scale(x) X_res = x.copy() T_scores = np.zeros((n, K)) W_full = np.zeros((p, K)) K_actual = min(K, min(n, p)) for k in range(K_actual): # direction vector if kappa > 0: XtX = X_res.T @ X_res ridge = kappa * np.eye(p) Xty = X_res.T @ y_vec try: w = np.linalg.solve(XtX + ridge, Xty) except np.linalg.LinAlgError: w = X_res.T @ y_vec else: w = X_res.T @ y_vec # normalize w_norm = np.linalg.norm(w) if w_norm > 0: w = w / w_norm # soft thresholding threshold = eta * np.max(np.abs(w)) w_sparse = np.sign(w) * np.maximum(0, np.abs(w) - threshold) # normalize sparse weights w_sparse_norm = np.linalg.norm(w_sparse) if w_sparse_norm > 0: w_sparse = w_sparse / w_sparse_norm # compute scores t = X_res @ w_sparse t_norm_sq = t @ t if t_norm_sq == 0: T_scores[:, k] = 0 W_full[:, k] = 0 continue # deflation p_loading = X_res.T @ t / t_norm_sq X_res = X_res - np.outer(t, p_loading) T_scores[:, k] = t W_full[:, k] = w_sparse # identify selected variables selected_mask = np.any(W_full != 0, axis=1) A = np.where(selected_mask)[0] # fallback for empty selection if len(A) == 0: importance = np.sum(np.abs(W_full), axis=1) n_keep = max(1, int(p * (1 - eta))) A = np.argsort(importance)[-n_keep:] A = np.sort(A) W_selected = W_full[A, :] return { 'T': T_scores, 'W': W_selected, 'A': A }