Source code for pysimrel.pysimrel

from pysimrel.utilities import *
from pysimrel.pysimrel import *
from functools import reduce
from typing import Union
from dataclasses import dataclass, field
import numpy as np

Prm = Union[str, int]

[docs]@dataclass(init=False) class Covariances: """Class defining various covariances of the simulated data This provides a nice graphical output of covariances. :param cov_ww: Covariance matrix of latent components of response :type cov_ww: np.ndarray :param cov_zz: Covariance matrix of latent components of predictors :type cov_zz: np.ndarray :param cov_zw: Covariance matrix containing covariances between latent components of predictors and response :type cov_zw: np.ndarray :param cov_yy: Covariance matrix of response :type cov_yy: np.ndarray :param cov_xx: Covariance matrix of response :type cov_xx: np.ndarray :param cov_xy: Covariance matrix containing covariances between predictors and response :type cov_xy: np.ndarray """ cov_ww: np.ndarray cov_zz: np.ndarray cov_zw: np.ndarray cov_yy: np.ndarray cov_xx: np.ndarray cov_xy: np.ndarray def __repr__(self): ww = self.cov_ww.shape zz = self.cov_zz.shape zw = self.cov_zw.shape wz = self.cov_zw.T.shape yy = self.cov_yy.shape xx = self.cov_xx.shape xy = self.cov_xy.shape yx = self.cov_xy.T.shape return f""" Numpy Arrays: {'+'.ljust(14, '-') + '+'.ljust(20, '-') + '+'} {'|'.ljust(14, ' ') + '|'.ljust(20, ' ') + '|'} {'|' + 'cov_ww'.center(13) + '|' + 'cov_wz'.center(19) + '|'} {'|' + 'cov_yy'.center(13) + '|' + 'cov_xy'.center(19) + '|'} {'|' + str(yy).center(13) + '|' + str(yx).center(19) + '|'} {'|'.ljust(14, ' ') + '|'.ljust(20, ' ') + '|'} {'+'.ljust(14, '-') + '+'.ljust(20, '-') + '+'} {'|'.ljust(14, ' ') + '|'.ljust(20, ' ') + '|'} {'|'.ljust(14, ' ') + '|'.ljust(20, ' ') + '|'} {'|' + 'cov_zw'.center(13) + '|' + 'cov_zz'.center(19) + '|'} {'|' + 'cov_xy'.center(13) + '|' + 'cov_xx'.center(19) + '|'} {'|' + str(xy).center(13) + '|' + str(xx).center(19) + '|'} {'|'.ljust(14, ' ') + '|'.ljust(20, ' ') + '|'} {'|'.ljust(14, ' ') + '|'.ljust(20, ' ') + '|'} {'+'.ljust(14, '-') + '+'.ljust(20, '-') + '+'} """
[docs]@dataclass(init=False) class Properties: """A data class for different properties of simulated object :param eigen_x: Eigenvalues corresponding to predictors :type eigen_x: np.ndarray :param eigen_y: Eigenvalues corresponding to responses :type eigen_y: np.ndarray :param relevant_predictors: Position index of relevant predictors for each responses :type relevant_predictors: np.ndarray :param sigma_latent: Variance-Covariance matrix of latent components of predictors and Responses :type sigma_latent: np.ndarray :param sigma: Variance-Covariance matrix of predictors and Responses :type sigma: np.ndarray :param beta_z: Regression coefficient corresponding to the principal components of predictors :type beta_z: np.ndarray :param beta: Regression coefficient corresponding to the predictor variables :type beta: np.ndarray :param beta0: Regression Intercept :type beta0: np.ndarray :param rsq_w: Coefficient of determination for latent component of responses (Variation explained by latent components of predictors on latent components of response) :type rsq_w: np.ndarray :param rsq: Coefficient of determination for responses (Variation explained by predictors on response) :type rsq: np.ndarray :param minerror: True minimum model error :type minerror: np.ndarray :param rotation_x: Rotation Matrix (eigenvector matrix) corresponding to predictors :type rotation_x: np.ndarray :param rotation_y: Rotation Matrix (eigenvector matrix) corresponding to response :type rotation_y: np.ndarray = None """ eigen_x: np.ndarray eigen_y: np.ndarray relevant_predictors: np.ndarray sigma_latent: np.ndarray sigma: np.ndarray beta_z: np.ndarray beta: np.ndarray beta0: np.ndarray rsq_w: np.ndarray rsq: np.ndarray minerror: np.ndarray rotation_x: np.ndarray rotation_y: np.ndarray = None def __repr__(self): arr_items = {x: y for x, y in self.__dict__.items() if isinstance(y, np.ndarray)} dict_items = {x: y for x, y in self.__dict__.items() if isinstance(y, dict)} out = [] out.append("Numpy Arrays:") out.append("".center(45, "-")) for key, value in arr_items.items(): out.append(f'{str(key) + ":":<25}{"Shape: " + str(value.shape):<20}') out.append("\nDictionaries:") out.append("".center(45, "-")) for key, value in dict_items.items(): out.append(f'{key + ":":<25}{"Keys: " + ", ".join(value.keys()):<20}') return '\n'.join(out)
[docs]@dataclass class Data: X: np.ndarray Y: np.ndarray
[docs]@dataclass class Simrel: """Main Class for simulated objects The class contains all the definitions of `simrel` objects. The class will also provide necessary methods to compute various population properties. >>> sobj = Simrel(n_pred = 10, n_relpred = '4, 5', pos_relcomp = '0, 1; 2, 3, 4', gamma = 0.7, rsq = '0.7, 0.8', n_resp = 4, eta = 0.7, pos_resp = '0, 2; 1, 3') >>> print(sobj.properties) Numpy Arrays: --------------------------------------------- eigen_x: Shape: (10,) eigen_y: Shape: (4,) rotation_x: Shape: (10, 10) rotation_y: Shape: (4, 4) sigma_latent: Shape: (14, 14) sigma: Shape: (14, 14) rsq: Shape: (4, 4) rsq_w: Shape: (4, 4) minerror: Shape: (4, 4) beta_z: Shape: (10, 4) beta: Shape: (10, 4) beta0: Shape: (4,) Dictionaries: --------------------------------------------- relevant_predictors: Keys: rel, irrel >>> print(sobj.covariances) Numpy Arrays: +-------------+-------------------+ | | | | cov_ww | cov_wz | | cov_yy | cov_xy | | (4, 4) | (4, 10) | | | | +-------------+-------------------+ | | | | | | | cov_zw | cov_zz | | cov_xy | cov_xx | | (10, 4) | (10, 10) | | | | | | | +-------------+-------------------+ Attributes ---------- n_pred: Either integer or string Number of predictor variables. Ex: `n_pred: 10` n_relpred: Either integer or string Number of relevant predictor variables for each response components In the case of single response model, the parameters refers to the number of predictors relevant for that single response """ n_pred: Prm = 10 n_relpred: Prm = '4, 5' pos_relcomp: Prm = '0, 1; 2, 3, 4' gamma: float = 0.7 rsq: Prm = '0.7, 0.8' n_resp: Prm = 4 eta: float = 0.7 pos_resp: Prm = '0, 2; 1, 3' mu_x: Prm = None mu_y: Prm = None parameter_parsed: bool = field(default=False, repr=False) properties_computed: bool = field(default=False, repr=False) def __post_init__(self): self.parse_parameters() self.properties = Properties() self.covariances = Covariances() self.compute_properties()
[docs] def parse_parameters(self): """Parse the parameters passed during initialization This method parse the parameters which are passed as string into a nested list. It uses :func:`~pysimrel.parse_parm` function where further documentation can be found. """ self.n_relpred = parse_param(self.n_relpred) self.n_relpred = [x for y in self.n_relpred for x in y] self.pos_relcomp = parse_param(self.pos_relcomp) if self.pos_resp is not None: self.pos_resp = parse_param(self.pos_resp) if isinstance(self.rsq, str): self.rsq = [float(x) for x in self.rsq.replace(" ", "").split(",")] else: self.rsq = [self.rsq] self.parameter_parsed = True
def compute_sigma(self): self.covariances.cov_zz = np.diag(self.properties.eigen_x) self.covariances.cov_ww = np.diag(self.properties.eigen_y) self.covariances.cov_zw = get_cov( self.pos_relcomp, self.rsq, self.properties.eigen_y, self.properties.eigen_x ) self.covariances.cov_yy = reduce( np.dot, [self.properties.rotation_y, self.covariances.cov_ww, np.transpose(self.properties.rotation_y)] ) self.covariances.cov_xx = reduce( np.dot, [self.properties.rotation_x, self.covariances.cov_zz, np.transpose(self.properties.rotation_x)] ) self.covariances.cov_xy = reduce( np.dot, [self.properties.rotation_x, self.covariances.cov_zw.T, self.properties.rotation_y.T] ) self.properties.sigma_latent = np.vstack(( np.hstack((self.covariances.cov_ww, self.covariances.cov_zw)), np.hstack((self.covariances.cov_zw.T, self.covariances.cov_zz)) )) self.properties.sigma = np.vstack(( np.hstack((self.covariances.cov_yy, self.covariances.cov_xy.T)), np.hstack((self.covariances.cov_xy, self.covariances.cov_xx)) )) def compute_rsq(self): var_w = np.diag(1 / np.sqrt(np.diag(self.covariances.cov_ww))) sigma_zw = self.covariances.cov_zw sigma_zinv = np.diag(1 / self.properties.eigen_x) rsq_w = reduce(np.dot, [var_w, sigma_zw, sigma_zinv, sigma_zw.T, var_w]) var_y = np.diag(1 / np.sqrt(np.diag(self.covariances.cov_yy))) rot_y = self.properties.rotation_y rsq = reduce(np.dot, [var_y, rot_y, sigma_zw, sigma_zinv, sigma_zw.T, rot_y.T, var_y]) self.properties.rsq = rsq self.properties.rsq_w = rsq_w return rsq_w, rsq def compute_minerror(self): rot_y = self.properties.rotation_y sigma_w = self.covariances.cov_ww sigma_zw = self.covariances.cov_zw sigma_zinv = np.diag(1 / self.properties.eigen_x) minerror0 = sigma_w - reduce(np.dot, [sigma_zw, sigma_zinv, sigma_zw.T]) minerror = reduce(np.dot, [rot_y.T, minerror0, rot_y]) self.properties.minerror = minerror return minerror def compute_coef(self): sigma_zinv = np.diag(1 / self.properties.eigen_x) beta_z = np.dot(sigma_zinv, self.covariances.cov_zw.T) beta_y = reduce(np.dot, [self.properties.rotation_x, beta_z, self.properties.rotation_y.T]) beta0 = np.zeros((self.n_resp,)) beta0 = beta0 + self.mu_y if self.mu_y is not None else beta0 beta0 = beta0 - np.matmul(beta.T, self.mu_x) if self.mu_x is not None else beta0 self.properties.beta_z = beta_z self.properties.beta = beta_y self.properties.beta0 = beta0 def compute_properties(self): self.properties.eigen_x = get_eigen(self.gamma, self.n_pred) self.properties.eigen_y = get_eigen(self.eta, self.n_resp) self.properties.relevant_predictors = get_relpred( self.n_pred, self.n_relpred, self.pos_relcomp ) self.properties.rotation_x = get_rotation(self.properties.relevant_predictors) self.properties.rotation_y = get_rotation(get_relpred( self.n_resp, [len(x) for x in self.pos_resp], self.pos_resp )) self.compute_sigma() self.compute_rsq() self.compute_minerror() self.compute_coef() self.properties_computed = True def simulate_data(self, nobs, random_state=None): rotate_y = False if self.pos_resp is None else True if isinstance(self.properties.sigma, property): self.compute_sigma() sigma_rot = np.linalg.cholesky(self.properties.sigma) rs = np.random.RandomState(random_state) npred = self.n_pred nresp = self.n_resp train_cal = rs.standard_normal(nobs * (npred + nresp)) train_cal = np.matmul(train_cal.reshape((nobs, nresp + npred)), sigma_rot) z = train_cal[:, range(nresp, nresp + npred)] w = train_cal[:, range(nresp)] x = np.matmul(z, self.properties.rotation_x.T) y = np.matmul(w, self.properties.rotation_y.T) if rotate_y else w x = x + self.mu_x if self.mu_x is not None else x y = y + self.mu_y if self.mu_y is not None else y y = y.flatten() if y.shape[1] == 1 else y out = Data(X=x, Y=y) return out
if __name__ == "__main__": pass