# Source code for tuskitoo.sky_sub.dummy_pca

import numpy as np


def _svd_flip(u, v, u_based_decision=True):
    """Sign correction to ensure deterministic output from SVD.

    Adjusts the columns of ``u`` and the rows of ``v`` so that, in every
    column of ``u``, the entry with the largest absolute value is positive.
    If ``u_based_decision`` is False, the decision is taken on the rows of
    ``v`` instead. Both arrays are modified in place and also returned.

    Adapted from ``sklearn.utils.extmath.svd_flip``:
    https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/utils/extmath.py

    Parameters
    ----------
    u : ndarray
        Left singular vectors as returned by ``np.linalg.svd`` (2-D).
    v : ndarray
        Right singular vectors as returned by ``np.linalg.svd``; this is
        really ``vt`` (rows are the vectors).
    u_based_decision : bool, default=True
        If True, use the columns of ``u`` as the basis for sign flipping.
        Otherwise, use the rows of ``v``.

    Returns
    -------
    u_adjusted : ndarray
        ``u`` with adjusted columns, same shape as ``u``.
    v_adjusted : ndarray
        ``v`` with adjusted rows, same shape as ``v``.
    """
    if u_based_decision:
        # Row index of the largest-magnitude entry of every column of u.
        pivot_rows = np.argmax(np.abs(u), axis=0)
        signs = np.sign(u[pivot_rows, np.arange(u.shape[1])])
    else:
        # Column index of the largest-magnitude entry of every row of v.
        pivot_cols = np.argmax(np.abs(v), axis=1)
        signs = np.sign(v[np.arange(v.shape[0]), pivot_cols])
    # Flipping a column of u together with the matching row of v leaves the
    # product u @ diag(s) @ v unchanged.
    u *= signs[np.newaxis, :]
    v *= signs[:, np.newaxis]
    return u, v


def _standard_scale(data_array):
    """Centre every column on its mean and divide it by its standard deviation.

    Columns whose standard deviation is exactly zero are only centred (they
    become all zeros) instead of being divided by zero, which would fill the
    column with NaN and break the subsequent SVD.
    """
    mean = np.mean(data_array, axis=0)
    std = np.std(data_array, axis=0)
    safe_std = np.where(std == 0, 1.0, std)
    return (data_array - mean) / safe_std


def dummies_pca(array, standar_scaler=False):
    """Minimal SVD-based PCA that mimics the attributes of scikit-learn's PCA.

    Parameters
    ----------
    array : ndarray
        2-D data matrix. Arrays with shape ``(samples, n_features)`` are
        preferred.
    standar_scaler : bool, default=False
        If True, every column is standardised (zero mean, unit standard
        deviation) before the decomposition. Constant columns are mapped to
        zeros.

    Returns
    -------
    dict
        ``"transform"``
            Projected data, ``U * S``.
        ``"components_"``
            Sign-corrected right singular vectors ``Vt`` (one per row).
        ``"std"``
            The singular values ``S`` (despite the key name).
        ``"explained_variance_"``
            ``S**2 / (n_samples - 1)``.
        ``"explained_variance_ratio_"``
            ``explained_variance_`` divided by its sum.

    Notes
    -----
    * No mean-centering is applied unless ``standar_scaler`` is True, so the
      caller is responsible for centring the data when a conventional PCA
      is wanted.
    * ``n_samples`` is taken as the *largest* dimension of ``array`` and the
      number of components as the *smallest* one, regardless of orientation.
    """
    if standar_scaler:
        array = _standard_scale(array)

    n_samples = max(array.shape)  # number of observations (largest axis)
    n_components_ = min(array.shape)  # number of components (smallest axis)

    U, S, Vt = np.linalg.svd(array, full_matrices=False)

    explained_variance_ = (S ** 2) / (n_samples - 1)
    total_var = np.sum(explained_variance_)
    explained_variance_ratio_ = explained_variance_ / total_var

    # Flip the singular vectors' signs to enforce deterministic output.
    U, Vt = _svd_flip(U, Vt)
    components_ = Vt

    # X_new = X * V = U * S * Vt * V = U * S
    transformed = U[:, :n_components_] * S[:n_components_]

    return {
        "transform": transformed,
        "components_": components_,
        "std": S,
        "explained_variance_": explained_variance_,
        "explained_variance_ratio_": explained_variance_ratio_,
    }