Skip to content

Commit 01397b6

Browse files
committed
Quantile normalisation reimplemented using pandas.DataFrame.rank: when two scores are tied, the minimum rank is assigned to both scores
1 parent 9cd4d46 commit 01397b6

File tree

5 files changed

+24
-20
lines changed

5 files changed

+24
-20
lines changed

data/UCEC_clinical_phenotype.mat

-13.4 KB
Binary file not shown.

data/results_NBS_Hofree_100.mat

-3.8 MB
Binary file not shown.

data/results_NBS_Hofree_1000.mat

-4.03 MB
Binary file not shown.

data/somatic_data_UCEC.mat

-349 KB
Binary file not shown.

stratipy/filtering_diffusion.py

Lines changed: 24 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import sys
22
import numpy as np
33
import scipy.sparse as sp
4+
import pandas as pd
45
from scipy.sparse.linalg import norm
56
from scipy.io import loadmat, savemat
67
from stratipy.nbs_class import Ppi, Patient
@@ -236,8 +237,6 @@ def calcul_final_influence(M, adj, result_folder, influence_weight='min',
236237

237238
else:
238239
if compute:
239-
start = time.time()
240-
241240
# check if influence distance file exists
242241
existance_same_influence = os.path.exists(influence_distance_file)
243242
if existance_same_influence:
@@ -283,8 +282,6 @@ def calcul_final_influence(M, adj, result_folder, influence_weight='min',
283282
else:
284283
final_influence = final_influence_max
285284

286-
end = time.time()
287-
288285
# take most recent file
289286
else:
290287
for x in final_influence_file, influence_distance_directory:
@@ -424,8 +421,7 @@ def filter_ppi_patients(result_folder, influence_weight, simplification, alpha,
424421
return ppi_final, mut_final
425422

426423

427-
# @profile
428-
def quantile_norm_mean(anarray):
424+
def quantile_norm_mean(df):
429425
"""Helper function for propagation_profile
430426
431427
Forces the observations/variables to have identical intensity distribution.
@@ -437,22 +433,30 @@ def quantile_norm_mean(anarray):
437433
-------
438434
439435
"""
440-
A = np.squeeze(np.asarray(anarray.T))
441-
AA = np.zeros_like(A)
442-
I = np.argsort(A, axis=0)
443-
AA[I, np.arange(A.shape[1])] = np.mean(A[I, np.arange(A.shape[1])],
444-
axis=1)[:, np.newaxis]
445-
return AA.T
436+
if not isinstance(df, pd.DataFrame):
437+
df = pd.DataFrame(df)
438+
rank_mean = df.stack().groupby(df.rank(method='first').stack().astype(int)).mean()
439+
df_norm = df.rank(method='min').stack().astype(int).map(rank_mean).unstack()
440+
return df_norm.values
446441

447442

448-
# @profile
449-
def quantile_norm_median(anarray):
450-
A = np.squeeze(np.asarray(anarray.T))
451-
AA = np.zeros_like(A)
452-
I = np.argsort(A, axis=0)
453-
AA[I, np.arange(A.shape[1])] = np.median(A[I, np.arange(A.shape[1])],
454-
axis=1)[:, np.newaxis]
455-
return AA.T
443+
def quantile_norm_median(df):
444+
"""Helper function for propagation_profile
445+
446+
Forces the observations/variables to have identical intensity distribution.
447+
448+
Parameters
449+
----------
450+
451+
Returns
452+
-------
453+
454+
"""
455+
if not isinstance(df, pd.DataFrame):
456+
df = pd.DataFrame(df)
457+
rank_median = df.stack().groupby(df.rank(method='first').stack().astype(int)).median()
458+
df_norm = df.rank(method='min').stack().astype(int).map(rank_median).unstack()
459+
return df_norm.values
456460

457461

458462
def propagation_profile(mut_raw, adj, result_folder, alpha, tol, mut_type):

0 commit comments

Comments
 (0)