Skip to content

Commit

Permalink
Merge pull request #1 from Yue-Jiang/python3
Browse files Browse the repository at this point in the history
try to make ZIFA python3 compatible
  • Loading branch information
Yue-Jiang committed May 28, 2016
2 parents 627f779 3308177 commit eeeb170
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 17 deletions.
8 changes: 4 additions & 4 deletions ZIFA/ZIFA.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,9 +360,9 @@ def fitModel(Y, K, singleSigma = False):
"""
N, D = Y.shape
if D > 2000:
print 'Warning: this dataset has a large number of genes. If ZIFA takes too long to run, try using block_ZIFA.py instead'
print('Warning: this dataset has a large number of genes. If ZIFA takes too long to run, try using block_ZIFA.py instead')
testInputData(Y)
print 'Running zero-inflated factor analysis with N = %i, D = %i, K = %i' % (N, D, K)
print('Running zero-inflated factor analysis with N = %i, D = %i, K = %i' % (N, D, K))
#initialize the parameters
np.random.seed(23)
A, mus, sigmas, decay_coef = initializeParams(Y, K, singleSigma = singleSigma)
Expand All @@ -388,10 +388,10 @@ def fitModel(Y, K, singleSigma = False):
sigmas = new_sigmas
decay_coef = new_decay_coef
if paramsNotChanging:
print 'Param change below threshold %2.3e after %i iterations' % (param_change_thresh, n_iter)
print('Param change below threshold %2.3e after %i iterations' % (param_change_thresh, n_iter))
break
if n_iter >= max_iter:
print 'Maximum number of iterations reached; terminating loop'
print('Maximum number of iterations reached; terminating loop')
n_iter = 1
EZ, EZZT, EX, EXZ, EX2 = Estep(Y, A, mus, sigmas, decay_coef)
params = {'A':A, 'mus':mus, 'sigmas':sigmas, 'decay_coef':decay_coef}
Expand Down
24 changes: 12 additions & 12 deletions ZIFA/block_ZIFA.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def generateIndices(n_blocks, N, D):
y_indices_to_use[i][j] is the indices of block j in sample i.
"""
y_indices_to_use = []
idxs = range(D)
idxs = list(range(D))
n_in_block = int(1.*D / n_blocks)
for i in range(N):
partition = []
Expand All @@ -372,7 +372,7 @@ def generateIndices(n_blocks, N, D):
n_added = len(idxs_in_block)
y_indices_to_use.append(partition)
if i == 0:
print 'Block sizes', [len(a) for a in partition]
print('Block sizes', [len(a) for a in partition])
assert(n_added == D)
return y_indices_to_use

Expand Down Expand Up @@ -419,20 +419,20 @@ def testInputData(Y):
zero_fracs = Y_is_zero.mean(axis = 0)
column_is_all_zero = zero_fracs == 1.
if column_is_all_zero.sum() > 0:
print "Warning: Your Y matrix has %i columns which are entirely zero; filtering these out before continuing." % (column_is_all_zero.sum())
print("Warning: Your Y matrix has %i columns which are entirely zero; filtering these out before continuing." % (column_is_all_zero.sum()))
Y = Y[:, ~column_is_all_zero]
elif (zero_fracs > .9).sum() > 0:
print 'Warning: your Y matrix contains genes which are frequently zero. If the algorithm fails to converge, try filtering out genes which are zero more than 80 - 90% of the time, or using standard ZIFA.'
print('Warning: your Y matrix contains genes which are frequently zero. If the algorithm fails to converge, try filtering out genes which are zero more than 80 - 90% of the time, or using standard ZIFA.')
return Y

def runEMAlgorithm(Y, K, singleSigma = False, n_blocks = None):
Y = testInputData(Y)
N, D = Y.shape
if n_blocks is None:
n_blocks = max(1, D / 500)
print 'Number of blocks has been set to', n_blocks
print('Number of blocks has been set to', n_blocks)

print 'Running block zero-inflated factor analysis with N = %i, D = %i, K = %i, n_blocks = %i' % (N, D, K, n_blocks)
print('Running block zero-inflated factor analysis with N = %i, D = %i, K = %i, n_blocks = %i' % (N, D, K, n_blocks))
#generate blocks.
y_indices_to_use = generateIndices(n_blocks, N, D)

Expand Down Expand Up @@ -473,7 +473,7 @@ def runEMAlgorithm(Y, K, singleSigma = False, n_blocks = None):
try:
checkNoNans([EZ, EZZT, EX, EXZ, EX2, new_A, new_mus, new_sigmas, new_decay_coef])
except:
print "Error: algorithm failed to converge. Usual solutions to this problem: filtering out genes which are zero more than 80 - 90% of the time, or using standard ZIFA. Automatically retrying ZIFA when filtering out genes."
print("Error: algorithm failed to converge. Usual solutions to this problem: filtering out genes which are zero more than 80 - 90% of the time, or using standard ZIFA. Automatically retrying ZIFA when filtering out genes.")
return None

paramsNotChanging = True
Expand All @@ -489,10 +489,10 @@ def runEMAlgorithm(Y, K, singleSigma = False, n_blocks = None):
sigmas = new_sigmas
decay_coef = new_decay_coef
if paramsNotChanging:
print 'Param change below threshold %2.3e after %i iterations' % (param_change_thresh, n_iter)
print('Param change below threshold %2.3e after %i iterations' % (param_change_thresh, n_iter))
break
if n_iter >= max_iter:
print 'Maximum number of iterations reached; terminating loop'
print('Maximum number of iterations reached; terminating loop')
n_iter = 1
params = {'A':A, 'mus':mus, 'sigmas':sigmas, 'decay_coef':decay_coef, 'X':EX}
return EZ, params
Expand All @@ -509,17 +509,17 @@ def fitModel(Y, K, singleSigma = False, n_blocks = None, p0_thresh = .95):
params: a dictionary of model parameters. Throughout, we refer to lambda as "decay_coef".
"""
assert(p0_thresh >= 0 and p0_thresh <= 1)
print 'Filtering out all genes which are zero in more than %2.1f%% of samples. To change this, change p0_thresh.' % (p0_thresh * 100)
print('Filtering out all genes which are zero in more than %2.1f%% of samples. To change this, change p0_thresh.' % (p0_thresh * 100))
Y = Y[:, (np.abs(Y) < 1e-6).mean(axis = 0) <= p0_thresh]
results = runEMAlgorithm(Y, K, singleSigma = singleSigma, n_blocks = n_blocks)

while results is None:
Y_is_zero = np.abs(Y) < 1e-6
max_zero_frac = Y_is_zero.mean(axis = 0).max()
new_max_zero_frac = max_zero_frac * .95
print 'Previously, maximum fraction of zeros for a gene was %2.3f; now lowering that to %2.3f and rerunning ZIFA' % (max_zero_frac, new_max_zero_frac)
print('Previously, maximum fraction of zeros for a gene was %2.3f; now lowering that to %2.3f and rerunning ZIFA' % (max_zero_frac, new_max_zero_frac))
Y = Y[:, Y_is_zero.mean(axis = 0) < new_max_zero_frac]
print 'After filtering out genes with too many zeros, %i samples and %i genes' % Y.shape
print('After filtering out genes with too many zeros, %i samples and %i genes' % Y.shape)
results = runEMAlgorithm(Y, K, singleSigma = singleSigma, n_blocks = n_blocks)
return results

Expand Down
2 changes: 1 addition & 1 deletion example.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def generateSimulatedDimensionalityReductionData(n_clusters, n, d, k, sigma, dec
cutoff = np.exp(-decay_coef * (Y ** 2))
zero_mask = rand_matrix < cutoff
Y[zero_mask] = 0
print 'Fraction of zeros: %2.3f; decay coef: %2.3f' % ((Y == 0).mean(), decay_coef)
print('Fraction of zeros: %2.3f; decay coef: %2.3f' % ((Y == 0).mean(), decay_coef))

return X, Y, Z.transpose(), cluster_ids

Expand Down

0 comments on commit eeeb170

Please sign in to comment.