Skip to content

Commit

Permalink
add set of tests to more rigorously check future PRs; switch tabs to …
Browse files Browse the repository at this point in the history
…spaces in example; modify README
  • Loading branch information
epierson9 committed Oct 12, 2017
1 parent 2855be9 commit 91aedc5
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 69 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ See read_in_real_data_example.py for an example demonstrating how to read in real

ZIFA requires pylab, scipy, numpy, and scikits.learn for full functionality.

Please contact [email protected] with any questions or comments.
Please contact [email protected] with any questions or comments. Prior to issuing pull requests, please confirm that your code passes the tests by running unitTests.py. The tests take about 30 seconds to run.

## Installation

Expand Down
136 changes: 68 additions & 68 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,95 +7,95 @@


def generateSimulatedDimensionalityReductionData(n_clusters, n, d, k, sigma, decay_coef):
    """
    Generate simulated zero-inflated data drawn from one or more latent clusters.

    Latent positions Z (k-dimensional) are projected into d dimensions with a
    random loading matrix, per-dimension Gaussian noise is added, negative
    values are clipped to zero, and entries are then zeroed at random with
    probability exp(-decay_coef * value ** 2).

    Parameters:
        n_clusters: number of latent clusters. With exactly 1, Z is drawn from
            a standard normal and every sample gets cluster id 1.
        n: number of samples.
        d: number of observed dimensions.
        k: number of latent dimensions.
        sigma: base noise standard deviation; each observed dimension uses
            sigma scaled by a random factor in [0.9, 1.1).
        decay_coef: coefficient controlling how quickly the dropout
            probability decays with the magnitude of the value.

    Returns:
        X: (n, d) array of the data before clipping and zero-inflation.
        Y: (n, d) array of the zero-inflated data.
        Z: (n, k) array of the true latent positions.
        cluster_ids: length-n array with the cluster id of each sample.

    The global `random` and `np.random` generators are consumed in a fixed
    order, so results are reproducible once both are seeded; do not reorder
    the sampling statements below.
    """
    base_mu = 3
    range_from_value = .1

    if n_clusters == 1:
        Z = np.random.multivariate_normal(mean=np.zeros([k, ]), cov=np.eye(k), size=n).transpose()
        cluster_ids = np.ones([n])
    else:
        Z = np.zeros([k, n])
        cluster_ids = np.array([random.choice(range(n_clusters)) for _ in range(n)])

        # NOTE(review): indentation was lost in the scraped diff; the
        # per-cluster loop is assumed to belong to the multi-cluster branch.
        for cluster_id in list(set(cluster_ids)):
            idxs = cluster_ids == cluster_id
            # Cluster centres are uniform in [-2.5, 2.5) along each latent axis.
            cluster_mu = (np.random.random([k]) - .5) * 5
            Z[:, idxs] = np.random.multivariate_normal(
                mean=cluster_mu, cov=.05 * np.eye(k), size=idxs.sum()).transpose()

    A = np.random.random([d, k]) - .5
    # Per-dimension mean and noise scale: the base value times a factor in
    # [1 - range_from_value, 1 + range_from_value).
    mu = np.array([(np.random.uniform() * range_from_value * 2 + (1 - range_from_value)) * base_mu
                   for _ in range(d)])
    sigmas = np.array([(np.random.uniform() * range_from_value * 2 + (1 - range_from_value)) * sigma
                       for _ in range(d)])
    noise = np.zeros([d, n])

    for j in range(d):
        noise[j, :] = mu[j] + np.random.normal(loc=0, scale=sigmas[j], size=n)

    X = (np.dot(A, Z) + noise).transpose()
    Y = deepcopy(X)
    Y[Y < 0] = 0
    rand_matrix = np.random.random(Y.shape)

    # Dropout: an entry is zeroed when a uniform draw falls below the cutoff,
    # so small values are dropped more often than large ones.
    cutoff = np.exp(-decay_coef * (Y ** 2))
    zero_mask = rand_matrix < cutoff
    Y[zero_mask] = 0

    print('Fraction of zeros: %2.3f; decay coef: %2.3f' % ((Y == 0).mean(), decay_coef))

    return X, Y, Z.transpose(), cluster_ids


def testAlgorithm():
    """
    Fit block ZIFA and standard factor analysis to simulated data and plot the results.

    Simulates a zero-inflated dataset with three clusters, estimates
    two-dimensional latent positions with block_ZIFA.fitModel and with
    FactorAnalysis, and shows three side-by-side scatter plots (true, ZIFA
    and factor-analysis latent positions) coloured by cluster.
    """
    import matplotlib.pyplot as plt

    # Seed both generators so the simulated data are reproducible.
    random.seed(35)
    np.random.seed(32)

    n = 200
    d = 20
    k = 2
    sigma = .3
    n_clusters = 3
    decay_coef = .1

    X, Y, Z, ids = generateSimulatedDimensionalityReductionData(n_clusters, n, d, k, sigma, decay_coef)

    Zhat, params = block_ZIFA.fitModel(Y, k)
    colors = ['red', 'blue', 'green']
    cluster_ids = sorted(list(set(ids)))
    model = FactorAnalysis(n_components=k)
    factor_analysis_Zhat = model.fit_transform(Y)

    plt.figure(figsize=[15, 5])

    # (subplot position, latent coordinates to plot, panel title)
    panels = [
        (131, Z, 'True Latent Positions\nFraction of Zeros %2.3f' % (Y == 0).mean()),
        (132, Zhat, 'ZIFA Estimated Latent Positions'),
        (133, factor_analysis_Zhat, 'Factor Analysis Estimated Latent Positions'),
    ]
    for position, points, title in panels:
        plt.subplot(position)
        for cluster_id in cluster_ids:
            members = ids == cluster_id
            plt.scatter(points[members, 0], points[members, 1], color=colors[cluster_id - 1], s=4)
        plt.title(title)
        plt.xlim([-4, 4])
        plt.ylim([-4, 4])

    plt.show()


# Run the demonstration only when this file is executed as a script.
if __name__ == '__main__':
    testAlgorithm()
61 changes: 61 additions & 0 deletions unitTests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from __future__ import print_function
from ZIFA import ZIFA, block_ZIFA
import numpy as np
import random
from copy import deepcopy
from sklearn.decomposition import FactorAnalysis
from example import generateSimulatedDimensionalityReductionData
from scipy.stats import pearsonr


def _assert_close(actual, expected):
    """
    Raise AssertionError unless actual matches expected.

    Uses the same tolerances as np.allclose's defaults, but is not stripped
    under `python -O` and reports the mismatching values on failure.
    """
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=1e-8)


def unitTests():
    """
    Just test ZIFA and block ZIFA under a variety of conditions to make sure projected dimensions don't change.

    Both random generators are seeded once, simulated datasets are generated,
    and the fitted latent positions and parameters are compared against
    previously recorded values. The statement order matters: every call that
    draws random numbers advances the shared generators, so reordering the
    steps would change the expected values.
    """
    random.seed(35)
    np.random.seed(32)

    n = 200
    d = 20
    k = 2
    sigma = .3
    n_clusters = 3
    decay_coef = .1

    _, Y, _, _ = generateSimulatedDimensionalityReductionData(n_clusters, n, d, k, sigma, decay_coef)
    Zhat, params = ZIFA.fitModel(Y, k)
    _assert_close(Zhat[-1, :], [1.50067515, 0.04742477])
    _assert_close(params['A'][0, :], [0.66884415, -0.17173555])
    _assert_close(params['decay_coef'], 0.10458794970222711)
    _assert_close(params['sigmas'][0], 0.30219903)

    Zhat, params = block_ZIFA.fitModel(Y, k)
    # this is slightly different (though highly correlated) because ZIFA runs one extra half-step of EM
    _assert_close(Zhat[-1, :], [1.49712162, 0.05823952])
    _assert_close(params['A'][0, :], [0.66884415, -0.17173555])
    _assert_close(params['decay_coef'], 0.10458794970222711)
    _assert_close(params['sigmas'][0], 0.30219903)

    Zhat, params = block_ZIFA.fitModel(Y, k, n_blocks=3)
    _assert_close(Zhat[-1, :], [9.84455438e-01, 4.50924335e-02])

    # Second scenario: more observed dimensions than samples, three latent dimensions.
    n = 50
    d = 60
    k = 3
    sigma = .3
    n_clusters = 3
    decay_coef = .1

    _, Y, _, _ = generateSimulatedDimensionalityReductionData(n_clusters, n, d, k, sigma, decay_coef)
    Zhat, params = block_ZIFA.fitModel(Y, k, n_blocks=3)
    _assert_close(Zhat[-1, :], [-1.69609638, -0.5475882, 0.08008015])

    # A fresh dataset is drawn here (not reused) before fitting plain ZIFA.
    _, Y, _, _ = generateSimulatedDimensionalityReductionData(n_clusters, n, d, k, sigma, decay_coef)
    Zhat, params = ZIFA.fitModel(Y, k)
    _assert_close(Zhat[-1, :], [-0.63075905, -0.77361427, -0.11544281])

    print('Tests passed!')

# Run the regression tests only when this file is executed as a script.
if __name__ == '__main__':
    unitTests()

0 comments on commit 91aedc5

Please sign in to comment.