1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
|
#!/usr/bin/python3
"""Unit tests for the CNVkit library, cnvlib."""
import unittest
import logging
logging.basicConfig(level=logging.ERROR, format="%(message)s")
# unittest/pomegranate 0.10.0: ImportWarning: can't resolve package from
# __spec__ or __package__, falling back on __name__ and __path__
import warnings
warnings.filterwarnings('ignore', category=ImportWarning)
import numpy as np
from skgenome import GenomicArray, tabio
import cnvlib
from cnvlib import (cnary, core, fix, smoothing, vary)
class CNATests(unittest.TestCase):
"""Tests for the CopyNumArray class."""
def test_empty(self):
"""Instantiate from an empty file."""
cnarr = cnvlib.read("formats/empty")
self.assertEqual(len(cnarr), 0)
def test_basic(self):
"""Test basic container functionality and magic methods."""
cna = cnvlib.read('formats/reference-tr.cnn')
# Length
self.assertEqual(len(cna),
linecount('formats/reference-tr.cnn') - 1)
# Equality
same = cnvlib.read('formats/reference-tr.cnn')
self.assertEqual(cna, same)
# Item access
orig = cna[0]
cna[0] = orig
cna[3:4] = cna[3:4]
cna[6:10] = cna[6:10]
self.assertEqual(tuple(cna[0]), tuple(same[0]))
self.assertEqual(cna[3:6], same[3:6])
def test_center_all(self):
"""Test recentering."""
cna = cnvlib.read('formats/reference-tr.cnn')
# Median-centering an already median-centered array -> no change
chr1 = cna.in_range('chr1')
self.assertAlmostEqual(0, np.median(chr1['log2']), places=1)
chr1.center_all()
orig_chr1_cvg = np.median(chr1['log2'])
self.assertAlmostEqual(0, orig_chr1_cvg)
# Median-centering resets a shift away from the median
chr1plus2 = chr1.copy()
chr1plus2['log2'] = 2.0
chr1plus2.center_all()
self.assertAlmostEqual(np.median(chr1plus2['log2']), orig_chr1_cvg)
# Other methods for centering are similar for a CN-neutral chromosome
for method in ("mean", "mode", "biweight"):
cp = chr1.copy()
cp.center_all(method)
self.assertLess(abs(cp['log2'].median() - orig_chr1_cvg), 0.1)
def test_drop_extra_columns(self):
"""Test removal of optional 'gc' column."""
cna = cnvlib.read('formats/reference-tr.cnn')
self.assertIn('gc', cna)
cleaned = cna.drop_extra_columns()
self.assertNotIn('gc', cleaned)
self.assertTrue((cleaned['log2'] == cna['log2']).all())
def test_guess_xx(self):
"""Guess chromosomal sex from chrX log2 ratio value."""
for (fname, sample_is_f, ref_is_m) in (
("formats/f-on-f.cns", True, False),
("formats/f-on-m.cns", True, True),
("formats/m-on-f.cns", False, False),
("formats/m-on-m.cns", False, True),
("formats/amplicon.cnr", False, True),
("formats/cl_seq.cns", True, True),
("formats/tr95t.cns", True, True),
("formats/reference-tr.cnn", False, False),
):
guess = cnvlib.read(fname).guess_xx(ref_is_m)
self.assertEqual(guess, sample_is_f,
"%s: guessed XX %s but is %s"
% (fname, guess, sample_is_f))
def test_residuals(self):
cnarr = cnvlib.read("formats/amplicon.cnr")
segments = cnvlib.read("formats/amplicon.cns")
regions = GenomicArray(segments.data).drop_extra_columns()
for grouping_arg in (None, segments, regions):
resid = cnarr.residuals(grouping_arg)
self.assertAlmostEqual(0, resid.mean(), delta=.3)
self.assertAlmostEqual(1, np.percentile(resid, 80), delta=.2)
self.assertAlmostEqual(2, resid.std(), delta=.5)
def test_smooth_log2(self):
for fname in ["formats/amplicon.cnr",
"formats/wgs-chr17.cnr",
"formats/p2-20_1.cnr",
"formats/p2-20_2.cnr"]:
cnarr = cnvlib.read(fname)
orig_vals = cnarr.log2.values.copy()
signal = cnarr.smooth_log2()
self.assertTrue((orig_vals == cnarr.log2.values).all())
self.assertGreaterEqual(signal.min(), cnarr.log2.min())
self.assertLessEqual(signal.max(), cnarr.log2.max())
class OtherTests(unittest.TestCase):
"""Tests for other functionality."""
def test_fix_edge(self):
"""Test the 'edge' bias correction calculations."""
# NB: With no gap, gain and loss should balance out
# Wide target, no secondary corrections triggered
insert_size = 250
gap_size = np.zeros(1) # Adjacent
target_size = np.array([600])
loss = fix.edge_losses(target_size, insert_size)
gain = fix.edge_gains(target_size, gap_size, insert_size)
gain *= 2 # Same on the other side
self.assertAlmostEqual(loss, gain)
# Trigger 'loss' correction (target_size < 2 * insert_size)
target_size = np.array([450])
self.assertAlmostEqual(
fix.edge_losses(target_size, insert_size),
2 * fix.edge_gains(target_size, gap_size, insert_size))
# Trigger 'gain' correction (target_size gap_size < insert_size)
target_size = np.array([300])
self.assertAlmostEqual(fix.edge_losses(target_size, insert_size),
2 * fix.edge_gains(target_size, gap_size, insert_size))
# call
# Test: convert_clonal(x, 1, 2) == convert_diploid(x)
class VATests(unittest.TestCase):
"""Tests for the VariantArray class."""
def test_read(self):
"""Instantiate from a VCF file."""
variants = tabio.read("formats/na12878_na12882_mix.vcf", "vcf")
self.assertGreater(len(variants), 0)
# == helpers ==
def linecount(filename):
i = -1
with open(filename) as handle:
for i, _line in enumerate(handle):
pass
return i 1
if __name__ == '__main__':
unittest.main(verbosity=2)
|