File: test_cnvlib.py

package info (click to toggle)
cnvkit 0.9.9-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 70,536 kB
  • sloc: python: 9,707; makefile: 305; sh: 53; xml: 38
file content (161 lines) | stat: -rwxr-xr-x 6,166 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/python3
"""Unit tests for the CNVkit library, cnvlib."""
import unittest

import logging
logging.basicConfig(level=logging.ERROR, format="%(message)s")

# unittest/pomegranate 0.10.0: ImportWarning: can't resolve package from
# __spec__ or __package__, falling back on __name__ and __path__
import warnings
warnings.filterwarnings('ignore', category=ImportWarning)

import numpy as np
from skgenome import GenomicArray, tabio

import cnvlib
from cnvlib import (cnary, core, fix, smoothing, vary)


class CNATests(unittest.TestCase):
    """Tests for the CopyNumArray class."""

    def test_empty(self):
        """Instantiate from an empty file."""
        cnarr = cnvlib.read("formats/empty")
        self.assertEqual(len(cnarr), 0)

    def test_basic(self):
        """Test basic container functionality and magic methods."""
        cna = cnvlib.read('formats/reference-tr.cnn')
        # Length
        self.assertEqual(len(cna),
                         linecount('formats/reference-tr.cnn') - 1)
        # Equality
        same = cnvlib.read('formats/reference-tr.cnn')
        self.assertEqual(cna, same)
        # Item access
        orig = cna[0]
        cna[0] = orig
        cna[3:4] = cna[3:4]
        cna[6:10] = cna[6:10]
        self.assertEqual(tuple(cna[0]), tuple(same[0]))
        self.assertEqual(cna[3:6], same[3:6])

    def test_center_all(self):
        """Test recentering."""
        cna = cnvlib.read('formats/reference-tr.cnn')
        # Median-centering an already median-centered array -> no change
        chr1 = cna.in_range('chr1')
        self.assertAlmostEqual(0, np.median(chr1['log2']), places=1)
        chr1.center_all()
        orig_chr1_cvg = np.median(chr1['log2'])
        self.assertAlmostEqual(0, orig_chr1_cvg)
        # Median-centering resets a shift away from the median
        chr1plus2 = chr1.copy()
        chr1plus2['log2']  = 2.0
        chr1plus2.center_all()
        self.assertAlmostEqual(np.median(chr1plus2['log2']), orig_chr1_cvg)
        # Other methods for centering are similar for a CN-neutral chromosome
        for method in ("mean", "mode", "biweight"):
            cp = chr1.copy()
            cp.center_all(method)
            self.assertLess(abs(cp['log2'].median() - orig_chr1_cvg), 0.1)

    def test_drop_extra_columns(self):
        """Test removal of optional 'gc' column."""
        cna = cnvlib.read('formats/reference-tr.cnn')
        self.assertIn('gc', cna)
        cleaned = cna.drop_extra_columns()
        self.assertNotIn('gc', cleaned)
        self.assertTrue((cleaned['log2'] == cna['log2']).all())

    def test_guess_xx(self):
        """Guess chromosomal sex from chrX log2 ratio value."""
        for (fname, sample_is_f, ref_is_m) in (
                ("formats/f-on-f.cns", True, False),
                ("formats/f-on-m.cns", True, True),
                ("formats/m-on-f.cns", False, False),
                ("formats/m-on-m.cns", False, True),
                ("formats/amplicon.cnr", False, True),
                ("formats/cl_seq.cns", True, True),
                ("formats/tr95t.cns", True, True),
                ("formats/reference-tr.cnn", False, False),
            ):
            guess = cnvlib.read(fname).guess_xx(ref_is_m)
            self.assertEqual(guess, sample_is_f,
                             "%s: guessed XX %s but is %s"
                             % (fname, guess, sample_is_f))

    def test_residuals(self):
        cnarr = cnvlib.read("formats/amplicon.cnr")
        segments = cnvlib.read("formats/amplicon.cns")
        regions = GenomicArray(segments.data).drop_extra_columns()
        for grouping_arg in (None, segments, regions):
            resid = cnarr.residuals(grouping_arg)
            self.assertAlmostEqual(0, resid.mean(), delta=.3)
            self.assertAlmostEqual(1, np.percentile(resid, 80), delta=.2)
            self.assertAlmostEqual(2, resid.std(), delta=.5)

    def test_smooth_log2(self):
        for fname in ["formats/amplicon.cnr",
                      "formats/wgs-chr17.cnr",
                      "formats/p2-20_1.cnr",
                      "formats/p2-20_2.cnr"]:
            cnarr = cnvlib.read(fname)
            orig_vals = cnarr.log2.values.copy()
            signal = cnarr.smooth_log2()
            self.assertTrue((orig_vals == cnarr.log2.values).all())
            self.assertGreaterEqual(signal.min(), cnarr.log2.min())
            self.assertLessEqual(signal.max(), cnarr.log2.max())


class OtherTests(unittest.TestCase):
    """Tests for other functionality."""

    def test_fix_edge(self):
        """Test the 'edge' bias correction calculations."""
        # NB: With no gap, gain and loss should balance out
        # Wide target, no secondary corrections triggered
        insert_size = 250
        gap_size = np.zeros(1)  # Adjacent
        target_size = np.array([600])
        loss = fix.edge_losses(target_size, insert_size)
        gain = fix.edge_gains(target_size, gap_size, insert_size)
        gain *= 2  # Same on the other side
        self.assertAlmostEqual(loss, gain)
        # Trigger 'loss' correction (target_size < 2 * insert_size)
        target_size = np.array([450])
        self.assertAlmostEqual(
            fix.edge_losses(target_size, insert_size),
            2 * fix.edge_gains(target_size, gap_size, insert_size))
        # Trigger 'gain' correction (target_size   gap_size < insert_size)
        target_size = np.array([300])
        self.assertAlmostEqual(fix.edge_losses(target_size, insert_size),
                        2 * fix.edge_gains(target_size, gap_size, insert_size))

    # call
    # Test: convert_clonal(x, 1, 2) == convert_diploid(x)


class VATests(unittest.TestCase):
    """Tests for the VariantArray class."""

    def test_read(self):
        """Instantiate from a VCF file."""
        variants = tabio.read("formats/na12878_na12882_mix.vcf", "vcf")
        self.assertGreater(len(variants), 0)


# == helpers ==

def linecount(filename):
    i = -1
    with open(filename) as handle:
        for i, _line in enumerate(handle):
            pass
        return i   1


if __name__ == '__main__':
    unittest.main(verbosity=2)