|
1 | 1 | from django.utils.unittest import TestCase
|
2 | 2 |
|
3 |
| -from scipy.stats import mannwhitneyu as scipy_mann_whitney |
4 | 3 | from experiments.significance import mann_whitney
|
5 | 4 |
|
| 5 | +# The hardcoded p and u values in these tests were calculated using scipy |
6 | 6 | class MannWhitneyTestCase(TestCase):
|
7 |
| - def frequencies_to_list(self, frequencies): |
8 |
| - entries = [] |
9 |
| - for entry,count in frequencies.items(): |
10 |
| - entries.extend([entry] * count) |
11 |
| - return entries |
| 7 | + longMessage = True |
12 | 8 |
|
13 | 9 | def test_empty_sets(self):
|
14 | 10 | mann_whitney(dict(), dict())
|
15 | 11 |
|
16 | 12 | def test_identical_ranges(self):
|
17 |
| - distribution = dict((x,1) for x in range(50)) |
18 |
| - self.assertMatchesSciPy(distribution, distribution) |
| 13 | + distribution = dict((x, 1) for x in range(50)) |
| 14 | + self.assertUandPCorrect(distribution, distribution, 1250.0, 0.49862467827855483) |
19 | 15 |
|
20 | 16 | def test_many_repeated_values(self):
|
21 |
| - self.assertMatchesSciPy({0: 100, 1: 50}, {0: 110, 1: 60}) |
| 17 | + self.assertUandPCorrect({0: 100, 1: 50}, {0: 110, 1: 60}, 12500.0, 0.35672951675909859) |
22 | 18 |
|
23 | 19 | def test_large_range(self):
|
24 |
| - distribution_a = dict((x,1) for x in range(10000)) |
25 |
| - distribution_b = dict((x+1,1) for x in range(10000)) |
26 |
| - self.assertMatchesSciPy(distribution_a, distribution_b) |
| 20 | + distribution_a = dict((x, 1) for x in range(10000)) |
| 21 | + distribution_b = dict((x+1, 1) for x in range(10000)) |
| 22 | + self.assertUandPCorrect(distribution_a, distribution_b, 49990000.5, 0.49023014794874586) |
27 | 23 |
|
28 | 24 | def test_very_different_sizes(self):
|
29 |
| - distribution_a = dict((x,1) for x in range(10000)) |
30 |
| - distribution_b = dict((x,1) for x in range(20)) |
31 |
| - self.assertMatchesSciPy(distribution_a, distribution_b) |
| 25 | + distribution_a = dict((x, 1) for x in range(10000)) |
| 26 | + distribution_b = dict((x, 1) for x in range(20)) |
| 27 | + self.assertUandPCorrect(distribution_a, distribution_b, 200.0, 0) |
32 | 28 |
|
33 |
| - def assertMatchesSciPy(self, distribution_a, distribution_b): |
| 29 | + def assertUandPCorrect(self, distribution_a, distribution_b, u, p): |
34 | 30 | our_u, our_p = mann_whitney(distribution_a, distribution_b)
|
35 |
| - correct_u, correct_p = scipy_mann_whitney( |
36 |
| - self.frequencies_to_list(distribution_a), |
37 |
| - self.frequencies_to_list(distribution_b)) |
38 |
| - self.assertEqual(our_u, correct_u, "U score incorrect") |
39 |
| - self.assertAlmostEqual(our_p, correct_p, msg="p value incorrect") |
| 31 | + self.assertEqual(our_u, u, "U score incorrect") |
| 32 | + self.assertAlmostEqual(our_p, p, msg="p value incorrect") |
0 commit comments