/* @(#)biased.c 1.2 (QUALCOMM) 03/19/03 */

/*
 * Tell whether observed data is biased or not.
 *
 * Usage: biased samples bias/proportion/ones
 *
 * Given a number of observed bits and the number of those that were ones
 * (or, alternatively, a bias or a proportion -- see main()), print how far
 * the observation deviates from a fair 50/50 source and how significant
 * that deviation is under a two-sided normal approximation.
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* being balanced isn't interesting until you have lots of bits;
 * P(exactly 5000 out of 10000) ~= .008)
 */
#define BAL 10000

/*
 * Significance table, ordered from the strictest threshold to the loosest.
 * The final row has report == 0: it terminates the search loop and supplies
 * the message used when no threshold was exceeded.
 */
static const struct {
    double limit;        /* double ended test probability (|z| threshold) */
    int report;          /* severity level; 0 marks the end of the table */
    const char *message; /* printf format; rows with report > 2 consume z */
} sigtable[] = {
    { 6, 3, "certainly non-uniform (>6sigma, z = %g)\n" },
    { 3.2908, 2, "almost certainly non-uniform (>99.9%%)\n" },
    { 2.5762, 2, "likely non-uniform (>99%%)\n" },
    { 1.9604, 1, "might be non-uniform (>95%%)\n" },
    { -1, 0, "not statistically significant\n" },
};

/*
 * Print the verdict for `nones` ones observed out of `nbits` samples.
 *
 * Writes one line to stdout: "ones/total proportion " followed by either
 * "balanced" (exactly half ones and more than BAL samples) or the message
 * of the first sigtable row whose limit is exceeded by |z|.
 *
 * The caller is expected to pass nbits > 0; main() enforces this.
 */
void printbit(double nbits, double nones)
{
    double p, z;
    int i, balanced;

    balanced = (nbits == (nones * 2));
    p = nones / nbits;
    /* z-score of p against 0.5: (p - 0.5) / (0.5 / sqrt(nbits)) */
    z = 2.0 * sqrt(nbits) * (p - 0.5);

    /* find category */
    for (i = 0; sigtable[i].report; ++i) {
        /* fabs, not abs: integer abs() would truncate z before comparing */
        if (fabs(z) > sigtable[i].limit)
            break;
    }

    printf("%.20g/%.20g %-7g ", nones, nbits, p);
    if (balanced && nbits > BAL)
        printf("balanced\n");
    else if (sigtable[i].report > 2)
        printf(sigtable[i].message, z);
    else
        printf(sigtable[i].message);
}

/* Print the usage line to stderr and return the failure exit status. */
static int usage(const char *myname)
{
    fprintf(stderr, "usage: %s samples bias/proportion/ones\n", myname);
    return 1;
}

/*
 * Parse "samples" and "bias/proportion/ones" from the command line and
 * report on them via printbit().
 *
 * The second argument is interpreted by magnitude:
 *   value < 0.2        a bias; ones = samples * 0.5 * (1 - value)
 *   0.2 <= value <= 1  a proportion; ones = samples * value
 *   value > 1          the count of ones itself
 *
 * Returns 0 on success, 1 on any argument error.
 */
int main(int ac, char **av)
{
    double nbits, nones;
    char *endptr;
    const char *myname;

    /* av[0] may legitimately be NULL when ac == 0 */
    myname = (ac > 0 && av[0] != NULL) ? av[0] : "biased";
    if (ac != 3)
        return usage(myname);

    nbits = strtod(av[1], &endptr);
    /* reject empty input, trailing junk, nan/inf and non-positive counts,
     * all of which would otherwise turn into NaN output */
    if (endptr == av[1] || *endptr != '\0' || !isfinite(nbits) || nbits <= 0) {
        fprintf(stderr, "'%s' was expected to be total number of items\n",
                av[1]);
        return usage(myname);
    }
    if (nbits < 50)
        fprintf(stderr,
                "%s: Warning: not enough data for accurate confidence intervals\n",
                myname);

    nones = strtod(av[2], &endptr);
    /* the negated range test also rejects NaN */
    if (endptr == av[2] || *endptr != '\0' || !(nones >= 0 && nones <= nbits)) {
        fprintf(stderr, "'%s' was expected to be bias/proportion/ones\n",
                av[2]);
        return usage(myname);
    }

    /* adjust "nones" according to what's come in... */
    if (nones < 0.2) /* assume it's a "bias" */
        nones = nbits * 0.5 * (1 - nones);
    else if (nones <= 1) /* assume it's a proportion */
        nones = nbits * nones;

    printbit(nbits, nones);
    return 0;
}