// from sal/inc/rtl/math.hxx #include #include #include #include #include #include #include #include using namespace std; class IllegalArgument {}; class ArrayTooBig {}; double getTime() { timeval tv1; gettimeofday( &tv1, NULL ); double fTime = tv1.tv_sec + tv1.tv_usec / 1000000.0; return fTime; } /** Test equality of two values with an accuracy of the magnitude of the given values scaled by 2^-48 (4 bits roundoff stripped). @ATTENTION approxEqual( value!=0.0, 0.0 ) _never_ yields true. */ inline bool approxEqual(double a, double b) { if ( a == b ) return true; double x = a - b; return (x < 0.0 ? -x : x) < ((a < 0.0 ? -a : a) * (1.0 / (16777216.0 * 16777216.0))); } /** floor() method taking approxEqual() into account. Use for expected integer values being calculated by double functions. @ATTENTION The threshhold value is 3.55271e-015 */ inline double approxFloor(double a) { double b = floor( a ); // The second approxEqual() is necessary for values that are near the limit // of numbers representable with 4 bits stripped off. (#i12446#) if ( approxEqual( a - 1.0, b ) && !approxEqual( a, b ) ) return b + 1.0; return b; } // from interpr3.cxx double BinomKoeff(double n, double k) { double nVal = 0.0; k = approxFloor(k); if (n < k) nVal = 0.0; else if (k == 0.0) nVal = 1.0; else { nVal = n/k; n--; k--; while (k > 0.0) { nVal *= n/k; k--; n--; } } return nVal; } double ScHypGeomDist( double N, double M, double n, double x ) { // N n_population // M successes // n n_sample // x X if( (x < 0.0) || (n < x) || (M < x) || (N < n) || (N < M) || (x < n - N + M) ) throw IllegalArgument(); double fFactor = BinomKoeff( n, x ) / BinomKoeff( N, M ) * BinomKoeff( N - n, M - x ); return fFactor; } void putFactorialElements( vector< double >& set, double fLower, double fUpper, double fBase ) { for ( double i = fLower; i <= fUpper; ++i ) { double fVal = fBase - i; if ( fVal > 1.0 ) set.push_back( fVal ); } } double getHypGeomDist( double N, double M, double n, double x ) { // N n_population // M successes // n n_sample // x X if( (x < 0.0) || (n < x) || (M < x) || (N < n) || (N < M) || (x < n - N + M) ) throw IllegalArgument(); typedef vector< double > LongMultiSet; LongMultiSet setEnum, setDenom; size_t nMaxSize = setEnum.max_size(); size_t nReservedSize = static_cast( x + min( n, M ) ); setEnum.reserve( nReservedSize + 10 ); setDenom.reserve( nReservedSize + 10 ); // nMaxSize = 100000; // cout << "max array size: " << nMaxSize << endl; // Trim coefficient C first double nCNumVarUpper = N - n - M + x - 1.0; double nCDenomVarLower = 1.0; if ( N - n - M + x >= M - x + 1.0 ) { nCNumVarUpper = M - x - 1.0; nCDenomVarLower = N - n - 2.0*(M - x) + 1.0; } double nCNumLower = N - n - nCNumVarUpper; double nCDenomUpper = N - n - M + x + 1.0 - nCDenomVarLower; if ( n >= M + 1.0 ) { if ( N - M < n + 1.0 ) { // Case 1 if ( N - n < n + 1.0 ) { // no overlap putFactorialElements( setEnum, 0.0, nCNumVarUpper, N - n ); putFactorialElements( setDenom, 0.0, N - n - 1.0, N ); } else { // overlap assert( nCNumLower < n + 1.0 ); putFactorialElements( setEnum, N - 2.0*n, nCNumVarUpper, N - n ); putFactorialElements( setDenom, 0.0, n - 1.0, N ); } assert( nCDenomUpper <= N - M ); if ( nCDenomUpper < n - x + 1.0 ) // no overlap putFactorialElements( setEnum, 1.0, N - M - n + x, N - M + 1.0 ); else { // overlap putFactorialElements( setEnum, 1.0, N - M - nCDenomUpper, N - M + 1.0 ); nCDenomUpper = n - x; nCDenomVarLower = N - M - 2.0*(n - x) + 1.0; } assert( nCDenomUpper <= M ); if ( nCDenomUpper < x + 1.0 ) { // no overlap putFactorialElements( setEnum, n - M, n - x - 1.0, n ); putFactorialElements( setDenom, nCDenomVarLower, N - n - M + x, N - n - M + x + 1.0 ); } else { putFactorialElements( setEnum, n - M, n - nCDenomUpper - 1.0, n ); putFactorialElements( setDenom, N - n - M + 1.0, N - n - M + x, N - n - M + x + 1.0 ); } } else { // Case 2 if ( n > M - 1.0 ) { // no overlap putFactorialElements( setEnum, 0.0, nCNumVarUpper, N - n ); putFactorialElements( setDenom, 0.0, M - 1.0, N ); } else { putFactorialElements( setEnum, M - n, nCNumVarUpper, N - n ); putFactorialElements( setDenom, 0.0, n - 1.0, N ); } assert( nCDenomUpper <= n ); if ( nCDenomUpper < n - x + 1.0 ) // no overlap putFactorialElements( setEnum, N - M - n + 1.0, N - M - n + x, N - M + 1.0 ); else { putFactorialElements( setEnum, N - M - n + 1.0, N - M - nCDenomUpper, N - M + 1.0 ); nCDenomUpper = n - x; nCDenomVarLower = N - M - 2.0*(n - x) + 1.0; } assert( nCDenomUpper <= M ); if ( nCDenomUpper < x + 1.0 ) { putFactorialElements( setEnum, n - M, n - x - 1.0, n ); putFactorialElements( setDenom, nCDenomVarLower, N - n - M + x, N - n - M + x + 1.0 ); } else { putFactorialElements( setEnum, n - M, n - nCDenomUpper - 1.0, n ); putFactorialElements( setDenom, N - n - M + 1.0, N - n - M + x, N - n - M + x + 1.0 ); } } } else { if ( N - M < M + 1.0 ) { // Case 3 if ( N - n < M + 1.0 ) { // No overlap putFactorialElements( setEnum, 0.0, nCNumVarUpper, N - n ); putFactorialElements( setDenom, 0.0, N - M - 1.0, N ); } else { putFactorialElements( setEnum, N - n - M, nCNumVarUpper, N - n ); putFactorialElements( setDenom, 0.0, n - 1.0, N ); } if ( n - x + 1.0 > nCDenomUpper ) // No overlap putFactorialElements( setEnum, 1.0, N - M - n + x, N - M + 1.0 ); else { // Overlap putFactorialElements( setEnum, 1.0, N - M - nCDenomUpper, N - M + 1.0 ); nCDenomVarLower = N - M - 2.0*(n - x) + 1.0; nCDenomUpper = n - x; } } else { // Case 4 assert( M >= n - x ); assert( M - x <= N - M + 1.0 ); if ( N - n < N - M + 1.0 ) { // No overlap putFactorialElements( setEnum, 0.0, nCNumVarUpper, N - n ); putFactorialElements( setDenom, 0.0, M - 1.0, N ); } else { // Overlap assert( nCNumLower <= N - M + 1.0 ); putFactorialElements( setEnum, M - n, nCNumVarUpper, N - n ); putFactorialElements( setDenom, 0.0, n - 1.0, N ); } if ( n - x + 1.0 > nCDenomUpper ) // No overlap putFactorialElements( setEnum, N - 2.0*M + 1.0, N - M - n + x, N - M + 1.0 ); else if ( M >= nCDenomUpper ) { putFactorialElements( setEnum, N - 2.0*M + 1.0, N - M - nCDenomUpper, N - M + 1.0 ); nCDenomUpper = n - x; nCDenomVarLower = N - M - 2.0*(n - x) + 1.0; } else { assert( M <= nCDenomUpper ); putFactorialElements( setDenom, nCDenomVarLower, N - n - 2.0*M + x, N - n - M + x + 1.0 ); nCDenomUpper = n - x; nCDenomVarLower = N - M - 2.0*(n - x) + 1.0; } } assert( nCDenomUpper <= n ); if ( nCDenomUpper < x + 1.0 ) { // No overlap putFactorialElements( setEnum, 0.0, n - x - 1.0, n ); putFactorialElements( setDenom, nCDenomVarLower, N - n - M + x, N - n - M + x + 1.0 ); } else { // Overlap (good!) putFactorialElements( setEnum, 0.0, n - 1.0 - nCDenomUpper, n ); putFactorialElements( setDenom, N - n - M + 1.0, N - n - M + x, N - n - M + x + 1.0 ); } } double fAns = 1.0; sort( setEnum.begin(), setEnum.end() ); sort( setDenom.begin(), setDenom.end() ); LongMultiSet::reverse_iterator it1 = setEnum.rbegin(), it1End = setEnum.rend(); LongMultiSet::reverse_iterator it2 = setDenom.rbegin(), it2End = setDenom.rend(); // cout << "numerator: " << setEnum.size() << endl; // cout << "denominator: " << setDenom.size() << endl; for ( ; it1 != it1End || it2 != it2End; ) { double fEnum = 1.0, fDenom = 1.0; if ( it1 != it1End ) fEnum = *it1++; if ( it2 != it2End ) fDenom = *it2++; fAns *= fEnum / fDenom; } return fAns; } int main( int argc, char** argv ) { double fPopUpper = 100.0; cout << setprecision( 16 ); if ( argc < 5 ) { // x >= 0.0 && x <= n && x <= M // n <= N // M <= N // x >= n - N + M >= 0 double fMaxDiff = 0.0; unsigned short nCount = 0; unsigned long nTotalCount = 0; for ( double N = 0.0; N < fPopUpper; ++N ) for ( double M = 0.0; M <= N; ++M ) for ( double n = 0.0; n <= N; ++n ) for ( double x = n - N + M; x <= n && x <= M; ++x ) { try { double fOld = ScHypGeomDist( N, M, n, x ); // old method double fNew = getHypGeomDist( N, M, n, x ); // new method double fAbsDiff = fabs( fOld - fNew ); if ( !approxEqual( fOld, fNew ) ) { cerr << "x = " << x << " n = " << n << " M = " << M << " N = " << N << endl; return EXIT_FAILURE; } fMaxDiff = fMaxDiff < fAbsDiff ? fAbsDiff : fMaxDiff; if ( nCount > 20000 ) { nCount = 0; cout << x << " " << n << " " << M << " " << N << "\t"; cout << "max diff: " << fMaxDiff << endl; } ++nCount; ++nTotalCount; } catch( const IllegalArgument& ex ) { } } cout << "maximum difference between old and new methods: " << fMaxDiff << endl; cout << "number of comparisons performed: " << nTotalCount << endl; } else { double N = atof( argv[4] ); // population double M = atof( argv[3] ); // # successes double n = atof( argv[2] ); // # sample double x = atof( argv[1] ); // i = x cout << "population (N) = " << N << endl; cout << " # success (M) = " << M << endl; cout << " # samples (n) = " << n << endl; cout << " x = " << x << endl; try { double f1 = getTime(); double fOld = ScHypGeomDist( N, M, n, x ); double f2 = getTime(); cout << "old: " << fOld << setprecision(4) << " (" << f2 - f1 << " sec)" << endl; f1 = getTime(); double fNew = getHypGeomDist( N, M, n, x ); f2 = getTime(); cout << setprecision(16) << "new: " << fNew << setprecision(4) << " (" << f2 - f1 << " sec)" << endl; } catch( const ArrayTooBig& ex ) { cout << "array too big" << endl; } catch( const IllegalArgument& ex ) { cout << "illegal argument" << endl; } } return EXIT_SUCCESS; }