Index: sc/source/core/tool/interpr3.cxx
===================================================================
RCS file: /cvs/sc/sc/source/core/tool/interpr3.cxx,v
retrieving revision 1.12
diff -u -r1.12 interpr3.cxx
--- sc/source/core/tool/interpr3.cxx	15 Nov 2004 16:35:44 -0000	1.12
+++ sc/source/core/tool/interpr3.cxx	17 Sep 2005 02:45:49 -0000
@@ -71,6 +71,8 @@
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
+#include <vector>
+#include <algorithm>
 
 #include "interpre.hxx"
 #include "global.hxx"
@@ -1107,46 +1109,241 @@
 	}
 }
 
+/** Local function used in the calculation of the hypergeometric distribution.
+ */
+void lcl_PutFactorialElements( ::std::vector< double >& cn, double fLower, double fUpper, double fBase )
+{
+	for ( double i = fLower; i <= fUpper; ++i )
+	{
+		double fVal = fBase - i;
+		if ( fVal > 1.0 )
+			cn.push_back( fVal );
+	}
+}
+
+/** Calculates a value of the hypergeometric distribution.
+
+	The algorithm is designed to avoid unnecessary multiplications and division
+	by expanding all factorial elements (9 of them).  It is done by excluding 
+	those ranges that overlap in the numerator and the denominator.  This allows
+	for a fast calculation for large values which would otherwise cause an overflow
+	in the intermediate values.
+
+	@author Kohei Yoshida <kohei@openoffice.org>
+
+	@see #i47296#
+
+ */
 void ScInterpreter::ScHypGeomDist()
 {
-	if ( MustHaveParamCount( GetByte(), 4 ) )
+	const size_t nMaxArraySize = 500000; // arbitrary max array size
+
+	if ( !MustHaveParamCount( GetByte(), 4 ) )
+		return;
+
+	double N = ::rtl::math::approxFloor(GetDouble());
+	double M = ::rtl::math::approxFloor(GetDouble());
+	double n = ::rtl::math::approxFloor(GetDouble());
+	double x = ::rtl::math::approxFloor(GetDouble());
+	
+	if( (x < 0.0) || (n < x) || (M < x) || (N < n) || (N < M) || (x < n - N + M) )
 	{
-		double N = ::rtl::math::approxFloor(GetDouble());
-		double M = ::rtl::math::approxFloor(GetDouble());
-		double n = ::rtl::math::approxFloor(GetDouble());
-		double x = ::rtl::math::approxFloor(GetDouble());
+		SetIllegalArgument();
+		return;
+	}
+
+	typedef ::std::vector< double > HypContainer;
+	HypContainer cnNumer, cnDenom;	
+
+	size_t nEstContainerSize = static_cast<size_t>( x + ::std::min( n, M ) );
+	size_t nMaxSize = ::std::min( cnNumer.max_size(), nMaxArraySize );
+	if ( nEstContainerSize > nMaxSize )
+	{
+		SetNoValue();
+		return;
+	}
+	cnNumer.reserve( nEstContainerSize + 10 );
+	cnDenom.reserve( nEstContainerSize + 10 );
+
+	// Trim coefficient C first
+	double fCNumVarUpper = N - n - M + x - 1.0;
+	double fCDenomVarLower = 1.0;
+	if ( N - n - M + x >= M - x + 1.0 )
+	{
+		fCNumVarUpper = M - x - 1.0;
+		fCDenomVarLower = N - n - 2.0*(M - x) + 1.0;
+	}
+
+	double fCNumLower = N - n - fCNumVarUpper;
+	double fCDenomUpper = N - n - M + x + 1.0 - fCDenomVarLower;
 
-		if( (x < 0.0) || (n < x) || (M < x) || (N < n) || (N < M) || (x < n - N + M) )
+	double fDNumVarLower = n - M;
+
+	if ( n >= M + 1.0 )
+	{
+		if ( N - M < n + 1.0 )
 		{
-			SetIllegalArgument();
-			return;
+			// Case 1
+
+			if ( N - n < n + 1.0 )
+			{
+				// no overlap
+				lcl_PutFactorialElements( cnNumer, 0.0, fCNumVarUpper, N - n );
+				lcl_PutFactorialElements( cnDenom, 0.0, N - n - 1.0, N );
+			}
+			else
+			{
+				// overlap
+				DBG_ASSERT( fCNumLower < n + 1.0, "ScHypGeomDist: wrong assertion" );
+				lcl_PutFactorialElements( cnNumer, N - 2.0*n, fCNumVarUpper, N - n );
+				lcl_PutFactorialElements( cnDenom, 0.0, n - 1.0, N );
+			}
+
+			DBG_ASSERT( fCDenomUpper <= N - M, "ScHypGeomDist: wrong assertion" );
+
+			if ( fCDenomUpper < n - x + 1.0 )
+				// no overlap
+				lcl_PutFactorialElements( cnNumer, 1.0, N - M - n + x, N - M + 1.0 );
+			else
+			{
+				// overlap
+				lcl_PutFactorialElements( cnNumer, 1.0, N - M - fCDenomUpper, N - M + 1.0 );
+
+				fCDenomUpper = n - x;
+				fCDenomVarLower = N - M - 2.0*(n - x) + 1.0;
+			}
 		}
-		double fFactor =
-			BinomKoeff( n, x ) / BinomKoeff( N, M ) * BinomKoeff( N - n, M - x );
+		else
+		{
+			// Case 2
 
-/*
-		double fFactor;
-		if (x == n - N + M)
-			fFactor = BinomKoeff(M,x)/BinomKoeff(N,n);
+			if ( n > M - 1.0 )
+			{
+				// no overlap
+				lcl_PutFactorialElements( cnNumer, 0.0, fCNumVarUpper, N - n );
+				lcl_PutFactorialElements( cnDenom, 0.0, M - 1.0, N );
+			}
+			else
+			{
+				lcl_PutFactorialElements( cnNumer, M - n, fCNumVarUpper, N - n );
+				lcl_PutFactorialElements( cnDenom, 0.0, n - 1.0, N );
+			}
+
+			DBG_ASSERT( fCDenomUpper <= n, "ScHypGeomDist: wrong assertion" );
+
+			if ( fCDenomUpper < n - x + 1.0 )
+				// no overlap
+				lcl_PutFactorialElements( cnNumer, N - M - n + 1.0, N - M - n + x, N - M + 1.0 );
+			else
+			{
+				lcl_PutFactorialElements( cnNumer, N - M - n + 1.0, N - M - fCDenomUpper, N - M + 1.0 );
+				fCDenomUpper = n - x;
+				fCDenomVarLower = N - M - 2.0*(n - x) + 1.0;
+			}
+		}
+
+		DBG_ASSERT( fCDenomUpper <= M, "ScHypGeomDist: wrong assertion" );
+	}
+	else
+	{
+		if ( N - M < M + 1.0 )
+		{
+			// Case 3
+
+			if ( N - n < M + 1.0 )
+			{
+				// No overlap
+				lcl_PutFactorialElements( cnNumer, 0.0, fCNumVarUpper, N - n );
+				lcl_PutFactorialElements( cnDenom, 0.0, N - M - 1.0, N );
+			}
+			else
+			{
+				lcl_PutFactorialElements( cnNumer, N - n - M, fCNumVarUpper, N - n );
+				lcl_PutFactorialElements( cnDenom, 0.0, n - 1.0, N );
+			}
+
+			if ( n - x + 1.0 > fCDenomUpper )
+				// No overlap
+				lcl_PutFactorialElements( cnNumer, 1.0, N - M - n + x, N - M + 1.0 );
+			else
+			{
+				// Overlap
+				lcl_PutFactorialElements( cnNumer, 1.0, N - M - fCDenomUpper, N - M + 1.0 );
+
+				fCDenomVarLower = N - M - 2.0*(n - x) + 1.0;
+				fCDenomUpper = n - x;
+			}
+		}
 		else
 		{
-			double fIndex = N - M - n;
-			if (fIndex >= 0.0)
+			// Case 4
+
+			DBG_ASSERT( M >= n - x, "ScHypGeomDist: wrong assertion" );
+			DBG_ASSERT( M - x <= N - M + 1.0, "ScHypGeomDist: wrong assertion" );
+
+			if ( N - n < N - M + 1.0 )
 			{
-				fFactor = BinomKoeff(N-M,n)/BinomKoeff(N,n);
-				for (double i = 0; i < x; i++)
-					fFactor *= (M-i)*(n-i)/((i+1.0)*(N-M-n+i+1.0));
+				// No overlap
+				lcl_PutFactorialElements( cnNumer, 0.0, fCNumVarUpper, N - n );
+				lcl_PutFactorialElements( cnDenom, 0.0, M - 1.0, N );
 			}
 			else
 			{
-				fFactor = BinomKoeff(M,-fIndex)/BinomKoeff(N,n);
-				for (double i = -fIndex + 1.0; i < x; i++)
-					fFactor *= (M-i)*(n-i)/((i+1)*(N-M-n+i+1.0));
+				// Overlap
+				DBG_ASSERT( fCNumLower <= N - M + 1.0, "ScHypGeomDist: wrong assertion" );
+
+				lcl_PutFactorialElements( cnNumer, M - n, fCNumVarUpper, N - n );
+				lcl_PutFactorialElements( cnDenom, 0.0, n - 1.0, N );
+			}
+
+			if ( n - x + 1.0 > fCDenomUpper )
+				// No overlap
+				lcl_PutFactorialElements( cnNumer, N - 2.0*M + 1.0, N - M - n + x, N - M + 1.0 );
+			else if ( M >= fCDenomUpper )
+			{
+				lcl_PutFactorialElements( cnNumer, N - 2.0*M + 1.0, N - M - fCDenomUpper, N - M + 1.0 );
+
+				fCDenomUpper = n - x;
+				fCDenomVarLower = N - M - 2.0*(n - x) + 1.0;
+			}
+			else
+			{
+				DBG_ASSERT( M <= fCDenomUpper, "ScHypGeomDist: wrong assertion" );
+				lcl_PutFactorialElements( cnDenom, fCDenomVarLower, N - n - 2.0*M + x, 
+						N - n - M + x + 1.0 );
+
+				fCDenomUpper = n - x;
+				fCDenomVarLower = N - M - 2.0*(n - x) + 1.0;				
 			}
 		}
-*/
-		PushDouble(fFactor);
+
+		DBG_ASSERT( fCDenomUpper <= n, "ScHypGeomDist: wrong assertion" );
+
+		fDNumVarLower = 0.0;
 	}
+
+	double nDNumVarUpper   = fCDenomUpper < x + 1.0 ? n - x - 1.0     : n - fCDenomUpper - 1.0;
+	double nDDenomVarLower = fCDenomUpper < x + 1.0 ? fCDenomVarLower : N - n - M + 1.0;
+	lcl_PutFactorialElements( cnNumer, fDNumVarLower, nDNumVarUpper, n );
+	lcl_PutFactorialElements( cnDenom, nDDenomVarLower, N - n - M + x, N - n - M + x + 1.0 );
+	
+	::std::sort( cnNumer.begin(), cnNumer.end() );
+	::std::sort( cnDenom.begin(), cnDenom.end() );
+	HypContainer::reverse_iterator it1 = cnNumer.rbegin(), it1End = cnNumer.rend();
+	HypContainer::reverse_iterator it2 = cnDenom.rbegin(), it2End = cnDenom.rend();
+
+	double fFactor = 1.0;
+	for ( ; it1 != it1End || it2 != it2End; )
+	{
+		double fEnum = 1.0, fDenom = 1.0;
+		if ( it1 != it1End )
+			fEnum  = *it1++;
+		if ( it2 != it2End )
+			fDenom = *it2++;
+		fFactor *= fEnum / fDenom;
+	}
+
+	PushDouble(fFactor);
 }
 
 void ScInterpreter::ScGammaDist()