#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #% Maple Stats Package for MATH 375-6 #% after a package by Z. Karian, updated for #% Maple 8 and later by J. Little #% Last modified: January 12, 2006 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% with(plots): ############################################### # Some first descriptive statistics ############################################### Range := proc(X) return max(op(X)) - min(op(X)); end proc: Mean := proc(X) local N,Sum; N:=nops(X); Sum:=add(X[i],i=1..N); return Sum/N; end proc: Variance := proc(X) local N,Xbar,i,SSQ; N := nops(X); Xbar:=Mean(X); SSQ := add(X[i]^2,i=1..N); return (SSQ - N*Xbar^2)/(N - 1); end proc: StandardDeviation := proc(X) return sqrt(Variance(X)) end proc: Skewness := proc(X) #normalized 3rd moment about mean local sc, M, N, V, i; N := nops(X); M := Mean(X); V := ((N - 1)*Variance(X))/N; sc := 0; for i to N do sc := sc + (X[i] - M)^3 end do; return sc/(N*V^(3/2)) end proc: Kurtosis := proc(X) #normalized 4th moment about mean local sf, M, N, V, i; M := Mean(X); N := nops(X); V := ((N - 1)*Variance(X))/N; sf := 0; for i to N do sf := sf + (X[i] - M)^4 end do; return sf/(N*V^2) end proc: Percentile := proc(L, p) # # The 100*p-th percentile of the list L # local n, LL, F, r, ab; if p < 0 or 1 < p then ERROR("Second argument must be between 0 and 1") end if; n := nops(L); if p < 1/(n + 1) or n/(n + 1) < p then ERROR("Percentile does not exist") end if; LL := sort(L); F := convert((n + 1)*p, fraction); r := trunc(F); ab := F - r; return LL[r] + ab*(LL[r + 1] - LL[r]); end proc: ###################################################### # Random numbers and samples from given distributions ###################################################### MakeRandom := proc() global _seed; _seed := round(123498765*time()) end proc: MakeRandom(); RandomNumbers := proc(n) local i; return [seq(Float(rand(), -12), i = 1 .. n)] end proc: DieRoll := proc(n,m) #n rolls of a fair m-sided die return [seq(rand(1..m)(),i=1..n)]; end proc: FSample:=proc(CDF,a,b,n) local i,j,Samples,x,sample; Samples:=[]; for i to n do x:=Float(rand(),-12); sample:=[fsolve(CDF(y)=x,y)]; for j to nops(sample) do if (a<=sample[j]) and (sample[j]<=b) then Samples:=[op(Samples),sample[j]]; end if; end do; end do: return Samples; end proc: UniformSample := proc(a,b,n) local i; return [seq((b - a)*Float(rand(),-12) + a, i = 1 .. n)] end proc: NormalSample := proc(mu, sigma, n) #using "Box-Muller" method local X,i,j,u,v,a,b; X := array(1..n); j:=1; for i to trunc(n/2) do u := Float(rand(), -12); v := Float(rand(), -12); a := sqrt(-2*ln(u))*sin(2*Pi*v); X[j] := evalf(sigma*a + mu); j := j+1; b := sqrt(-2*ln(u))*cos(2*Pi*v); X[j] := evalf(sigma*b + mu); j := j+1; end do; if n > 2*trunc(n/2) then u := Float(rand(), -12); v := Float(rand(), -12); a := sqrt(-2*ln(u))*sin(sqrt(2*Pi)*v); X[n] := evalf(sigma*a + mu); end if; return [seq(X[i], i = 1 .. n)] end proc: ExponentialSample := proc(lambda,n) local i; return [seq(-ln(1-Float(rand(),-12))*lambda, i = 1 .. n)] end proc: ChiSquareSample := proc(nu, n) # nu degrees of freedom local i, j, X, A, t; A := array(1 .. n); X := NormalSample(0,1,nu*n); j := 1; for i to n do A[i]:=add(X[t]^2, t = j .. j + nu - 1); j := j + nu end do; return [seq(A[i], i = 1 .. n)] end proc: GammaSample := proc(alpha, beta, n) local WW, V, X, ay, Zee, d, q, theta, k, j, Y, U1, U2, b, P; if alpha <= 0 then ERROR("First argument must be positive") end if; if beta <= 0 then ERROR("Second argument must be positive") end if; X := array(1 .. n); k := 1; while k <= n do if alpha < 1 then b := evalf((exp(1) + alpha)/exp(1)); U1 := Float(rand(), -12); P := b*U1; if 1 < P then Y := evalf(-ln((b - P)/alpha)); U2 := Float(rand(), -12); if U2 <= evalf(Y^(alpha - 1)) then X[k] := beta*Y; k := k + 1 end if else Y := P^(1/alpha); U2 := Float(rand(), -12); if U2 <= evalf(exp(-Y)) then X[k] := beta*Y; k := k + 1 end if end if else ay := evalf(1/sqrt(2*alpha - 1)); b := evalf(alpha - ln(4)); q := evalf(alpha + 1/ay); theta := 4.5; d := 1 + ln(theta); U1 := Float(rand(), -12); U2 := Float(rand(), -12); V := evalf(ay*ln(U1/(1 - U1))); Y := evalf(alpha*exp(V)); Zee := U1^2*U2; WW := b + q*V - Y; if 0 <= evalf(WW + d - theta*Zee) then X[k] := beta*Y; k := k + 1 else if evalf(ln(Zee)) <= WW then X[k] := beta*Y; k := k + 1 end if end if end if end do; return([seq(X[j], j = 1 .. n)]) end proc: Frequencies := proc(X,a,b,n) #Frequencies of X on the range a..b #in n intervals local i, Counts, SX, bdry, delta, ptr; Counts := array(1 .. n,sparse); SX := sort(X); delta := (b - a)/n; ptr := 1; while ptr <= nops(SX) and SX[ptr] < a do ptr:=ptr+1; end do; for i to n do bdry := i*delta + a; while ptr <= nops(SX) and SX[ptr] <= bdry do Counts[i] := Counts[i] + 1; ptr := ptr + 1 end do; end do; return [seq(Counts[i], i = 1 .. n)] end proc: DiscreteS := proc() local N, A, B, ll, ul, ptr, var, F, i, r, j, L, expr, n; option `July 5, 1993 -- Zaven A. Karian`; description "n rand. obs. from dist. described by L"; if nargs = 2 and not type(args[1], list) then ERROR("When two arguments are used, the first must be a list") fi; if nargs = 2 and not type(args[2], posint) then ERROR("When two arguments are used, the second must be a positive integer") fi; if nargs = 3 and not type(args[1], algebraic) then ERROR("When three arguments are used, the first must be an expression") fi; if nargs = 3 and not type(args[2], range) then ERROR("When three arguments are used, the second must be a range") fi; if nargs = 3 and not type(args[3], posint) then ERROR("When three arguments are used, the third must be a positive integer") fi; if nargs = 3 then expr := args[1]; n := args[3]; var := op(select(type, indets(expr), name)); ll := lhs(args[2]); ul := rhs(args[2]); B := array(1 .. 2*ul - 2*ll + 2); ptr := 1; for i from ll to ul do B[ptr] := i; B[ptr + 1] := subs(var = i, expr); ptr := ptr + 2 od; L := convert(B, list) else L := args[1]; n := args[2] fi; N := 1/2*nops(L); A := array(1 .. n); F := array(1 .. N); F[1] := L[2]; for i from 2 to N do F[i] := F[i - 1] + L[2*i] od; for i to n do r := op(RandomNumbers(1)); if r < F[1] then A[i] := L[1] fi; if F[N] < r then A[i] := L[2*N - 1] fi; for j from 2 to N do if r < F[j] and F[j - 1] <= r then A[i] := L[2*j - 1] fi od od; [seq(A[i], i = 1 .. n)] end: HypergeometricSample :=proc(N::posint, n::posint, r::posint, NumS::posint) local lb, ub, A, ptr, y, L, i; lb := max(0, n-(N-r)); ub := min(n,r); A := array(1 .. 2*ub - 2*lb + 2); ptr := 1; for y from lb to ub do A[ptr] := y; A[ptr + 1] := binomial(r, y)*binomial(N-r,n-y)/ binomial(N,n); ptr := ptr + 2 od; L := [seq(A[i], i = 1 .. 2*ub - 2*lb + 2)]; DiscreteS(L, NumS) end: ################################################## # Various pdf's and cdf's ################################################## BetaCDF := proc(a, b, y) local C, x, A; if y < 0 then A := 0 elif 1 < y then A := 1 else C := GAMMA(a + b + 2)/(GAMMA(a + 1)*GAMMA(b + 1)); A := evalf(Int(C*x^a*(1 - x)^b, x = 0 .. y)) end if; return A end proc: BinomialCDF := proc(N, p, y) local i; return evalf(add(binomial(N, i)*p^i*(1 - p)^(N - i), i = 0 .. y)) end proc: ChiSquareCDF := proc(nu, y) local a, x, val; if y < 0 then val:=0 else a := 1/(GAMMA(1/2*nu)*2^(1/2*nu)); val:=evalf(Int(a*x^(1/2*nu - 1)*exp(- 1/2*x), x = 0 .. y)) end if; return val end proc: ExponentialCDF := proc(t, y) local x,val; if y < 0 then val:=0 else val:= evalf(int(exp(- x/t)/t, x = 0 .. y)) end if; return val; end proc: FCDF := proc(ndf, ddf, y) #ndf is numerator d.f.; ddf is denominator d.f. local num, den, x, A; if y < 0 then A := 0 else num := x^(1/2*ndf - 1)*GAMMA(1/2*ndf + 1/2*ddf)* (ndf/ddf)^(1/2*ndf); den := GAMMA(1/2*ndf)*GAMMA(1/2*ddf)* (1 + ndf*x/ddf)^(1/2*ndf + 1/2*ddf); A := evalf(Int(num/den, x = 0 .. y)) end if; return A end proc: GammaCDF := proc(a, t, y) local b, x, A; if y < 0 then A := 0 else b := 1/(GAMMA(a)*t^a); A := evalf(Int(b*x^(a - 1)*exp(- x/t), x = 0 .. y)) end if; return A end proc: GeometricCDF := proc(p, y) local i, val; if y < 1 then val:=0 else val:=evalf(sum(p*(1 - p)^(i - 1), i = 1 .. y)) end if; return val end proc: NormalCDF := proc(mu, sigma, y) # cdf for N(mu,sigma) local t; t := (y - mu)/(sqrt(2)*sigma); return evalf(.5 + .5*erf(t)) end proc: PoissonCDF := proc(l, y) local i,val; if y < 0 then val:=0 else val:=evalf(add(l^i*exp(-l)/i!, i = 0 .. y)) end if; return val end proc: TCDF := proc(df, y) #cdf for Student's t-distribution local C1, C2, C3, x; C1 := GAMMA(1/2*df + 1/2); C2 := GAMMA(1/2*df)*sqrt(df*Pi); C3 := (1 + x^2/df)^(1/2*df + 1/2); return evalf(Int(C1/(C2*C3), x = -infinity .. y)) end proc: UniformCDF := proc(a,b,y) local val; if y < a then val:=0 elif y > b then val:=1 else val:=evalf((y - a)/(b - a)) end if; return val end proc: BetaPDF := proc(a, b, x) #a, b must be positive local C, val; if (x <= 0 or 1 <= x) then val:=0 else C := GAMMA(a + b + 2)/(GAMMA(a + 1)*GAMMA(b + 1)); val:=C*x^a*(1 - x)^b end if; return val end proc: BinomialPDF := proc(N, p, x) local val; if (x < 0 or N < x) then val:=0 else val:=binomial(N, x)*p^x*(1 - p)^(N - x) end if; return val end proc: ChiSquarePDF := proc(nu, x) # nu degrees of freedom local a,val; if x <= 0 then val:= 0 else a := 1/(GAMMA(1/2*nu)*2^(1/2*nu)); val:=a*x^(1/2*nu - 1)*exp(- 1/2*x) end if; return val end proc: ExponentialPDF := proc(lambda, x) local val; if x < 0 then val:=0 else val:=exp(- x/lambda)/lambda end if; return val end proc: FPDF := proc(ndf, ddf, x) local num, den, val; if x <= 0 then val:=0 else num := x^(1/2*ndf - 1)*GAMMA(1/2*ndf + 1/2*ddf)*(ndf/ddf)^(1/2*ndf); den := GAMMA(1/2*ndf)*GAMMA(1/2*ddf)*(1 + ndf*x/ddf)^(1/2*ndf + 1/2*ddf); val:= num/den end if; return val end proc: GammaPDF := proc(a, t, x) local b,val; if x < 0 then val:=0 else b := 1/(GAMMA(a)*t^a); val:= b*x^(a - 1)*exp(- x/t) end if; return val end proc: GeometricPDF := proc(p,y) local q,val; q:=1-p; if y < 1 then val:=0 else val:=q^(y-1)*p end if; return val; end proc: HypergeometricPDF := proc(N,n,r,x) return binomial(r,x)*binomial(N-r, n-x)/binomial(N,n) end proc: NormalPDF := proc(mu, sigma, x) local a; a := 1/(sqrt(2*Pi)*sigma); return a*exp(- 1/2*(x - mu)^2/sigma^2) end proc: PoissonPDF := proc(lambda, x) local val; if x < 0 then val:=0 else val:=lambda^x*exp(-lambda)/x! end if; return val end proc: TPDF := proc(df, x) local C1, C2, C3; C1 := GAMMA(1/2*df + 1/2); C2 := GAMMA(1/2*df)*sqrt(df*Pi); C3 := (1 + x^2/df)^(1/2*df + 1/2); C1/(C2*C3) end proc: UniformPDF := proc(a,b,x) local val; if (x < a or x > b) then val:=0 else val:=1/(b - a) end if; return val end proc: WeibullPDF := proc(a, b, x) local A,val; if x < 0 then val:=0 else A := a*(x/b)^(a - 1)/b; val:= A*exp(-(x/b)^a); end if; return val end proc: ####################################################### # Graphical routines ####################################################### Hist := proc(Y,min,max,nint) local YYY, YY, n, ell, u, v, i, j, k, points, t, U, V, counts; YY := sort(evalf(Y)); n := nops(YY); YYY:=array(1..n); j := 1; u := array(1..nint+1); counts := array(1 .. nint); # the frequency counts for i to n do if min <= YY[i] and YY[i] <= max then YYY[j] := YY[i]; j := j + 1 end if end do; YYY:=convert(YYY,list); n:=nops(YYY); if j-1 < n then lprint("WARNING",n-j+1,"data points out of range") end if; # lprint(n); counts:=Frequencies(YYY,min,max,nint); # lprint(counts); u[1]:=min; ell:=(max-min)/nint; for i from 2 to nint+1 do u[i]:=u[i-1] + ell; end do; # lprint(seq(u[i],i=1..nint)); points:=[seq(op([[u[i],0],[u[i],(counts[i]/n)], [u[i+1],(counts[i]/n)],[u[i+1],0]]),i=1..nint)]; plot(points,min..max+0.01,color=red); end proc: NormHist := proc(Y,min,max,nint) # # Same idea as Hist, except heights scaled so # total area = 1, for comparison with empirical # or theoretical pdf # local YYY, YY, n, ell, u, v, i, j, k, points, t, U, V, counts,DeltaY; DeltaY:=(max-min)/nint; YY := sort(evalf(Y)); n := nops(YY); YYY:=array(1..n); j := 1; u := array(1..nint+1); counts := array(1 .. nint); # the frequency counts for i to n do if min <= YY[i] and YY[i] <= max then YYY[j] := YY[i]; j := j + 1 end if end do; YYY:=convert(YYY,list); n:=nops(YYY); if j-1 < n then lprint("WARNING",n-j+1,"data points out of range") end if; # lprint(n); counts:=Frequencies(YYY,min,max,nint); # lprint(counts); u[1]:=min; ell:=(max-min)/nint; for i from 2 to nint+1 do u[i]:=u[i-1] + ell; end do; # lprint(seq(u[i],i=1..nint)); points:=[seq(op([[u[i],0],[u[i],(counts[i]/n)/DeltaY], [u[i+1],(counts[i]/n)/DeltaY],[u[i+1],0]]),i=1..nint)]; plot(points,min..max+0.01,color=red); end proc: ScatterPlot := proc(X,Y) # Produces a scatter plot of X-Y pairs local nX, nY, A, Minx, Maxx, Miny, Maxy, XR, YR, EX, EY, i; nX := nops(X); nY := nops(Y); if nX <> nY then ERROR("X and Y lists must have same length") else A := array(1 .. 2*nX); for i to nX do A[2*i - 1] := X[i]; A[2*i] := Y[i] end do; Minx := min(op(X)); Maxx := max(op(X)); Miny := min(op(Y)); Maxy := max(op(Y)); EX := 1/20*Maxx - 1/20*Minx; EY := 1/20*Maxy - 1/20*Miny; XR := Minx - EX .. Maxx + EX; YR := Miny - EY .. Maxy + EY; plot([seq([A[2*i - 1], A[2*i]], i = 1 .. nX)], XR, YR, style = POINT) end if end proc: PlotEmpiricalCDF := proc(X,a,b) # Graphical display of empirical distribution of X local i, A, N, XS; N := nops(X); XS := sort(X); A := [[min(a, X[1]), 0], [XS[1], 0]]; for i to N - 1 do if XS[i] < XS[i + 1] then A := [op(A),[XS[i], i/N],[XS[i + 1], i/N]]; fi od; A := [op(A),[XS[N], 1], [max(b, XS[N]), 1]]; plot(A, color = red) end proc: PlotEmpiricalPDF:=proc(X,a,b) # "Graphic display of probability histogram" local YYY, YY, Y, n, l, u, v, i, j, k, p, t, U, V, count, nint; YY := sort(X); n := nops(YY); nint := 20; count := 0; for i to n do if YY[i] < a or b < YY[i] then count := count + 1 end if end do; if 0 < count then print("WARNING: There are", count, " data points out of plot range.") end if; j := 1; YYY := array(1 .. n); u := array(1 .. nint + 1); v := array(1 .. nint + 1); for i to n do if a <= YY[i] and YY[i] <= b then YYY[j] := YY[i]; j := j + 1 end if end do; n := j - 1; Y := [seq(YYY[k], k = 1 .. n)]; p := array(1 .. 2*nint + 4); l := (b - a)/nint; u[1] := a; v[1] := 0; for i from 2 to nint + 1 do u[i] := u[i - 1] + l; v[i] := 0 end do; if u[nint + 1] < Y[n] then u[nint + 1] := Y[n] end if; i := 2; for j to n do if Y[j] <= u[i] then v[i - 1] := v[i - 1] + 1 else i := i + 1; j := j - 1 end if end do; U[1] := a - 1/2*l; V[1] := 0; t := 2; for i to nint do U[t] := u[i] + 1/2*l; V[t] := v[i]; t := t + 1 end do; U[nint + 2] := b + 1/2*l; V[nint + 2] := 0; for i to nint + 2 do p[2*i - 1] := U[i]; p[2*i] := V[i]/(l*n) end do; plot( [seq([p[2*i - 1], p[2*i]], i = 1 .. nint + 2)], color = red, linestyle = 3) end proc: BoxWhisker := proc() # # ``box-and-whisker plot'' of one or more lists # local N, i, A, D, m, M, Minn, Maxx, P25, P50, P75, eh; N := nargs; Minn := min(op(args[1])); Maxx := max(op(args[1])); for i to N do if not type(args[i], list) then ERROR("All arguments must be lists") end if end do; A := {}; for i to N do D := args[i]; m := min(op(D)); M := max(op(D)); if m < Minn then Minn := m fi; if Maxx < M then Maxx := M fi; P25 := Percentile(D, .25); P50 := Percentile(D, .50); P75 := Percentile(D, .75); A := A union {[[P75, i - .4], [P75, i + .4]], [[P25, i + .4], [P75, i + .4]], [[P25, i - .4], [P25, i + .4]], [[P50, i - .4], [P50, i + .4]], [[m, i], [P25, i]], [[P75, i], [M, i]], [[P25, i - .4], [P75, i - .4]]} end do; eh := 1/20*Maxx - 1/20*Minn; plot(A, Minn - eh .. Maxx + eh, 0 .. N + 1, color = blue) end proc: ############################################################# # Confidence Intervals, etc. ############################################################# MeanLSCI := proc(Data,alpha) #two-sided large-sample confidence #interval for mean local sm, ssd, N, left, right, cutoff; N:=nops(Data); sm:=Mean(Data); ssd:=StandardDeviation(Data); cutoff:=solve(NormalCDF(0,1,x)=1-alpha/2); left:=evalf(sm - cutoff*ssd/sqrt(N)); right:=evalf(sm + cutoff*ssd/sqrt(N)); return [left,right]; end proc: CIPlot := proc(mu,sigma,alpha,N) #Generates 100 random samples of size N #from a normal distribution with #mean mu and standard deviation #sigma, computes the alpha-level # large-sample #confidence interval for the mean #from each sample and displays them #relative to the population mean local i,PP,PL,CI,Pcolor,count; PL:={plot([[mu,0],[mu,20.2]],color=blue)}: count:=0; for i to 100 do CI:=MeanLSCI(NormalSample(mu,sigma,N),alpha); if (CI[1] > mu) or (CI[2] < mu) then Pcolor:=red; else Pcolor:=black; count:=count+1; end if; PP:=plot([[CI[1],i*.2],[CI[2],i*.2]],color=Pcolor): PL:={op(PL),PP}; end do; print(count,"confidence intervals contain population mean (shown in blue)"); display(PL,axes=boxed,ytickmarks=[]); end proc: ############################################################# # Games of Chance, etc. ############################################################# Craps:=proc(n,verbose) #simulates n games of the dice game "craps" #outcomes is a list of n 0,1 entries (1 = win, #0 = lose) recording the outcomes of the games #if the verbose output option is true, #then the rolls in each game will be printed local game,roll,point,total,outcomes,over,games; outcomes:=[]; for games to n do game:=[]; roll:=DieRoll(2,6); total:=roll[1]+roll[2]; game:=[op(game),total]; if total in {7,11} then outcomes:=[op(outcomes),1]; if verbose then lprint("a win",game); end if; elif total in {2,3,12} then outcomes:=[op(outcomes),0]; if verbose then lprint("a loss",game); end if; else point:=total; over:=false; while not over do roll:=DieRoll(2,6); total:=roll[1]+roll[2]; game:=[op(game),total]; if total = point then outcomes:=[op(outcomes),1]; if verbose then lprint("a win",game); end if; over:=true; elif total = 7 then outcomes:=[op(outcomes),0]; if verbose then lprint("a loss",game); end if; over:=true; end if; end do; end if; end do; return outcomes; end proc: