((section 2 "Outdated egg!" (p "This is an egg for CHICKEN 4, the unsupported old release.  You're almost certainly looking for " (int-link "/eggref/5/statistics" "the CHICKEN 5 version of this egg") ", if it exists.") (p "If it does not exist, there may be equivalent functionality provided by another egg; have a look at the " (link "https://wiki.call-cc.org/chicken-projects/egg-index-5.html" "egg index") ". Otherwise, please consider porting this egg to the current version of CHICKEN.")) (section 2 "Statistics" (p "This library is a port of " (link "http://compbio.ucdenver.edu/hunter/" "Larry Hunter") "'s Lisp statistics library to CHICKEN Scheme.") (p "The library provides a number of formulae and methods taken from the book \"Fundamentals of Biostatistics\" by Bernard Rosner (5th edition).") (toc) (section 3 "Statistical Distributions" (p "To use this library, you need to understand the underlying statistics.  In brief:") (p "The " (link "http://en.wikipedia.org/wiki/Binomial_distribution" "Binomial distribution") " is used when counting discrete events in a series of trials, each of which has a probability p of producing a positive outcome.  An example would be tossing a coin " (tt "n") " times: the probability of a head is " (tt "p") ", and the distribution gives the probability of each possible number of heads in the " (tt "n") " trials.  The binomial distribution is defined as B(n, p).") (p "The " (link "http://en.wikipedia.org/wiki/Poisson_distribution" "Poisson distribution") " is used to count discrete events which occur with a known average rate.  A typical example is the decay of radioactive elements.  A Poisson distribution is defined as Pois(mu).") (p "The " (link "http://en.wikipedia.org/wiki/Normal_distribution" "Normal distribution") " is used for real-valued events which cluster around a specific mean with a symmetric variance.  A typical example would be the distribution of people's heights.  
A normal distribution is defined N(mean, variance).")) (section 3 "Provided Functions" (section 4 "Utilities" (def (sig (procedure "(average-rank value sorted-values)" (id average-rank))) (p "returns the average position of given value in the list of sorted values: the rank is based from 1.") (pre "> (average-rank 2 '(1 2 2 3 4))\n5/2")) (def (sig (procedure "(beta-incomplete x a b)" (id beta-incomplete)))) (def (sig (procedure "(bin-and-count items n)" (id bin-and-count))) (p "Divides the range of the list of " (tt "items") " into " (tt "n") " bins, and returns a vector of the number of items which fall into each bin.") (pre "> (bin-and-count '(1 1 2 3 3 4 5) 5)\n#(2 1 2 1 1)")) (def (sig (procedure "(combinations n k)" (id combinations))) (p "returns the number of ways to select " (tt "k") " items from " (tt "n") ", where the order does not matter.")) (def (sig (procedure "(factorial n)" (id factorial))) (p "returns the factorial of " (tt "n") ".")) (def (sig (procedure "(find-critical-value p-function p-value #:increasing?)" (id find-critical-value))) (p "given a monotonic function " (tt "p-function") " taking a single value " (tt "x") " to " (tt "y") ", returns the value of " (tt "x") " which makes " (tt "(p-function x)") " closest to " (tt "p-value") ".  
A boolean keyword parameter " (tt "#:increasing?") " determines if function should be increasing or decreasing (the default).")) (def (sig (procedure "(fisher-z-transform r)" (id fisher-z-transform))) (p "returns the transformation of a correlation coefficient " (tt "r") " into an approximately normal distribution.")) (def (sig (procedure "(gamma-incomplete a x)" (id gamma-incomplete)))) (def (sig (procedure "(gamma-ln x)" (id gamma-ln)))) (def (sig (procedure "(permutations n k)" (id permutations))) (p "returns the number of ways to select " (tt "k") " items from " (tt "n") ", where the order does matter.")) (def (sig (procedure "(random-normal mean sd)" (id random-normal))) (p "returns a random number distributed with specified mean and standard deviation.")) (def (sig (procedure "(random-pick items)" (id random-pick))) (p "returns a random item from the given list of items.")) (def (sig (procedure "(random-sample n items)" (id random-sample))) (p "returns a random sample from the list of items without replacement of size " (tt "n") ".")) (def (sig (procedure "(random-weighted-sample n items weights)" (id random-weighted-sample))) (p "returns a random sample from the list of items without replacement of size " (tt "n") ", where each sample has a defined probability of selection (weight).")) (def (sig (procedure "(sign n)" (id sign))) (p "returns 0, 1 or -1 according to if " (tt "n") " is zero, positive or negative.")) (def (sig (procedure "(square n)" (id square)))) (def (sig (procedure "(cumsum sequences)" (id cumsum))) (p "returns the cumulative sum of a sequence."))) (section 4 "Descriptive statistics" (p "These functions provide information on a given list of numbers, the " (tt "items") ".  
Note, the list does not have to be sorted.") (def (sig (procedure "(mean items)" (id mean))) (p "returns the arithmetic mean of the " (tt "items") " (the sum of the numbers divided by the number of numbers).") (pre "(mean '(1 2 3 4 5)) => 3")) (def (sig (procedure "(median items)" (id median))) (p "returns the value which separates the upper and lower halves of the list of numbers.") (pre "(median '(1 2 3 4)) => 5/2")) (def (sig (procedure "(mode items)" (id mode))) (p "returns two " (b "values") ".  The first is a list of the " (i "modes") " and the second is the frequency.  (A mode of a list of numbers is the most frequently occurring value.)") (pre "> (mode '(1 2 3 4))\n(1 2 3 4)\n1\n> (mode '(1 2 2 3 4))\n(2)\n2\n> (mode '(1 2 2 3 3 4))\n(2 3)\n2")) (def (sig (procedure "(geometric-mean items)" (id geometric-mean))) (p "returns the geometric mean of the " (tt "items") " (the result of multiplying the items together and then taking the nth root, where n is the number of items).") (pre "(geometric-mean '(1 2 3 4 5)) => 2.60517108469735")) (def (sig (procedure "(range items)" (id range))) (p "returns the difference between the biggest and the smallest value from the list of " (tt "items") ".") (pre "(range '(5 1 2 3 4)) => 4")) (def (sig (procedure "(percentile items percent)" (id percentile))) (p "returns the item closest to the " (tt "percent") " value if the " (tt "items") " are sorted into order; the returned item may be in the list, or the average of adjacent values.") (pre "(percentile '(1 2 3 4) 50) => 5/2\n(percentile '(1 2 3 4) 67) => 3")) (def (sig (procedure "(variance items)" (id variance)))) (def (sig (procedure "(standard-deviation items)" (id standard-deviation)))) (def (sig (procedure "(coefficient-of-variation items)" (id coefficient-of-variation))) (p "returns 100 * (std-dev / mean) of the " (tt "items") ".") (pre "(coefficient-of-variation '(1 2 3 4)) => 51.6397779494322")) (def (sig (procedure "(standard-error-of-the-mean items)" (id 
standard-error-of-the-mean))) (p "returns std-dev / sqrt(length items).") (pre "(standard-error-of-the-mean '(1 2 3 4)) => 0.645497224367903")) (def (sig (procedure "(mean-sd-n items)" (id mean-sd-n))) (p "returns three " (b "values") ", one for the mean, one for the standard deviation, and one for the length of the list.") (pre "> (mean-sd-n '(1 2 3 4))\n5/2\n1.29099444873581\n4"))) (section 4 "Distributional functions" (def (sig (procedure "(binomial-probability n k p)" (id binomial-probability))) (p "returns the probability that the number of positive outcomes for a binomial distribution B(n, p) is k.") (pre "> (do-ec (: i 0 11) \n         (format #t \"i = ~d P = ~f~&\" i (binomial-probability 10 i 0.5)))\ni = 0 P = 0.0009765625\ni = 1 P = 0.009765625\ni = 2 P = 0.0439453125\ni = 3 P = 0.1171875\ni = 4 P = 0.205078125\ni = 5 P = 0.24609375\ni = 6 P = 0.205078125\ni = 7 P = 0.1171875\ni = 8 P = 0.0439453125\ni = 9 P = 0.009765625\ni = 10 P = 0.0009765625")) (def (sig (procedure "(binomial-cumulative-probability n k p)" (id binomial-cumulative-probability))) (p "returns the probability that fewer than " (tt "k") " positive outcomes occur for a binomial distribution B(n, p).") (pre "> (do-ec (: i 0 11) \n         (format #t \"i = ~d P = ~f~&\" i (binomial-cumulative-probability 10 i 0.5)))\ni = 0 P = 0.0\ni = 1 P = 0.0009765625\ni = 2 P = 0.0107421875\ni = 3 P = 0.0546875\ni = 4 P = 0.171875\ni = 5 P = 0.376953125\ni = 6 P = 0.623046875\ni = 7 P = 0.828125\ni = 8 P = 0.9453125\ni = 9 P = 0.9892578125\ni = 10 P = 0.9990234375")) (def (sig (procedure "(binomial-ge-probability n k p)" (id binomial-ge-probability))) (p "returns the probability of " (tt "k") " or more positive outcomes for a binomial distribution B(n, p).")) (def (sig (procedure "(binomial-le-probability n k p)" (id binomial-le-probability))) (p "returns the probability of " (tt "k") " or fewer positive outcomes for a binomial distribution B(n, p).")) (def (sig (procedure "(poisson-probability mu k)" (id 
poisson-probability))) (p "returns the probability of " (tt "k") " events occurring when the average is " (tt "mu") ".") (pre "> (do-ec (: i 0 20) \n         (format #t \"P(X=~2d) = ~,4f~&\" i (poisson-probability 10 i)))\nP(X= 0) = 0.0000\nP(X= 1) = 0.0005\nP(X= 2) = 0.0023\nP(X= 3) = 0.0076\nP(X= 4) = 0.0189\nP(X= 5) = 0.0378\nP(X= 6) = 0.0631\nP(X= 7) = 0.0901\nP(X= 8) = 0.1126\nP(X= 9) = 0.1251\nP(X=10) = 0.1251\nP(X=11) = 0.1137\nP(X=12) = 0.0948\nP(X=13) = 0.0729\nP(X=14) = 0.0521\nP(X=15) = 0.0347\nP(X=16) = 0.0217\nP(X=17) = 0.0128 \nP(X=18) = 0.0071\nP(X=19) = 0.0037")) (def (sig (procedure "(poisson-cumulative-probability mu k)" (id poisson-cumulative-probability))) (p "returns the probability of less than " (tt "k") " events occurring when the average is " (tt "mu") ".") (pre "> (do-ec (: i 0 20) \n         (format #t \"P(X=~2d) = ~,4f~&\" i (poisson-cumulative-probability 10 i)))\nP(X= 0) = 0.0000\nP(X= 1) = 0.0000\nP(X= 2) = 0.0005\nP(X= 3) = 0.0028\nP(X= 4) = 0.0103\nP(X= 5) = 0.0293\nP(X= 6) = 0.0671\nP(X= 7) = 0.1301\nP(X= 8) = 0.2202\nP(X= 9) = 0.3328\nP(X=10) = 0.4579\nP(X=11) = 0.5830\nP(X=12) = 0.6968\nP(X=13) = 0.7916\nP(X=14) = 0.8645\nP(X=15) = 0.9165\nP(X=16) = 0.9513\nP(X=17) = 0.9730\nP(X=18) = 0.9857\nP(X=19) = 0.9928")) (def (sig (procedure "(poisson-ge-probability mu k)" (id poisson-ge-probability))) (p "returns the probability of " (tt "k") " or more events occurring when the average is " (tt "mu") ".")) (def (sig (procedure "(normal-pdf x mean variance)" (id normal-pdf))) (p "returns the likelihood of " (tt "x") " given a normal distribution with stated mean and variance.") (pre "> (do-ec (: i 0 11) \n         (format #t \"~3d ~,4f~&\" i (normal-pdf i 5 4)))\n 0 0.0088\n 1 0.0270\n 2 0.0648\n 3 0.1210\n 4 0.1760\n 5 0.1995\n 6 0.1760\n 7 0.1210\n 8 0.0648\n 9 0.0270\n10 0.0088")) (def (sig (procedure "(convert-to-standard-normal x mean variance)" (id convert-to-standard-normal))) (p "returns a value for " (tt "x") " rescaling the 
given normal distribution to a standard N(0, 1).") (pre "> (convert-to-standard-normal 5 6 2)\n-1/2")) (def (sig (procedure "(phi x)" (id phi))) (p "returns the cumulative distribution function (CDF) of the standard normal distribution.") (pre "> (do-ec (: x -2 2 0.4)\n         (format #t \"~4,1f ~,4f~&\" x (phi x)))\n-2.0 0.0228\n-1.6 0.0548\n-1.2 0.1151\n-0.8 0.2119\n-0.4 0.3446\n 0.0 0.5000\n 0.4 0.6554\n 0.8 0.7881\n 1.2 0.8849\n 1.6 0.9452")) (def (sig (procedure "(z percentile)" (id z))) (p "returns the inverse of the standard normal distribution.  Input is a percentile, between 0.0 and 1.0.")) (def (sig (procedure "(t-distribution degrees-of-freedom percentile)" (id t-distribution))) (p "returns the point in the t-distribution given the " (tt "degrees-of-freedom") " and " (tt "percentile") ".  " (tt "degrees-of-freedom") " must be a positive integer, and " (tt "percentile") " a value between 0.0 and 1.0.")) (def (sig (procedure "(chi-square degrees-of-freedom percentile)" (id chi-square))) (p "returns the point at which the chi-square distribution has " (tt "percentile") " to its " (b "left") ", using given " (tt "degrees-of-freedom") ".")) (def (sig (procedure "(chi-square-cdf x degrees-of-freedom)" (id chi-square-cdf))) (p "returns the probability that a random variable is to the " (b "left") " of " (tt "x") " using the chi-square distribution with given " (tt "degrees-of-freedom") "."))) (section 4 "Confidence intervals" (p "These functions report bounds for an observed property of a distribution: the bounds are tighter as the confidence level, alpha, varies from 0.0 to 1.0.") (def (sig (procedure "(binomial-probability-ci n p alpha)" (id binomial-probability-ci))) (p "returns two values, the upper and lower bounds on an observed probability " (tt "p") " from " (tt "n") " trials with confidence " (tt "(1-alpha)") ".") (pre "> (binomial-probability-ci 10 0.8 0.9)\n0.724273681640625 \n0.851547241210938\n; 2 values")) (def (sig (procedure "(poisson-mu-ci k 
alpha)" (id poisson-mu-ci))) (p "returns two values, the upper and lower bounds on the poisson parameter if " (tt "k") " events are observed; the bound is for confidence " (tt "(1-alpha)") ".") (pre "> (poisson-mu-ci 10 0.9)\n8.305419921875\n10.0635986328125\n; 2 values")) (def (sig (procedure "(normal-mean-ci mean standard-deviation k alpha)" (id normal-mean-ci))) (p "returns two values, the upper and lower bounds on the mean of the normal distribution if " (tt "k") " events are observed; the bound is for confidence " (tt "(1-alpha)") ".") (pre "> (normal-mean-ci 0.5 0.1 10 0.8)\n0.491747852700165\n0.508252147299835\n; 2 values")) (def (sig (procedure "(normal-mean-ci-on-sequence items alpha)" (id normal-mean-ci-on-sequence))) (p "returns two values, the upper and lower bounds on the mean of the given " (tt "items") ", assuming they are normally distributed; the bound is for confidence " (tt "(1-alpha)") ".") (pre "> (normal-mean-ci-on-sequence '(1 2 3 4 5) 0.9)\n2.40860081649174\n3.59139918350826\n; 2 values")) (def (sig (procedure "(normal-variance-ci standard-deviation k alpha)" (id normal-variance-ci))) (p "returns two values, the upper and lower bounds on the variance of the normal distribution if " (tt "k") " events are observed; the bound is for confidence " (tt "(1-alpha)") ".")) (def (sig (procedure "(normal-variance-ci-on-sequence items alpha)" (id normal-variance-ci-on-sequence))) (p "returns two values, the upper and lower bounds on the variance of the given " (tt "items") ", assuming they are normally distributed; the bound is for confidence " (tt "(1-alpha)") ".")) (def (sig (procedure "(normal-sd-ci standard-deviation k alpha)" (id normal-sd-ci))) (p "returns two values, the upper and lower bounds on the standard deviation of the normal distribution if " (tt "k") " events are observed; the bound is for confidence " (tt "(1-alpha)") ".")) (def (sig (procedure "(normal-sd-ci-on-sequence items alpha)" (id normal-sd-ci-on-sequence))) (p "returns two 
values, the upper and lower bounds on the standard deviation of the given " (tt "items") ", assuming they are normally distributed; the bound is for confidence " (tt "(1-alpha)") "."))) (section 4 "Hypothesis testing" (p "These functions report on the significance of an observed sample against a given distribution.") (section 5 "(parametric)" (def (sig (procedure "(z-test x-bar n #:mu #:sigma #:tails)" (id z-test))) (p "Given " (tt "x-bar") " the sample mean, " (tt "n") " the number in the sample, " (tt "#:mu") " the distribution mean (defaults to 0), " (tt "#:sigma") " the distribution standard deviation (defaults to 1), and " (tt "#:tails") " the significance to report on:") (ul (li (tt "':both") ", the probability of the difference between " (tt "x-bar") " and " (tt "#:mu")) (li (tt "':positive") ", the probability that observation is " (tt ">= x-bar")) (li (tt "':negative") ", the probability that observation is " (tt "<= x-bar"))) (p "e.g. given a distribution with mean 50 and standard deviation 10") (pre "; probability that a single observation is <= 40\n> (z-test 40 1 #:mu 50 #:sigma 10 #:tails ':negative)\n0.158655\n; probability that 10 observations are <= 40\n> (z-test 40 10 #:mu 50 #:sigma 10 #:tails ':negative)\n0.000783\n; probability that 5 observations give a mean of 40\n> (z-test 40 5 #:mu 50 #:sigma 10)\n0.025347")) (def (sig (procedure "(z-test-on-sequence observations #:mu #:sigma #:tails)" (id z-test-on-sequence))) (p "As for " (tt "z-test") " except " (tt "x-bar") " and " (tt "n") " are computed from given " (tt "observations") ".")) (def (sig (procedure "(t-test-one-sample x-bar sd n mu #:tails)" (id t-test-one-sample))) (p "Given observed data with mean " (tt "x-bar") ", standard deviation " (tt "sd") " and number of observations " (tt "n") " (" (tt "n < 30") "), return the significance of the sample compared with the population mean " (tt "mu") ".  
" (tt "#:tails") " is one of:") (ul (li (tt "':both") " two-sided (default)") (li (tt "':positive") " one-sided, " (tt "x-bar >= mu")) (li (tt "':negative") " one-sided, " (tt "x-bar <= mu")))) (def (sig (procedure "(t-test-one-sample-on-sequence observations mu #:tails)" (id t-test-one-sample-on-sequence))) (p "As for " (tt "t-test-one-sample") " except " (tt "x-bar") ", " (tt "sd") " and " (tt "n") " are computed from given " (tt "observations") ".")) (def (sig (procedure "(t-test-paired t-bar sd n #:tails)" (id t-test-paired))) (p "Computes the significance of the differences between two sequences of data: the differences are given as their mean, " (tt "t-bar") ", standard deviation, " (tt "sd") ", and number of measurements, " (tt "n") ".")) (def (sig (procedure "(t-test-paired-on-sequences before after #:tails)" (id t-test-paired-on-sequences))) (p "Computes the significance of the difference between two sequences of data: one before an experimental change and one after.  " (tt "#:tails") " is as for " (tt "t-significance") ".") (pre "> (t-test-paired-on-sequences '(4 3 5) '(1 1 3))\n0.0198039411803931")) (def (sig (procedure "(t-test-two-sample mean-1 sd-1 n-1 mean-2 sd-2 n-2 #:variances-equal? 
#:variance-significance-cutoff #:tails)" (id t-test-two-sample))) (p "Computes the significance of the difference of two means given the sample standard deviations and sizes.")) (def (sig (procedure "(t-test-two-sample-on-sequences sequence-1 sequence-2 #:tails)" (id t-test-two-sample-on-sequences))) (p "Significance of difference of two sequences of observations.")) (def (sig (procedure "(f-test variance-1 n1 variance-2 n2 #:tails)" (id f-test))) (p "Tests for the equality of two variances.")) (def (sig (procedure "(chi-square-test-one-sample observed-variance sample-size test-variance #:tails)" (id chi-square-test-one-sample))) (p "Tests for significance of difference between an observed and a test variance.")) (def (sig (procedure "(binomial-test-one-sample p-hat n p #:tails #:exact?)" (id binomial-test-one-sample))) (p "Returns the significance of a one sample test with " (tt "n") " observations, observed probability " (tt "p-hat") " and expected probability " (tt "p") ".")) (def (sig (procedure "(binomial-test-two-sample p-hat-1 n-1 p-hat-2 n-2 #:tails #:exact?)" (id binomial-test-two-sample))) (p "Returns the significance of a two sample test.")) (def (sig (procedure "(fisher-exact-test a b c d #:tails)" (id fisher-exact-test))) (p "Given a 2x2 contingency table, returns a p value using Fisher's exact test.  " (tt "a") " and " (tt "b") " form the first row of the contingency table, " (tt "c") " and " (tt "d") " the second row.")) (def (sig (procedure "(mcnemars-test a-discordant-count b-discordant-count #:exact?)" (id mcnemars-test))) (p "For measuring effectiveness of, e.g., one treatment over another.  
" (tt "a-discordant-count") " is the number of times when A worked, " (tt "b-discordant-count") " the number of times B worked.")) (def (sig (procedure "(poisson-test-one-sample observed mu #:tails #:approximate?)" (id poisson-test-one-sample))) (p "Computes significance of the number of observed events under a Poisson distribution against " (tt "mu") " expected events."))) (section 5 "(non parametric)" (def (sig (procedure "(sign-test plus-count minus-count #:exact? #:tails)" (id sign-test)))) (def (sig (procedure "(sign-test-on-sequence sequence-1 sequence-2 #:exact? #:tails)" (id sign-test-on-sequence))) (p "Takes two equal-sized sequences of observations, and reports if the entries of one are different to those in the other.")) (def (sig (procedure "(wilcoxon-signed-rank-test differences #:tails)" (id wilcoxon-signed-rank-test))) (p "Given at least 16 differences, reports if the positive differences are significantly larger or smaller than the negative differences.")) (def (sig (procedure "(wilcoxon-signed-rank-test-on-sequences sequence-1 sequence-2 #:tails)" (id wilcoxon-signed-rank-test-on-sequences))) (p "Given two sequences of at least 16 observations, computes " (tt "wilcoxon-signed-rank-test") " on the differences.")) (def (sig (procedure "(chi-square-test-rxc contingency-table)" (id chi-square-test-rxc))) (p "Given a contingency table (a SRFI-63 array), returns significance of relation between row and column variable.")) (def (sig (procedure "(chi-square-test-for-trend row1-counts row2-counts)" (id chi-square-test-for-trend))) (p "Returns p significance of trend, and prints a string to show if increasing or decreasing.")))) (section 4 "Sample size estimates" (def (sig (procedure "(t-test-one-sample-sse mean-1 mean-2 sigma-1 #:alpha #:1-beta #:tails)" (id t-test-one-sample-sse))) (p "Returns the size of sample necessary to distinguish a normally distributed sample with " (tt "mean-2") " from a population " (tt "mean-1") " standard deviation " (tt 
"sigma-1") ".  The significance " (tt "#:alpha") " (defaults to 0.05), power " (tt "#:1-beta") " (0.95) and sides " (tt "#:tails") " (':both) may be altered.") (pre "> (t-test-one-sample-sse 5.0 5.2 0.5)\n163")) (def (sig (procedure "(t-test-two-sample-sse mean-1 sigma-1 mean-2 sigma-2 #:alpha #:1-beta #:tails #:sample-ratio)" (id t-test-two-sample-sse))) (p "Returns the size of sample necessary to distinguish a normally distributed sample N(mean-1, sigma-1) from a normally distributed sample N(mean-2, sigma-2).  The significance " (tt "#:alpha") " (defaults to 0.05), power " (tt "#:1-beta") " (0.95), sides " (tt "#:tails") " (':both) and sample-ratio " (tt "#:sample-ratio") " (1) may be altered.")) (def (sig (procedure "(t-test-paired-sse difference-mean difference-sigma #:alpha #:1-beta #:tails)" (id t-test-paired-sse))) (p "Returns the size of sample to produce a given mean and standard deviation in the differences of two samples.")) (def (sig (procedure "(binomial-test-one-sample-sse p-estimated p-null #:alpha #:1-beta #:tails)" (id binomial-test-one-sample-sse))) (p "Returns the size of sample needed to test whether an observed probability is significantly different from a particular binomial null hypothesis with a significance alpha and a power 1-beta.")) (def (sig (procedure "(binomial-test-two-sample-sse p-one p-two #:alpha #:1-beta #:tails #:sample-ratio)" (id binomial-test-two-sample-sse))) (p "Returns the size of sample needed to test if given two binomial probabilities are significantly different.  
" (tt "#:sample-ratio") " can be given if the two samples differ in size.")) (def (sig (procedure "(binomial-test-paired-sse pd pa #:alpha #:1-beta #:tails)" (id binomial-test-paired-sse))) (p "Sample size estimate for McNemar's discordant pairs test.")) (def (sig (procedure "(correlation-sse rho #:alpha #:1-beta)" (id correlation-sse))) (p "Returns the size of sample necessary to find a correlation of value " (tt "rho") " with significance " (tt "#:alpha") " (defaults to 0.05) and power " (tt "#:1-beta") " (defaults to 0.95).") (pre "> (correlation-sse 0.80 #:alpha 0.05 #:1-beta 0.9)\n11"))) (section 4 "Correlation and regression" (def (sig (procedure "(linear-regression line-defn)" (id linear-regression))) (p "Given a line definition as a list of point pairs, first prints to the terminal and then returns 5 " (b "values") " for the best fitting line through the points:") (ul (li "the y-intercept") (li "the slope") (li "the correlation coefficient, r") (li "the square of the correlation coefficient, r^2") (li "the significance of the difference of the slope from zero, p")) (p "(This is also called the Pearson correlation; used when relation expected to be linear.  Also see " (tt "spearman-rank-correlation") ".)") (pre "> (linear-regression '((1.0 0.1) (2.0 0.3) (3.0 0.8)))\nIntercept = -0.3, slope = 0.35, r = 0.970725343394151, R^2 = 0.942307692307692, p = 0.154420958311267\n-0.3\n0.35\n0.970725343394151\n0.942307692307692\n0.154420958311267\n; 5 values")) (def (sig (procedure "(correlation-coefficient line-defn)" (id correlation-coefficient))) (p "As above, but only returns the value of " (i "r") ":") (pre "> (correlation-coefficient '((1.0 0.1) (2.0 0.3) (3.0 0.8)))\n0.970725343394151")) (def (sig (procedure "(correlation-test-two-sample r1 n1 r2 n2 #:tails)" (id correlation-test-two-sample))) (p "Returns the significance of the similarity between two correlations.  
" (tt "#:tails") " determines how the comparison is made: " (tt "':both") " measures the difference, " (tt "':negative") " if " (tt "r1 < r2") " and " (tt "':positive") " if " (tt "r1 > r2") ".")) (def (sig (procedure "(correlation-test-two-sample-on-sequences points-1 points-2 #:tails)" (id correlation-test-two-sample-on-sequences))) (p "As above, but computes the correlations from given lists of points.")) (def (sig (procedure "(spearman-rank-correlation points)" (id spearman-rank-correlation))) (p "Returns two " (b "values") ", the Spearman Rank measure of correlation between given list of points, and the p-significance of the correlation.  (This correlation is used for non-linear relations; compare with " (tt "linear-regression") ".)"))) (section 4 "Significance test functions" (def (sig (procedure "(t-significance t-value degrees-of-freedom #:tails)" (id t-significance))) (p "returns the probability of " (tt "t-value") " for given " (tt "degrees-of-freedom") ".  The keyword " (tt "#:tails") " modifies the calculation to be two-sided (the default) with " (tt "':both") ", or one-sided, " (tt "':positive") " or " (tt "':negative") ".") (pre "> (t-significance 0.2 5)\n0.849360513995829\n> (t-significance 0.2 5 #:tails ':positive)\n0.424680256997915\n> (t-significance 0.2 5 #:tails ':negative)\n0.575319743002086")) (def (sig (procedure "(f-significance f-value numerator-dof denominator-dof #:one-tailed?)" (id f-significance))) (p "returns the probability of " (tt "f-value") " for given " (tt "numerator-dof") " and " (tt "denominator-dof") ".  The boolean keyword " (tt "#:one-tailed?") " indicates if calculation is two-sided (the default) or not.") (pre "> (f-significance 1.5 8 2)\n0.920449812578091\n> (f-significance 1.5 8 2 #:one-tailed? #t)\n0.460224906289046")))) (section 3 "Authors" (p (int-link "/users/peter-lane" "Peter Lane") " wrote the Scheme version of this library.  
The original Lisp version was written by " (link "http://compbio.ucdenver.edu/hunter/" "Larry Hunter") ".")) (section 3 "License" (p "GPL version 3.0.")) (section 3 "Requirements" (p "Needs srfi-1, srfi-25, srfi-63, srfi-69, vector-lib, numbers, extras, foreign, format") (p "Uses the GNU scientific library for basic numeric processing, so requires libgsl, libgslcblas and the development files for libgsl.")) (section 3 "Version History" (ul (li "0.8: added cumsum and random-weighted-sample") (li "0.5: fixed warning in compilation (thanks to Felix for pointing it out)") (li "0.4: all functions should now be working") (li "0.3: some error fixes and addition of tests for majority of functions") (li "0.2: fixed some errors in keywords and find-critical-value") (li "0.1: initial package")))))