/* Benford.c: read data on stdin and compute the relative frequency
* distribution of the leading digits. Data should be delimited by whitespace:
* spaces, tabs, and newlines, and should be in decimal (i.e, not
* scientific) format. All data should be respresentable in double precision
* on the target platform.
*
* Prints a table showing the relative frequency of each digit, and, for
* comparison, the relative frequency predicted by "Benford's Law."
*
* Compile: cc -o benford benford.c -lm
*
* By Terry McConnell
*
*/
/*
Benford's law, a curious empirical observation, says that the
relative frequency of the most significant digit equaling k is commonly
distributed as log((k+1)/k). The story is that
the pages near the beginning of the book in old tables
of common logarithms were often more heavily worn than pages near the
end. Since people usually went to such tables to look up the logarithms
of data they observed, this would seem to imply a nonuniform
distribution for the most significant digit of the mantissa.
There is by now a sizable literature on Benford's law. See, for
example, T.P. Hill, "The Significant-Digit Phenomenon", Amer. Math.
Monthly, April 1995, 322-327, and the references given there.
For example, 117 home resale prices from the Phoenix area gave the following
output:
The number of items observed was: 117
Digit Observed Benford
1 0.393162 0.301030
2 0.128205 0.176091
3 0.000000 0.124939
4 0.000000 0.096910
5 0.008547 0.079181
6 0.034188 0.066947
7 0.145299 0.057992
8 0.119658 0.051153
9 0.170940 0.045757
*/
#include
#include
#include
int
main()
{
double freq[9] = {0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0};
char buffer[10];
double count = 0.0,datum,i=0.0;
int digit,j;
while(scanf("%lf",&datum) != EOF) {
count += 1.0;
/* Compute the most significant digit and
increment the corresp. freq */
/* The following algorithm, which seems reasonable, doesn't work in
all cases. For example, if datum = 8 then digit = 7 (NeXT C-library):
x = log10(datum);
digit = (int) floor(pow(10,x - floor(x)));
*/
sprintf(buffer,"%-.0e",fabs(datum));
digit = buffer[0] - '0'; /* Yes, I'm aware this won't work
for EBCDIC, and I don't care */
freq[digit-1] += 1.0;
}
/* Print out a table of results. */
printf("The number of items observed was: %.0f\n\n",count);
printf("Digit\tObserved\tBenford\n");
for(j=1;j<=9;j++){
i +=1.0;
printf("%.0f\t%f\t%f\n",i,freq[j-1]/count,
log10((i+1)/i));
}
}