|
#!/usr/bin/perl
|
|
use FileHandle; # for use with formatted output
|
|
STDOUT->format_top_name("TOP");
|
|
STDOUT->format_name("LINE");
|
|
#
|
|
# Important variables...
|
|
#
|
|
# Variables with values taken from data file:
|
|
# -------------------------------------------
|
|
# $trials --> # of times to iterate over data set
|
|
# $pop --> population of infected community
|
|
# $inf_prob --> P(A), the probability that a person is infected
|
|
# $tpos_prob --> P(B|A), the probability that a test result is p\
|
|
ositive
|
|
# given that the person is infected (a "true positiv\
|
|
e")
|
|
# $fpos_prob --> P(B|not-A), the probability that a test result \
|
|
is
|
|
# positive given that the person is NOT infected
|
|
# (a "false positive")
|
|
#
|
|
# Variables with Values Generated on One Trial:
|
|
# ----------------------------------------------
|
|
# $inf_count --> counts # of people infected for the trial
|
|
# $tpos_count --> counts # of infected people who register posit\
|
|
ive on test
|
|
# $tneg_count --> counts # of uninfected people who register neg\
|
|
ative
|
|
# negative on test
|
|
# $fpos_count --> counts # of uninfected people who register
|
|
# positive on test
|
|
# $fneg_count --> counts # of infected people who register negat\
|
|
ive on test
|
|
#
|
|
# Variables with Values Acculuated Over Many Trials of same Population\
|
|
:
|
|
# --------------------------------------------------------------------\
|
|
-
|
|
# Same as section above, but with form $total_ABOVE --> denoting\
|
|
a
|
|
# running total which is kept. These values are then used to calcu\
|
|
late
|
|
# probabilities:
|
|
#
|
|
# $prob_a, $prob_not_a, $prob_b_given_a, $prob_b_given_not_a
|
|
# and $prob_a_given_b
|
|
#
|
|
srand; # seed the random number generator
|
|
open(IN,($datafile = "prob.data")) or
|
|
die('Program Aborting due to the following situation:
|
|
Can\'t open datafile "' . $datafile . "\":\n$!");
|
|
while ( <IN> ) {
|
|
chomp;
|
|
next if /^#/; # skips "commented" lines
|
|
next if !/\S/g; # skips lines consisting only of whitespace
|
|
# Keeps track of which experiment in the data file we are currently wo\
|
|
rking on.
|
|
$exp_num++;
|
|
|
|
# Clears out variables from the values they obtained in the previous t\
|
|
rial set.
|
|
$total_inf_count = 0;
|
|
$total_tpos_count = 0;
|
|
$total_tneg_count = 0;
|
|
$total_fpos_count = 0;
|
|
$total_fneg_count = 0;
|
|
#
|
|
# My use of eval in the below is how I handle both real-number and fra\
|
|
ctional
|
|
# data in the input file. Be sure to use this technique only where yo\
|
|
u are
|
|
# confident that the source of your datafile is safe. My usage of the\
|
|
split
|
|
# command defines the column-delimitation structure of the datafile (a\
|
|
nd
|
|
# the accompaying lines of description of how to use the datafile).
|
|
#
|
|
foreach $item ( ($trials,$pop,$inf_prob,$tpos_prob,$fpos_prob) = s\
|
|
plit) {
|
|
$item = eval $item;
|
|
}
|
|
#
|
|
# To help smooth out the values derived, I'm running the experiment th\
|
|
rough
|
|
# trials, where the number of trials = $trials is set in the data file\
|
|
.
|
|
#
|
|
for ($j = 0; $j < $trials; $j++) {
|
|
# Clear out variables from values of their previous population trial.
|
|
$inf_count = 0;
|
|
$tpos_count = 0; $tneg_count = 0;
|
|
$fpos_count = 0; $fneg_count = 0;
|
|
|
|
#
|
|
# For each member of the population, determine whether or not that mem\
|
|
ber
|
|
# is infected with the disease, and what kind of test result that memb\
|
|
er
|
|
# yields. Note that I'm not concerned with >= vs. > in my test \
|
|
comparisons,
|
|
# since the magnitude of difference those make compared to the probabi\
|
|
lities
|
|
# of test success or failure are miniscule. Also, watch out for my us\
|
|
e
|
|
# of the trinary ?: operator as a left-operand. In the if statements,
|
|
# I discovered that
|
|
# $variable > rand
|
|
# works better than
|
|
# rand < $variable
|
|
# due to how the Perl parser wants to handle the situation.
|
|
#
|
|
for ($i = 0; $i < $pop; $i++) {
|
|
if ( $inf_prob > rand ) {
|
|
$inf_count++;
|
|
(($tpos_prob > rand)?$tpos_count:$fneg_count) += 1;
|
|
} else {
|
|
(($fpos_prob > rand)?$fpos_count:$tneg_count) += 1;
|
|
}
|
|
}
|
|
|
|
# Keep running total of trial-derived information across all trials wi\
|
|
th
|
|
# this data-set.
|
|
$total_inf_count += $inf_count;
|
|
$total_tpos_count += $tpos_count;
|
|
$total_tneg_count += $tneg_count;
|
|
$total_fpos_count += $fpos_count;
|
|
$total_fneg_count += $fneg_count;
|
|
}
|
|
# Normalize the total counts based on the number of trials that were r\
|
|
un
|
|
$total_inf_count /= $trials;
|
|
$total_tpos_count /= $trials;
|
|
$total_tneg_count /= $trials;
|
|
$total_fpos_count /= $trials;
|
|
$total_fneg_count /= $trials;
|
|
#
|
|
# Compute probabilities from the data collection in the numerical expe\
|
|
riment.
|
|
# Note that our numerical experiment doesn't directly provide some of \
|
|
the
|
|
# P(X) and P(X|Y) values, so these are obtained through simple arithme\
|
|
tic
|
|
# and (in the case of the final quanity) probability theory.
|
|
#
|
|
$prob_a = $total_inf_count / $pop;
|
|
$prob_not_a = 1 - $prob_a;
|
|
$prob_b_given_a = $total_tpos_count / $total_inf_count;
|
|
$prob_b_given_not_a = $total_fpos_count / ($pop - $total_inf_count\
|
|
);
|
|
$prob_a_given_b = ($prob_a * $prob_b_given_a) /
|
|
($prob_a * $prob_b_given_a +
|
|
$prob_not_a * $prob_b_given_not_a) ;
|
|
#
|
|
# Output the results of this experimen to our formatted report.
|
|
#
|
|
&write_form_line($exp_num, $trials, $pop,
|
|
$inf_prob, 1 - $inf_prob, $tpos_prob, $fpos_prob,\
|
|
|
|
($inf_prob * $tpos_prob) /
|
|
($inf_prob * $tpos_prob + (1-$inf_prob) * $fpos_p\
|
|
rob),
|
|
$prob_a, $prob_not_a, $prob_b_given_a,
|
|
$prob_b_given_not_a, $prob_a_given_b);
|
|
}
|
|
close(IN);
|
|
|
|
sub write_form_line {
|
|
|
|
local($experiment, $trial_runs, $population,
|
|
$apA, $apnA, $apBA, $apBnA, $apAB,
|
|
$epA, $epnA, $epBA, $epBnA, $epAB) = @_;
|
|
|
|
write;
|
|
}
|
|
format TOP =
|
|
Analytic and Experimental Results
|
|
from our Population Testing Scenario
|
|
======================================================================\
|
|
=======
|
|
P(A) P(not-A) P(B|A) P(B|not-A) \
|
|
P(A|B)
|
|
======================================================================\
|
|
=======
|
|
.
|
|
format LINE =
|
|
Experiment #:@>>>>>>>>
|
|
$experiment
|
|
Trials Run :@>>>>>>>>
|
|
$trial_runs
|
|
Pop. Size :@>>>>>>>>
|
|
$population
|
|
Analytic Results : @.##### @.##### @.##### @.##### \
|
|
@.#####
|
|
$apA, $apnA, $apBA, $apBnA, $apAB
|
|
Experimental Results : @.##### @.##### @.##### @.##### \
|
|
@.#####
|
|
$epA, $epnA, $epBA, $epBnA, $epAB
|
|
======================================================================\
|
|
=======
|
|
.
|
|
exit 0;
|