summaryrefslogtreecommitdiff
path: root/lib/Benchmark/Timer.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Benchmark/Timer.pm')
-rwxr-xr-xlib/Benchmark/Timer.pm618
1 files changed, 618 insertions, 0 deletions
diff --git a/lib/Benchmark/Timer.pm b/lib/Benchmark/Timer.pm
new file mode 100755
index 0000000..624289b
--- /dev/null
+++ b/lib/Benchmark/Timer.pm
@@ -0,0 +1,618 @@
+# ========================================================================
+# Benchmark::Timer - Perl code benchmarking tool
+# David Coppit <david@coppit.org>
+#
+# This program contains embedded documentation in Perl POD (Plain Old
+# Documentation) format. Search for the string "=head1" in this document
+# to find documentation snippets, or use "perldoc" to read it; utilities
+# like "pod2man" and "pod2html" can reformat as well.
+#
+# Copyright(c) 2004 David Coppit
+# Copyright(c) 2000-2001 Andrew Ho.
+#
+# ========================================================================
+
+=head1 NAME
+
+Benchmark::Timer - Benchmarking with statistical confidence
+
+=head1 SYNOPSIS
+
+ # Non-statistical usage
+ use Benchmark::Timer;
+ $t = Benchmark::Timer->new(skip => 1);
+
+ for(1 .. 1000) {
+ $t->start('tag');
+ &long_running_operation();
+ $t->stop('tag');
+ }
+ print $t->report;
+
+ # --------------------------------------------------------------------
+
+ # Statistical usage
+ use Benchmark::Timer;
+ $t = Benchmark::Timer->new(skip => 1, confidence => 97.5, error => 2);
+
+ while($t->need_more_samples('tag')) {
+ $t->start('tag');
+ &long_running_operation();
+ $t->stop('tag');
+ }
+ print $t->report;
+
+=head1 DESCRIPTION
+
+The Benchmark::Timer class allows you to time portions of code
+conveniently, as well as benchmark code by allowing timings of repeated
+trials. It is perfect for when you need more precise information about the
+running time of portions of your code than the Benchmark module will give
+you, but don't want to go all out and profile your code.
+
+The methodology is simple; create a Benchmark::Timer object, and wrap portions
+of code that you want to benchmark with C<start()> and C<stop()> method calls.
+You can supply a tag to those methods if you plan to time multiple portions of
+code. If you provide error and confidence values, you can also use
+C<need_more_samples()> to determine, statistically, whether you need to
+collect more data.
+
+After you have run your code, you can obtain information about the running
+time by calling the C<results()> method, or get a descriptive benchmark report
+by calling C<report()>. If you run your code over multiple trials, the
+average time is reported. This is wonderful for benchmarking time-critical
+portions of code in a rigorous way. You can also optionally choose to skip any
+number of initial trials to cut down on initial case irregularities.
+
+=head1 METHODS
+
+In all of the following methods, C<$tag> refers to the user-supplied name of
+the code being timed. Unless otherwise specified, $tag defaults to the tag of
+the last call to C<start()>, or "_default" if C<start()> was not previously
+called with a tag.
+
+=over 4
+
+=cut
+
+
+# ------------------------------------------------------------------------
+# Package setup
+
+package Benchmark::Timer;
+require 5.005;
+use strict;
+
+use Carp;
+use Time::HiRes qw( gettimeofday tv_interval );
+
+use vars qw($VERSION);
+$VERSION = sprintf "%d.%02d%02d", q/0.71.0/ =~ /(\d+)/g;
+
+use constant BEFORE => 0;
+use constant ELAPSED => 1;
+use constant LASTTAG => 2;
+use constant TAGS => 3;
+use constant SKIP => 4;
+use constant MINIMUM => 5;
+use constant SKIPCOUNT => 6;
+use constant CONFIDENCE => 7;
+use constant ERROR => 8;
+use constant STAT => 9;
+
+# ------------------------------------------------------------------------
+# Constructor
+
+=item $t = Benchmark::Timer->new( [options] );
+
+Constructor for the Benchmark::Timer object; returns a reference to a
+timer object. Takes the following named arguments:
+
+=over 4
+
+=item skip
+
+The number of trials (if any) to skip before recording timing information.
+
+=item minimum
+
+The minimum number of trials to run.
+
+=item error
+
+A percentage between 0 and 100 which indicates how much error you are willing
+to tolerate in the average time measured by the benchmark. For example, a
+value of 1 means that you want the reported average time to be within 1% of
+the real average time. C<need_more_samples()> will use this value to determine
+when it is okay to stop collecting data.
+
+If you specify an error you must also specify a confidence.
+
+=item confidence
+
+A percentage between 0 and 100 which indicates how confident you want to be in
+the error measured by the benchmark. For example, a value of 97.5 means that
+you want to be 97.5% confident that the real average time is within the error
+margin you have specified. C<need_more_samples()> will use this value to
+compute the estimated error for the collected data, so that it can determine
+when it is okay to stop.
+
+If you specify a confidence you must also specify an error.
+
+=back
+
+=cut
+
+sub new {
+ my $class = shift;
+ my $self = [];
+ bless $self, $class;
+ return $self->reset(@_);
+}
+
+
+# ------------------------------------------------------------------------
+# Public methods
+
+=item $t->reset;
+
+Reset the timer object to the pristine state it started in.
+Erase all memory of tags and any previously accumulated timings.
+Returns a reference to the timer object. It takes the same arguments
+the constructor takes.
+
+=cut
+
+sub reset {
+ my $self = shift;
+ my %args = @_;
+
+ $self->[BEFORE] = {}; # [ gettimeofday ] storage
+ $self->[ELAPSED] = {}; # elapsed fractional seconds
+ $self->[LASTTAG] = undef; # what the last tag was
+ $self->[TAGS] = []; # keep list of tags in order seen
+ $self->[SKIP] = 0; # how many samples to skip
+ $self->[MINIMUM] = 1; # the minimum number of trails to run
+ $self->[SKIPCOUNT] = {}; # trial skip storage
+ delete $self->[CONFIDENCE]; # confidence factor
+ delete $self->[ERROR]; # allowable error
+ delete $self->[STAT]; # stat objects for each tag
+
+ if(exists $args{skip}) {
+ croak 'argument skip must be a non-negative integer'
+ unless defined $args{skip}
+ and $args{skip} !~ /\D/
+ and int $args{skip} == $args{skip};
+ $self->[SKIP] = $args{skip};
+ delete $args{skip};
+ }
+
+ if(exists $args{minimum}) {
+ croak 'argument minimum must be a non-negative integer'
+ unless defined $args{minimum}
+ and $args{minimum} !~ /\D/
+ and int $args{minimum} == $args{minimum};
+ croak 'argument minimum must greater than or equal to skip'
+ unless defined $args{minimum}
+ and $args{minimum} >= $self->[SKIP];
+ $self->[MINIMUM] = $args{minimum};
+ delete $args{minimum};
+ }
+
+ my $confidence_is_valid =
+ (defined $args{confidence}
+ and $args{confidence} =~ /^\d*\.?\d*$/
+ and $args{confidence} > 0
+ and $args{confidence} < 100);
+
+ my $error_is_valid =
+ (defined $args{error}
+ and $args{error} =~ /^\d*\.?\d*$/
+ and $args{error} > 0
+ and $args{error} < 100);
+
+ if ($confidence_is_valid && !$error_is_valid ||
+ !$confidence_is_valid && $error_is_valid)
+ {
+ carp 'you must specify both confidence and error'
+ }
+ elsif ($confidence_is_valid && $error_is_valid)
+ {
+ $self->[CONFIDENCE] = $args{confidence};
+ delete $args{confidence};
+
+ $self->[ERROR] = $args{error};
+ delete $args{error};
+
+ # Demand load the module we need. We could just
+ # require people to install it...
+ croak 'Could not load the Statistics::PointEstimation module'
+ unless eval "require Statistics::PointEstimation";
+ }
+
+ if(%args) {
+ carp 'skipping unknown arguments';
+ }
+
+ return $self;
+}
+
+
+=item $t->start($tag);
+
+Record the current time so that when C<stop()> is called, we can calculate an
+elapsed time.
+
+=cut
+
+# In this routine we try hard to make the [ gettimeofday ] take place
+# as late as possible to minimize Heisenberg problems. :)
+
+sub start {
+ my $self = shift;
+ my $tag = shift || $self->[LASTTAG] || '_default';
+ $self->[LASTTAG] = $tag;
+ if(exists $self->[SKIPCOUNT]->{$tag}) {
+ if($self->[SKIPCOUNT]->{$tag} > 1) {
+ $self->[SKIPCOUNT]->{$tag}--;
+ } else {
+ $self->[SKIPCOUNT]->{$tag} = 0;
+ push @{$self->[BEFORE]->{$tag}}, [ gettimeofday ];
+ }
+ } else {
+ push @{$self->[TAGS]}, $tag;
+ $self->[SKIPCOUNT]->{$tag} = $self->[SKIP] + 1;
+ if($self->[SKIPCOUNT]->{$tag} > 1) {
+ $self->[SKIPCOUNT]->{$tag}--;
+ } else {
+ $self->[SKIPCOUNT]->{$tag} = 0;
+ $self->[BEFORE]->{$tag} = [ [ gettimeofday ] ]
+ }
+ }
+}
+
+
+=item $t->stop($tag);
+
+Record timing information. If $tag is supplied, it must correspond to one
+given to a previously called C<start()> call. It returns the elapsed time in
+milliseconds. C<stop()> croaks if the timer gets out of sync (e.g. the number
+of C<start()>s does not match the number of C<stop()>s.)
+
+=cut
+
+sub stop {
+ my $after = [ gettimeofday ]; # minimize overhead
+ my $self = shift;
+ my $tag = shift || $self->[LASTTAG] || '_default';
+
+ croak 'must call $t->start($tag) before $t->stop($tag)'
+ unless exists $self->[SKIPCOUNT]->{$tag};
+
+ return if $self->[SKIPCOUNT]->{$tag} > 0;
+
+ my $i = exists $self->[ELAPSED]->{$tag} ?
+ scalar @{$self->[ELAPSED]->{$tag}} : 0;
+ my $before = $self->[BEFORE]->{$tag}->[$i];
+ croak 'timer out of sync' unless defined $before;
+
+ # Create a stats object if we need to
+ if (defined $self->[CONFIDENCE] && !defined $self->[STAT]->{$tag})
+ {
+ $self->[STAT]->{$tag} = Statistics::PointEstimation->new;
+ $self->[STAT]->{$tag}->set_significance($self->[CONFIDENCE]);
+ }
+
+ my $elapsed = tv_interval($before, $after);
+
+ if($i > 0) {
+ push @{$self->[ELAPSED]->{$tag}}, $elapsed;
+ } else {
+ $self->[ELAPSED]->{$tag} = [ $elapsed ];
+ }
+
+ $self->[STAT]->{$tag}->add_data($elapsed)
+ if defined $self->[STAT]->{$tag};
+
+ return $elapsed;
+}
+
+
+=item $t->need_more_samples($tag);
+
+Compute the estimated error in the average of the data collected thus far, and
+return true if that error exceeds the user-specified error. If a $tag is
+supplied, it must correspond to one given to a previously called C<start()>
+call.
+
+This routine assumes that the data are normally distributed.
+
+=cut
+
+sub need_more_samples {
+ my $self = shift;
+ my $tag = shift || $self->[LASTTAG] || '_default';
+
+ carp 'You must set the confidence and error in order to use need_more_samples'
+ unless defined $self->[CONFIDENCE];
+
+ # In case this function is called before any trials are run
+ return 1
+ if !defined $self->[STAT]->{$tag} ||
+ $self->[STAT]->{$tag}->count < $self->[MINIMUM];
+
+ # For debugging
+# printf STDERR "Average: %.5f +/- %.5f, Samples: %d\n",
+# $self->[STAT]->{$tag}->mean(), $self->[STAT]->{$tag}->delta(),
+# $self->[STAT]->{$tag}->count;
+# printf STDERR "Percent Error: %.5f > %.5f\n",
+# $self->[STAT]->{$tag}->delta() / $self->[STAT]->{$tag}->mean() * 100,
+# $self->[ERROR];
+
+ return (($self->[STAT]->{$tag}->delta() / $self->[STAT]->{$tag}->mean() * 100) >
+ $self->[ERROR]);
+}
+
+
+=item $t->report($tag);
+
+Returns a string containing a simple report on the collected timings for $tag.
+This report contains the number of trials run, the total time taken, and, if
+more than one trial was run, the average time needed to run one trial and
+error information. C<report()> will complain (via a warning) if a tag is
+still active.
+
+=cut
+
+sub report {
+ my $self = shift;
+ my $tag = shift || $self->[LASTTAG] || '_default';
+
+ unless(exists $self->[ELAPSED]->{$tag}) {
+ carp join ' ', 'tag', $tag, 'still running';
+ return;
+ }
+
+ return $self->_report($tag);
+}
+
+
+=item $t->reports;
+
+In a scalar context, returns a string containing a simple report on the
+collected timings for all tags. The report is a concatenation of the
+individual tag reports, in the original tag order. In an list context, returns
+a hash keyed by tag and containing reports for each tag. The return value is
+actually an array, so that the original tag order is preserved if you assign
+to an array instead of a hash. C<reports()> will complain (via a warning) if a
+tag is still active.
+
+
+=cut
+
+sub reports {
+ my $self = shift;
+
+ if (wantarray)
+ {
+ my @reports;
+
+ foreach my $tag (@{$self->[TAGS]}) {
+ push @reports, $tag;
+ push @reports, $self->report($tag);
+ }
+
+ return @reports;
+ }
+ else
+ {
+ my $report = '';
+
+ foreach my $tag (@{$self->[TAGS]}) {
+ $report .= $self->report($tag);
+ }
+
+ return $report;
+ }
+}
+
+
+sub _report {
+ my $self = shift;
+ my $tag = shift;
+
+ unless(exists $self->[ELAPSED]->{$tag}) {
+ return "Tag $tag is still running or has not completed its skipped runs, skipping\n";
+ }
+
+ my $report = '';
+
+ my @times = @{$self->[ELAPSED]->{$tag}};
+ my $n = scalar @times;
+ my $total = 0; $total += $_ foreach @times;
+
+ if ($n == 1)
+ {
+ $report .= sprintf "\%d trial of \%s (\%s total)\n",
+ $n, $tag, timestr($total);
+ }
+ else
+ {
+ $report .= sprintf "\%d trials of \%s (\%s total), \%s/trial\n",
+ $n, $tag, timestr($total), timestr($total / $n);
+ }
+
+ if (defined $self->[STAT]->{$tag})
+ {
+ my $delta = 0;
+ $delta = $self->[STAT]->{$tag}->delta()
+ if defined $self->[STAT]->{$tag}->delta();
+
+ $report .= sprintf "Error: +/- \%.5f with \%s confidence\n",
+ $delta, $self->[CONFIDENCE];
+ }
+
+ return $report;
+}
+
+
+=item $t->result($tag);
+
+Return the time it took for $tag to elapse, or the mean time it took for $tag
+to elapse once, if $tag was used to time code more than once. C<result()> will
+complain (via a warning) if a tag is still active.
+
+=cut
+
+sub result {
+ my $self = shift;
+ my $tag = shift || $self->[LASTTAG] || '_default';
+ unless(exists $self->[ELAPSED]->{$tag}) {
+ carp join ' ', 'tag', $tag, 'still running';
+ return;
+ }
+ my @times = @{$self->[ELAPSED]->{$tag}};
+ my $total = 0; $total += $_ foreach @times;
+ return $total / @times;
+}
+
+
+=item $t->results;
+
+Returns the timing data as a hash keyed on tags where each value is
+the time it took to run that code, or the average time it took,
+if that code ran more than once. In scalar context it returns a reference
+to that hash. The return value is actually an array, so that the original
+tag order is preserved if you assign to an array instead of a hash.
+
+=cut
+
+sub results {
+ my $self = shift;
+ my @results;
+ foreach my $tag (@{$self->[TAGS]}) {
+ push @results, $tag;
+ push @results, $self->result($tag);
+ }
+ return wantarray ? @results : \@results;
+}
+
+
+=item $t->data($tag), $t->data;
+
+These methods are useful if you want to recover the full internal timing
+data to roll your own reports.
+
+If called with a $tag, returns the raw timing data for that $tag as
+an array (or a reference to an array if called in scalar context). This is
+useful for feeding to something like the Statistics::Descriptive package.
+
+If called with no arguments, returns the raw timing data as a hash keyed
+on tags, where the values of the hash are lists of timings for that
+code. In scalar context, it returns a reference to that hash. As with
+C<results()>, the data is internally represented as an array so you can
+recover the original tag order by assigning to an array instead of a hash.
+
+=cut
+
+sub data {
+ my $self = shift;
+ my $tag = shift;
+ my @results;
+ if($tag) {
+ if(exists $self->[ELAPSED]->{$tag}) {
+ @results = @{$self->[ELAPSED]->{$tag}};
+ } else {
+ @results = ();
+ }
+ } else {
+ @results = map { ( $_ => $self->[ELAPSED]->{$_} || [] ) }
+ @{$self->[TAGS]};
+ }
+ return wantarray ? @results : \@results;
+}
+
+
+# ------------------------------------------------------------------------
+# Internal utility subroutines
+
+# timestr($sec) takes a floating-point number of seconds and formats
+# it in a sensible way, commifying large numbers of seconds, and
+# converting to milliseconds if it makes sense. Since Time::HiRes has
+# at most microsecond resolution, no attempt is made to convert into
+# anything below that. A unit string is appended to the number.
+
+sub timestr {
+ my $sec = shift;
+ my $retstr;
+ if($sec >= 1_000) {
+ $retstr = commify(int $sec) . 's';
+ } elsif($sec >= 1) {
+ $retstr = sprintf $sec == int $sec ? '%ds' : '%0.3fs', $sec;
+ } elsif($sec >= 0.001) {
+ my $ms = $sec * 1_000;
+ $retstr = sprintf $ms == int $ms ? '%dms' : '%0.3fms', $ms;
+ } elsif($sec >= 0.000001) {
+ $retstr = sprintf '%dus', $sec * 1_000_000;
+ } else {
+ # I'll have whatever real-time OS she's having
+ $retstr = $sec . 's';
+ }
+ $retstr;
+}
+
+
+# commify($num) inserts a grouping comma according to en-US standards
+# for numbers larger than 1000. For example, the integer 123456 would
+# be written 123,456. Any fractional part is left untouched.
+
+sub commify {
+ my $num = shift;
+ return unless $num =~ /\d/;
+ return $num if $num < 1_000;
+
+ my $ip = int $num;
+ my($fp) = ($num =~ /\.(\d+)/);
+
+ $ip =~ s/(\d\d\d)$/,$1/;
+ 1 while $ip =~ s/(\d)(\d\d\d),/$1,$2,/;
+
+ return $fp ? join '.', $ip, $fp : $ip;
+}
+
+
+# ------------------------------------------------------------------------
+# Finish up the POD.
+
+=back
+
+=head1 BUGS
+
+Benchmarking is an inherently futile activity, fraught with uncertainty
+not dissimilar to that experienced in quantum mechanics. But things are a
+little better if you apply statistics.
+
+=head1 SEE ALSO
+
+L<Benchmark>, L<Time::HiRes>, L<Time::Stopwatch>, L<Statistics::Descriptive>
+
+=head1 AUTHOR
+
+The original code (written before April 20, 2001) was written by Andrew Ho
+E<lt>andrew@zeuscat.comE<gt>, and is copyright (c) 2000-2001 Andrew Ho.
+Versions up to 0.5 are distributed under the same terms as Perl.
+
+Maintenance of this module is now being done by David Coppit
+E<lt>david@coppit.orgE<gt>.
+
+=cut
+
+
+# ------------------------------------------------------------------------
+# Return true for a valid Perl include
+
+1;
+
+
+# ========================================================================
+__END__