source: phptop/phptop @ 374

Last change on this file since 374 was 374, checked in by zerodeux, 11 years ago

phptop 0.5 release

  • Property svn:executable set to *
File size: 12.5 KB
Line 
1#!/usr/bin/perl
2
3# phptop - Analyse quickly system ressource usage across many PHP queries
4# Copyright (C) 2009,2010 Bearstech - http://bearstech.com/
5#
6# This program is free software: you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation, either version 3 of the License, or
9# (at your option) any later version.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19use strict;
20use warnings;
21use Getopt::Long qw(:config no_auto_abbrev no_ignore_case bundling);
22use HTTP::Date;
23use POSIX qw(setlocale strftime);
24use Term::Size;
25use Sys::Hostname;
26
27my $package_name    = 'phptop';
28my $package_version = '0.5';
29my $package_owner   = 'Copyright (C) 2009,2010 Bearstech - http://bearstech.com/';
30my $package_url     = 'http://forge.bearstech.com/trac/wiki/PhpTop';
31
32# Options
33my @opts = @ARGV;  # Save @ARGV, we let GetOptions() modify it
34my $help;
35my $version;
36my @log;
37my $full_query;
38my $path_only;
39my @sortkeys;
40my $count  = 10;
41my $span   = 10;
42my $output = 'text';
43
44# Globals
45my @keys = qw/hit time user sys mem mem_max/;
46my %stat;
47my $now   = time();
48my $hits  = 0;
49my $bogus = 0;
50
51
52sub print_short_help {
53    print STDERR <<EOF;
54Usage: $package_name [options]
55
56Try '$package_name --help' for more information.
57EOF
58    exit 2;
59}
60
61sub print_help {
62    print STDERR <<EOF;
63Usage: $package_name [options]
64
65Options:
66  -c, --count N        Limit output to top N URLs (default is $count)
67  -f, --full-query     Consider full URL with CGI parameters
68  -h, --help           Display this information
69  -l, --log path       Logfiles to parse, you may use several -l and wildcards
70  -o, --output mode    Output mode: text or html (default is $output);
71  -p, --path-only      Only print path, skip http://host
72  -s, --sort key       Sort key: hit, time, user, sys or mem (default is @sortkeys)
73  -t, --time N         Parse data from now back to N minutes (default is $span)
74  -v, --version        Display version number and copyright info
75
76The -s option can be used multiple times to generate several reports in one go.
77
78Columns explanation:
79  Hit       Number of queries
80  Time      Total wall clock time (in seconds)
81  User      Total CPU time (in seconds)
82  Sys       Total system time (in seconds)
83  Mem/hit   Average allocated memory per query (MB/hit)
84  Mem_max   Maximum allocated memory across queries (MB)
85EOF
86    exit 0;
87}
88
89sub print_version {
90    print "$package_name $package_version\n";
91    exit 0;
92}
93
94# parse_log() is called for every log files and fills up the global %stat hash
95#
96sub parse_log {
97    my $logfile = shift;
98
99    my $lh;
100    my $reverse = 0;
101    my $revmiss = 0;
102
103    # Try to open logfile in 'reverse' mode first, using 'tac'
104    # (trying to parse less lines and hence being much faster)
105    {
106        # It's okay for open(tac|) to fail, and we handle the other open errors explicitly
107        no warnings;
108   
109        if (open($lh, "tac $logfile |")) {
110            $reverse = 1;
111        }
112        elsif (!open($lh, "<$logfile")) {
113            warn "$logfile: $!";
114            return 0;
115        }
116    }
117
118    setlocale(POSIX::LC_NUMERIC, 'C'); # Use . as decimal separator
119
120  LINE:
121    while (<$lh>) {
122        chomp;
123        next if not /\bphptop ([^ ]+) (.*)/;
124        my ($url, $pairs) = ($1, $2);
125
126        # Only keep records within expected time span. If a time stamp is not found or
127        # cannot be parsed, we still process the item.
128        if (/^\[(.*?)\]/) {
129            my $stamp = str2time($1);
130            my $skip = defined $stamp && ($now - $stamp) > $span * 60;
131
132            if ($reverse and $skip) {
133                # Reverse parsing: exit loop when at least 10 out of time records
134                # have been seen (to cope with unordered log entries)
135                $revmiss++;
136                last LINE if $revmiss > 9;
137            }
138            # Forward parsing: skip record until we are in the right time window.
139            # We also obviously skip the record in reverse mode.
140            next LINE if $skip;
141        }
142
143        $pairs =~ s/, referer.*//;     # Apache may append a referer in error.log, ignore
144        $pairs =~ s/(\d),(\d)/$1.$2/g; # Be independent of the PHP locale, normalize on dot decimal separator
145        my @kv = split(/ /, $pairs);
146        next if @kv < 5;               # Protect against bogus/wrapped phptop records, count pairs
147
148        # Fix URLs (depending on options)
149        $url =~ s/index\.php\/?//;  # / == /index.php (99.9% of the time)
150        $url =~ s/\?.*// if not defined $full_query;
151        $url =~ s/^https?:\/\/[^\/]+// if defined $path_only;
152
153        # For each 'key:<numeric val>' sum the numeric value per URL
154        foreach (@kv) {
155            my ($key, $val) = split(/:/, $_);
156
157            # If we have a bogus key/value pair, zap the whole record
158            if ($key eq '' or not defined $val or not $val =~ /^[0-9.]+$/) {
159                delete $stat{$url};
160                $bogus++;
161                next LINE;
162            }
163
164            $stat{$url}{$key} += $val;
165
166            # Compute max for mem values
167            if ($key eq 'mem') {
168                $stat{$url}{'mem_max'} = $val if ($stat{$url}{'mem_max'} || 0) < $val;
169            }
170        }
171
172        $stat{$url}{'hit'}++;
173        $hits++;
174    }
175
176    close($lh);
177    1;
178}
179
180# fix_stat() is called once all log files have been parsed and only once,
181# and modifies a few values in %stat for a useful display.
182#
183sub fix_stat {
184    while (my ($url, $i) = each %stat) {
185        # Convert 'mem' values from total to 'per hit' average, more meaningful.
186        # Also scale memory values from bytes to MB.
187        $i->{'mem'} /= $i->{'hit'} * 2**20;
188        $i->{'mem_max'} /= 2**20;
189    }
190}
191
192# raw_report() compute the tabular report which is then rendered via another routine.
193#
194sub raw_report {
195    my $sortkey = shift;
196
197    my @headers = map {
198        my $h = $_;
199        $h = "$h/hit" if /^mem$/;  # Mem shows per-hit average values
200        ucfirst $h;
201    } @keys;
202    unshift(@headers, 'URL');
203
204    my @sumkeys = qw/hit time user sys/;  # We compute totals for these keys
205
206    # Sort queries according to $sortkey (they are all numeric)
207    my @rows;
208    my %sum;
209    foreach my $url (sort { $stat{$b}{$sortkey} <=> $stat{$a}{$sortkey} } keys %stat) {
210        my $s = $stat{$url};
211        $sum{$_} += $s->{$_} foreach @sumkeys;
212
213        # Continue to loop and only sum totals even if $count records have been rendered
214        next if @rows >= $count;
215
216        my @cells = map { sprintf($_ =~ m/hit/ ? '%d' : '%.1f', $s->{$_}) } @keys;
217        push(@rows, [ $url, @cells]);
218    }
219
220    my @sums = map { defined $sum{$_} ? sprintf($_ =~ m/hit/ ? '%d' : '%.1f', $sum{$_}) : '' } @keys;
221    unshift(@sums, "Total (from last $span min)");
222
223    return { headers => \@headers, rows => \@rows, sums => \@sums };
224}
225
226# text_report() pretty prints in a terminal raw_report() result
227#
228sub text_report {
229    # Compute all reports at first, we'll align all report columns
230    my %reports;
231    $reports{$_} = raw_report($_) foreach @sortkeys;
232
233    # Limit URL column width, depending on output available columns
234    my $cols  = $ENV{COLUMNS};
235    $cols ||= Term::Size::chars;
236    my $colmax = $cols - 1 - 9*@keys;
237    die "Terminal width to short, try cheating with COLUMNS env var." if $colmax < 16;
238
239    # Compute URL column width
240    my $width = 0;
241    foreach my $r (values %reports) {
242        foreach (@{$r->{rows}}) {
243            my $w = length($_->[0]);
244            $width = $w if $w > $width;
245        }
246    }
247    $width = $colmax if $width > $colmax;
248
249    my $report_nb = 0;
250    foreach my $key (@sortkeys) {
251        my $r = $reports{$key};
252
253        # Table separator (starting from 2nd report)
254        print "\n" if $report_nb++;
255   
256        # Table header
257        my $h = $r->{headers};
258        map { $_ = ">$_" if /^$key(\/hit)?$/i } @$h;
259        printf("%-${width}s %s\n", shift(@$h), join(' ', map { sprintf("%8s", $_) } @$h));
260
261        # Rows
262        foreach my $cell (@{$r->{rows}}) {
263            printf("%-${width}s %s\n", shift(@$cell), join(' ', map { sprintf("%8s", $_) } @$cell));
264        }
265
266        # Sums, totals
267        my $f = $r->{sums};
268        printf("%-${width}s %s\n", shift(@$f), join(' ', map { sprintf("%8s", $_) } @$f));
269    }
270}
271
272# html_report() renders a report from the inlined template in DATA section
273#
274sub html_report {
275    my $host = hostname();
276    my $date = strftime('%Y-%m-%d %H:%M', localtime());
277    my $args = join(' ', @opts);
278
279    my $rows;
280    foreach my $key (@sortkeys) {
281        my $r = raw_report($key);
282
283        my $header_class = sub {
284            shift;
285            (/^URL/ ? ' url' : '').
286            (/^$key(\/hit)?$/i ? ' sortby' : '')
287        };
288        $rows .= html_report_row('th', $header_class, $r->{headers});
289
290        my $row_class = sub { shift; /^[0-9.]*$/ ? '' : ' url' };
291        $rows .= html_report_row('td', $row_class, $_) foreach @{$r->{rows}};
292
293        my $sum_class = sub { shift; ' sum'.(/^[0-9.]*$/ ? '' : ' url') };
294        $rows .= html_report_row('td', $sum_class, $r->{sums});
295    }
296    chomp($rows);
297
298    # Tried with HTML::Template but it was plain ugly to implement the magic
299    # in html_report_row with proper HTML/logic separation.
300    my %param = (
301        TITLE  => "phptop\@$host",
302        ROWS   => $rows,
303        FOOTER => "Generated by <a href=\"$package_url\">$package_name $package_version</a>".
304                  " from <em>$host</em> on $date".
305                  ($args ne "" ? " (options: <code>$args</code>)": ""),
306    );
307    my $tmpl = do { local($/); <DATA> };
308    $tmpl =~  s/::(\w+)/$param{$1}/g;
309    print $tmpl;
310}
311sub html_report_row {
312    my ($tag, $classfunc, $cells) = @_;
313
314    my $row = "<tr>\n";
315    foreach (@$cells) {
316        my $class = $classfunc->($_);    # Ask for classes
317        $class =~ s/^ +//;               # Put up with ugly concatenation
318        $class &&= " class=\"$class\"";  # Only set class if some is set
319        $row .= "  <$tag$class>$_</$tag>\n";
320    }
321    $row .= "</tr>\n";
322}
323
324
325# Main
326
327GetOptions(
328    'c|count=i'      => \$count,
329    'f|full-query'   => \$full_query,
330    'h|help'         => \$help,
331    'l|log=s'        => \@log,
332    'o|output=s'     => \$output,
333    'p|path'         => \$path_only,
334    's|sort=s'       => \@sortkeys,
335    't|time=i'       => \$span,
336    'v|version'      => \$version,
337)
338or print_short_help();
339
340@sortkeys = ('hit') if not @sortkeys;
341foreach my $k (@sortkeys) {
342  next if grep { $_ eq $k } @keys;
343  print STDERR "Unknown sort key '$k'.\n";
344  exit 1;
345}
346
347if (not $output =~ /^text|html$/) {
348  print STDERR "Unknown output mode '$output'.\n";
349  exit 1;
350}
351
352print_help() if $help;
353print_version() if $version;
354
355my @logfiles;
356push(@log, '/var/log/apache2/error*log', '/var/log/apache2/*/error*log') if !@log;
357map { push(@logfiles, glob($_)) } @log;
358
359my $parsed = 0;
360$parsed += parse_log($_) foreach @logfiles;
361
362if ($parsed == 0) {
363    print STDERR "Error: no log files found/processed. Tried: ".join(", ", @log)."\n";
364    exit 2;
365}
366if ($hits == 0) {
367    print STDERR "No phptop records found.\n";
368    exit 0;
369}
370if ($bogus >= $hits * .05) {
371    print STDERR "Warning: you have more than 5% malformed records ($bogus out of $hits).\n";
372}
373
374fix_stat();
375text_report() if $output eq 'text';
376html_report() if $output eq 'html';
377
378
379__DATA__
380<?xml version="1.0" encoding="UTF-8"?>
381<!DOCTYPE html
382     PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
383    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
384<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
385  <head>
386    <title>::TITLE</title>
387    <style type="text/css">
388      table.stat { border-collapse: collapse; border-spacing: 0 }
389      .stat th { border-bottom: solid black 1px }
390      .stat th, .stat td { padding: .15em .25em .15em .75em; text-align: right; border-left: solid #ccc 1px }
391      .stat th.url, .stat td.url { padding: .15em .5em .15em 0; text-align: left; border-left: none }
392      .stat th.sortby { background-color: black; color: white }
393      .stat td.sum { font-weight: bold; padding-bottom: 1em }
394      .stat tr:hover td { background-color: #eee }
395      .stat a, .stat a:visited { color: black; text-decoration: none }
396      #footer, #footer a, #footer a:visited { color: #aaa; margin-top: 1em }
397      #footer em { font-style: normal; font-weight: bold }
398    </style>
399  </head>
400  <body>
401    <table class="stat">
402::ROWS
403    </table>
404    <p id="footer">::FOOTER</p>
405  </body>
406</html>
Note: See TracBrowser for help on using the repository browser.