diff options
-rwxr-xr-x | collectstats.pl | 15 | ||||
-rw-r--r-- | defparams.pl | 8 | ||||
-rwxr-xr-x | duplicates.cgi | 213 |
3 files changed, 147 insertions, 89 deletions
diff --git a/collectstats.pl b/collectstats.pl index e645165e1..d6a97e4e2 100755 --- a/collectstats.pl +++ b/collectstats.pl @@ -24,7 +24,7 @@ # Run me out of cron at midnight to collect Bugzilla statistics. -use DB_File; +use AnyDBM_File; use diagnostics; use strict; use vars @::legal_product; @@ -125,16 +125,16 @@ sub calculate_dupes { my $key; my $changed = 1; - my $today = &today; + my $today = &today_dash; # Save % count here in a date-named file # so we can read it back in to do changed counters # First, delete it if it exists, so we don't add to the contents of an old file - if (-e "data/mining/dupes$today.db") { - system("rm -f data/mining/dupes$today.db"); + if (-e "data/mining/dupes$today") { + system("rm -f data/mining/dupes$today"); } - dbmopen(%count, "data/mining/dupes$today.db", 0644) || die "Can't open DBM dupes file: $!"; + dbmopen(%count, "data/mining/dupes$today", 0644) || die "Can't open DBM dupes file: $!"; # Create a hash with key "a bug number", value "bug which that bug is a # direct dupe of" - straight from the duplicates table. @@ -194,3 +194,8 @@ sub today { return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom; } +sub today_dash { + my ($dom, $mon, $year) = (localtime(time))[3, 4, 5]; + return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom; +} + diff --git a/defparams.pl b/defparams.pl index c384eb973..ff60338b0 100644 --- a/defparams.pl +++ b/defparams.pl @@ -120,10 +120,6 @@ sub check_shadowdb { # t -- A short text entry field (suitable for a single line) # l -- A long text field (suitable for many lines) # b -- A boolean value (either 1 or 0) -# i -- An integer. -# defenum -- This param defines an enum that defines a column in one of -# the database tables. The name of the parameter is of the form -# "tablename.columnname". DefParam("maintainer", "The email address of the person who maintains this installation of Bugzilla.", @@ -334,6 +330,10 @@ additional data you may have.</li> <br> }); +DefParam("mostfreqthreshold", + "The minimum number of duplicates a bug needs to show up on the <A HREF=\"duplicates.cgi\">most frequently reported bugs page</a>. If you have a large database and this page takes a long time to load, try increasing this number.", + "t", + "2"); DefParam("mybugstemplate", "This is the URL to use to bring up a simple 'all of my bugs' list for a user. %userid% will get replaced with the login name of a user.", diff --git a/duplicates.cgi b/duplicates.cgi index 6bb4e20ab..03a366763 100755 --- a/duplicates.cgi +++ b/duplicates.cgi @@ -25,82 +25,119 @@ use diagnostics; use strict; use CGI "param"; -use DB_File; +use AnyDBM_File; require "globals.pl"; require "CGI.pl"; -ConnectToDatabase(); +ConnectToDatabase(1); GetVersionTable(); +my %dbmcount; my %count; my $dobefore = 0; my $before = ""; my %before; -my $changedsince; -my $maxrows = 500; # arbitrary limit on max number of rows +# Get params from URL -my $today = &days_ago(0); +my $changedsince = 7; # default one week +my $maxrows = 100; # arbitrary limit on max number of rows +my $sortby = "dup_count"; # default to sorting by dup count -# Open today's record of dupes -if (-e "data/mining/dupes$today.db") -{ - dbmopen(%count, "data/mining/dupes${today}.db", 0644) || die "Can't open today's dupes file: $!"; -} -else +if (defined(param("sortby"))) { - # Try yesterday's, then (in case today's hasn't been created yet) :-) - $today = &days_ago(1); - if (-e "data/mining/dupes$today.db") - { - dbmopen(%count, "data/mining/dupes${today}.db", 0644) || die "Can't open yesterday's dupes file: $!"; - } - else - { - die "There are no duplicate statistics for today or yesterday."; - } + $sortby = param("sortby"); } # Check for changedsince param, and see if it's a positive integer if (defined(param("changedsince")) && param("changedsince") =~ /^\d{1,4}$/) { - $changedsince = param("changedsince"); + $changedsince = param("changedsince"); } -else + +# check for max rows param, and see if it's a positive integer +if (defined(param("maxrows")) && param("maxrows") =~ /^\d{1,4}$/) { - # Otherwise, default to one week - $changedsince = "7"; + $maxrows = param("maxrows"); } -$before = &days_ago($changedsince); +# Start the page +print "Content-type: text/html\n"; +print "\n"; +PutHeader("Most Frequently Reported Bugs"); -# check for max rows parameter -if (defined(param("maxrows")) && param("maxrows") =~ /^\d{1,4}$/) +# Open today's record of dupes +my $today = &days_ago(0); + +if (-e "data/mining/dupes$today.db") { - $maxrows = param("maxrows"); + dbmopen(%dbmcount, "data/mining/dupes$today", 0644) || + &die_politely("Can't open today's dupes file: $!"); } +else +{ + # Try yesterday's, then (in case today's hasn't been created yet) + $today = &days_ago(1); + if (-e "data/mining/dupes$today.db") + { + dbmopen(%dbmcount, "data/mining/dupes$today", 0644) || + &die_politely("Can't open yesterday's dupes file: $!"); + } + else + { + &die_politely("There are no duplicate statistics for today or yesterday."); + } +} + +# Copy hash (so we don't mess up the on-disk file when we remove entries) +%count = %dbmcount; +my $key; +my $value; +my $threshold = Param("mostfreqthreshold"); -if (-e "data/mining/dupes${before}.db") +# Remove all those dupes under the threshold (for performance reasons) +while (($key, $value) = each %count) { - dbmopen(%before, "data/mining/dupes${before}.db", 0644) && ($dobefore = 1); + if ($value < $threshold) + { + delete $count{$key}; + } } -print "Content-type: text/html\n"; -print "\n"; -PutHeader("Most Frequently Reported Bugs"); +# Try and open the database from "changedsince" days ago +$before = &days_ago($changedsince); + +if (-e "data/mining/dupes$before.db") +{ + dbmopen(%before, "data/mining/dupes$before", 0644) && ($dobefore = 1); +} print Param("mostfreqhtml"); print " <table BORDER> - <tr BGCOLOR=\"#CCCCCC\"> -<td><center><b>Bug #</b></center></td> -<td><center><b>Dupe<br>Count</b></center></td>\n"; + +<td><center><b> +<a href=\"duplicates.cgi?sortby=bug_no&maxrows=$maxrows&changedsince=$changedsince\">Bug #</a> +</b></center></td> +<td><center><b> +<a href=\"duplicates.cgi?sortby=dup_count&maxrows=$maxrows&changedsince=$changedsince\">Dupe<br>Count</a> +</b></center></td>\n"; + +my %delta; if ($dobefore) { - print "<td><center><b>Change in last<br>$changedsince day(s)</b></center></td> "; + print "<td><center><b> + <a href=\"duplicates.cgi?sortby=delta&maxrows=$maxrows&changedsince=$changedsince\">Change in + last<br>$changedsince day(s)</a></b></center></td>"; + + # Calculate the deltas if we are doing a "before" + foreach (keys(%count)) + { + $delta{$_} = $count{$_} - $before{$_}; + } } print " @@ -111,78 +148,94 @@ print " <td><center><b>Summary</b></center></td> </tr>\n\n"; -my %delta; +# Sort, if required +my @sortedcount; -# Calculate the deltas if we are doing a "before" -if ($dobefore) +if ($sortby eq "delta") { - foreach (keys(%count)) - { - $delta{$_} = $count{$_} - $before{$_}; - } + @sortedcount = sort by_delta keys(%count); } - -# Offer the option of sorting on total count, or on the delta -my @sortedcount; - -if (defined(param("sortby")) && param("sortby") == "delta") +elsif ($sortby eq "bug_no") { - @sortedcount = sort by_delta keys(%count); + @sortedcount = sort by_bug_no keys(%count); } -else +elsif ($sortby eq "dup_count") { - @sortedcount = sort by_dup_count keys(%count); + @sortedcount = sort by_dup_count keys(%count); } my $i = 0; foreach (@sortedcount) { - my $id = $_; - SendSQL("SELECT component, bug_severity, op_sys, target_milestone, short_desc, groupset " . + my $id = $_; + SendSQL("SELECT component, bug_severity, op_sys, target_milestone, short_desc, groupset " . " FROM bugs WHERE bug_id = $id"); - my ($component, $severity, $op_sys, $milestone, $summary, $groupset) = FetchSQLData(); + my ($component, $severity, $op_sys, $milestone, $summary, $groupset) = FetchSQLData(); next unless $groupset == 0; $summary = html_quote($summary); - print "<tr>"; - print '<td><center><A HREF="show_bug.cgi?id=' . $id . '">'; - print $id . "</A></center></td>"; - print "<td><center>$count{$id}</center></td>"; - if ($dobefore) - { - print "<td><center>$delta{$id}</center></td>"; - } - print "<td>$component</td>\n "; - print "<td><center>$severity</center></td>"; - print "<td><center>$op_sys</center></td>"; - print "<td><center>$milestone</center></td>"; - print "<td>$summary</td>"; - print "</tr>\n"; - - $i++; - if ($i == $maxrows) - { - last; - } + print "<tr>"; + print '<td><center><A HREF="show_bug.cgi?id=' . $id . '">'; + print $id . "</A></center></td>"; + print "<td><center>$count{$id}</center></td>"; + if ($dobefore) + { + print "<td><center>$delta{$id}</center></td>"; + } + print "<td>$component</td>\n "; + print "<td><center>$severity</center></td>"; + print "<td><center>$op_sys</center></td>"; + print "<td><center>$milestone</center></td>"; + print "<td>$summary</td>"; + print "</tr>\n"; + + $i++; + if ($i == $maxrows) + { + last; + } } print "</table><br><br>"; PutFooter(); +sub by_bug_no +{ + return ($a <=> $b); +} + sub by_dup_count { - return -($count{$a} <=> $count{$b}); + return -($count{$a} <=> $count{$b}); } sub by_delta { - return -($delta{$a} <=> $delta{$b}); + return -($delta{$a} <=> $delta{$b}); } sub days_ago { - my ($dom, $mon, $year) = (localtime(time - ($_[0]*24*60*60)))[3, 4, 5]; - return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom; + my ($dom, $mon, $year) = (localtime(time - ($_[0]*24*60*60)))[3, 4, 5]; + return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom; } +sub die_politely { + my $msg = shift; + + print <<FIN; +<p> +<table border=1 cellpadding=10> +<tr> +<td align=center> +<font color=blue>$msg</font> +</td> +</tr> +</table> +<p> +FIN + + PutFooter(); + exit; +} |