// Finds all of the PDF/XLS/DOC document links on a page and enables
// Google Analytics tracking for them. Known issue: will fail to match
// PDF links that use a hash to jump to a page (eg file.pdf#page=35).
// Also not compatible with IE6, because it relies on CSS 3 selectors.
// More info on the page tracking API:
// http://www.google.com/support/analytics/bin/answer.py?hl=en&answer=55529
function trackFileClicks(){
  // Extensions whose links get tracked. Add further types here
  // (eg '.xlsx') to track them as well.
  var tracked_extensions = ['.pdf', '.xls', '.doc'];

  // Registers a click listener on one link; each click reports a
  // page view to Google for the link's cleaned-up path.
  function watchLink(anchor, ext){
    anchor.observe('click', function(){
      var tracker = _gat._getTracker(sGoogleCode); //Change to your ID
      var tracked_path = cleanHref(anchor.href, ext);
      tracker._trackPageview(tracked_path);
    });
  }

  tracked_extensions.each(function(ext){
    // CSS 3 attribute selector: $= matches hrefs that END with the
    // extension, eg a[href$='.pdf'].
    var matching_links = $$("a[href$='" + ext + "']");
    matching_links.each(function(anchor){
      watchLink(anchor, ext);
    });
  });
}

// Reduces a full URL to its path plus file name, with the trailing
// extension removed (eg "http://host/docs/file.pdf" -> "/docs/file").
//   href      - URL to reduce; any value with a toString() is accepted.
//   extension - extension to strip, including the dot (eg ".pdf").
// Returns the reduced path. If the URL does not match the pattern below,
// the href is returned unchanged (as a string) instead of throwing, so a
// click on an unusual link never raises an error.
function cleanHref(href, extension){
  // RE found on StackOverflow:
  // http://stackoverflow.com/questions/27745/getting-parts-of-a-url-regex
  // A regex literal cannot span several lines, so the pattern is
  // assembled from two literals via .source.
  var uri_regex = new RegExp(
    /^((http[s]?|ftp):\/)?\/?([^:\/\s]+)(:([^\/]*))?((\/\w+)*\/)/.source +
    /([\w\-\.]+[^#?\s]+)(\?([^#]*))?(#(.*))?$/.source
  );
  var clean_href = href.toString();
  // Capture group indexes of the regex:
  //   SCHEMA = 2, DOMAIN = 3, PORT = 5, PATH = 6,
  //   FILE = 8, QUERYSTRING = 9, HASH = 12
  // Only PATH and FILE are needed here.
  var PATH = 6, FILE = 8;
  var match_container = clean_href.match(uri_regex);

  if (match_container === null) {
    return clean_href;
  }

  var file_name = match_container[FILE];
  // Strip the extension only when it is the END of the name. The PATH
  // group accepts only \w directory names, so FILE can itself contain
  // directories (eg "my.pdf-files/report.pdf"); removing the first
  // occurrence would corrupt such names.
  if (typeof extension === 'string' && extension.length > 0) {
    var ext_start = file_name.length - extension.length;
    if (ext_start >= 0 && file_name.lastIndexOf(extension) === ext_start) {
      file_name = file_name.substring(0, ext_start);
    }
  }

  return match_container[PATH] + file_name;
}

