javascript HTTP Archive数据集的BigQuery查询示例。

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了 JavaScript HTTP Archive 数据集的 BigQuery 查询示例相关的知识,希望对你有一定的参考价值。

/*
 * Which domains are the top third party dependencies?
 *
 * Joins every request to the page that issued it and keeps only requests
 * whose DOMAIN() differs from the DOMAIN() of the page URL, then counts the
 * remaining requests per requested domain, largest count first.
 */

SELECT
  DOMAIN(req.url) AS third_party,
  COUNT(*) AS num_requests
FROM [httparchive:runs.2013_06_01_requests] AS req
INNER JOIN (
  SELECT
    pageid,
    DOMAIN(url) AS self
  FROM [httparchive:runs.2013_06_01_pages]
) AS pages
  ON req.pageid = pages.pageid
WHERE DOMAIN(req.url) != pages.self
GROUP BY third_party
ORDER BY num_requests DESC
/*
 * Which sites enabled Resource Timing? Courtesy of Steve Souders.
 *
 * Counts, per request domain, the requests whose lower-cased
 * respOtherHeaders value contains "timing-allow-origin"; domains with the
 * most matching requests come first.
 */

SELECT
  DOMAIN(url) AS domainname,
  COUNT(*) AS num
FROM [httparchive:runs.2013_06_01_requests]
WHERE LOWER(respOtherHeaders) CONTAINS "timing-allow-origin"
GROUP BY domainname
ORDER BY num DESC
/*
 * First render quantiles: approximate median, 75th and 90th percentile of
 * renderStart over the 2013_06_01 pages table.
 *
 * QUANTILES(expr, 101) returns 101 boundaries with the minimum first and the
 * maximum last, and NTH() counts from 1, so the k-th percentile is found at
 * position k + 1 (51 for the median, not 50).
 */

SELECT
  NTH(51, QUANTILES(renderStart, 101)) AS median,
  NTH(76, QUANTILES(renderStart, 101)) AS seventy_fifth,
  NTH(91, QUANTILES(renderStart, 101)) AS ninetieth
FROM [httparchive:runs.2013_06_01_pages]

/*
 * Trend quantiles over time: approximate median, 75th and 90th percentile of
 * renderStart, one output row per monthly pages table.
 *
 * Legacy BigQuery SQL treats a comma between sub-selects in FROM as a union
 * of their rows, so each sub-select below contributes exactly one row.
 *
 * QUANTILES(expr, 101) returns 101 boundaries (minimum first, maximum last)
 * and NTH() counts from 1, so the k-th percentile is at position k + 1.
 *
 * The explicit ORDER BY makes the row order deterministic (newest month
 * first); the 'YYYY_MM' labels sort chronologically as plain strings.
 */

SELECT
  date,
  median,
  seventy_fifth,
  ninetieth
FROM
  (SELECT
     '2013_06' AS date,
     NTH(51, QUANTILES(renderStart, 101)) AS median,
     NTH(76, QUANTILES(renderStart, 101)) AS seventy_fifth,
     NTH(91, QUANTILES(renderStart, 101)) AS ninetieth
   FROM [httparchive:runs.2013_06_01_pages]),
  (SELECT
     '2013_05' AS date,
     NTH(51, QUANTILES(renderStart, 101)) AS median,
     NTH(76, QUANTILES(renderStart, 101)) AS seventy_fifth,
     NTH(91, QUANTILES(renderStart, 101)) AS ninetieth
   FROM [httparchive:runs.2013_05_01_pages]),
  (SELECT
     '2013_04' AS date,
     NTH(51, QUANTILES(renderStart, 101)) AS median,
     NTH(76, QUANTILES(renderStart, 101)) AS seventy_fifth,
     NTH(91, QUANTILES(renderStart, 101)) AS ninetieth
   FROM [httparchive:runs.2013_04_01_pages]),
  (SELECT
     '2013_03' AS date,
     NTH(51, QUANTILES(renderStart, 101)) AS median,
     NTH(76, QUANTILES(renderStart, 101)) AS seventy_fifth,
     NTH(91, QUANTILES(renderStart, 101)) AS ninetieth
   FROM [httparchive:runs.2013_03_01_pages])
ORDER BY date DESC
/*
 * Medians for the different content types: approximate median of each
 * bytes* column of the 2013_06_01 pages table.
 *
 * QUANTILES(expr, 101) returns 101 boundaries (minimum first, maximum last)
 * and NTH() counts from 1, so the median (50th percentile) is at position 51.
 */

SELECT
  NTH(51, QUANTILES(bytesTotal, 101))   AS TOTAL_med,
  NTH(51, QUANTILES(bytesHtmlDoc, 101)) AS HTMLDOC_med,
  NTH(51, QUANTILES(bytesHtml, 101))    AS HTML_med,
  NTH(51, QUANTILES(bytesJS, 101))      AS JS_med,
  NTH(51, QUANTILES(bytesCSS, 101))     AS CSS_med,
  NTH(51, QUANTILES(bytesImg, 101))     AS IMG_med,
  NTH(51, QUANTILES(bytesGif, 101))     AS GIF_med,
  NTH(51, QUANTILES(bytesJpg, 101))     AS JPG_med,
  NTH(51, QUANTILES(bytesPng, 101))     AS PNG_med,
  NTH(51, QUANTILES(bytesFont, 101))    AS FONT_med,
  NTH(51, QUANTILES(bytesFlash, 101))   AS FLASH_med,
  NTH(51, QUANTILES(bytesJson, 101))    AS JSON_med,
  NTH(51, QUANTILES(bytesOther, 101))   AS OTHER_med
FROM [httparchive:runs.2013_06_01_pages]
/*
 * Which frameworks are the most popular?
 *
 * Counts the distinct pages that issued at least one request whose URL
 * contains one of the framework names, grouped by the first name matched.
 *
 * EXACT_COUNT_DISTINCT is used because COUNT(DISTINCT ...) in legacy
 * BigQuery SQL is only a statistical approximation for large cardinalities.
 * NOTE(review): the match is a plain substring test on the URL, so e.g. any
 * URL containing "prototype" is counted - confirm that is acceptable.
 */

SELECT
  REGEXP_EXTRACT(url, r'(jquery|dojo|angular|prototype)') AS type,
  EXACT_COUNT_DISTINCT(pageid) AS num_pages
FROM [httparchive:runs.2013_05_15_requests]
WHERE REGEXP_MATCH(url, r'jquery|dojo|angular|prototype')
GROUP BY type
ORDER BY num_pages DESC;
/*
 * Which sites use multiple versions of jQuery, and how many? (facepalm)
 *
 * Only requests whose URL matches googleapis...jquery/<x.y.z>/...js yield a
 * version; every other jQuery-looking request produces a NULL version and is
 * dropped. Pages with at least two distinct versions are returned together
 * with the list of versions, ordered by page rank.
 */

SELECT
  pages.pageid,
  url,
  cnt,
  versions,
  pages.rank AS rank
FROM [httparchive:runs.2013_06_01_pages] AS pages
JOIN (
  SELECT
    pageid,
    COUNT(DISTINCT version) AS cnt,
    GROUP_CONCAT(version) AS versions
  FROM (
    /*
     * One row per (page, version). Grouping by url as well would emit the
     * same version once per distinct URL and repeat it in GROUP_CONCAT.
     */
    SELECT
      pageid,
      REGEXP_EXTRACT(url, r'googleapis.*jquery\/(\d+\.\d+\.\d+)\/.*\.js') AS version
    FROM [httparchive:runs.2013_06_01_requests]
    WHERE REGEXP_MATCH(url, r'jquery.*\.js')
    GROUP BY pageid, version
  )
  WHERE version IS NOT NULL
  GROUP BY pageid
  HAVING cnt >= 2
) AS lib ON lib.pageid = pages.pageid
WHERE pages.rank IS NOT NULL
ORDER BY rank ASC
/*
 * One JS framework is clearly not enough... Which sites are using multiple
 * popular JS frameworks, and how many?
 *
 * The WHERE filter uses the same parenthesised pattern as REGEXP_EXTRACT.
 * Without the parentheses the trailing .*\.js applies only to the last
 * alternative (scriptaculous), so the filter admits URLs for which the
 * extract returns NULL.
 */

SELECT
  pages.pageid,
  url,
  cnt,
  libs,
  pages.rank AS rank
FROM [httparchive:runs.2013_06_01_pages] AS pages
JOIN (
  SELECT
    pageid,
    COUNT(DISTINCT type) AS cnt,
    GROUP_CONCAT(type) AS libs
  FROM (
    /* One row per (page, framework name). */
    SELECT
      REGEXP_EXTRACT(url, r'(jquery|dojo|angular|prototype|backbone|emberjs|sencha|scriptaculous).*\.js') AS type,
      pageid
    FROM [httparchive:runs.2013_06_01_requests]
    WHERE REGEXP_MATCH(url, r'(jquery|dojo|angular|prototype|backbone|emberjs|sencha|scriptaculous).*\.js')
    GROUP BY pageid, type
  )
  GROUP BY pageid
  HAVING cnt >= 2
) AS lib ON lib.pageid = pages.pageid
WHERE pages.rank IS NOT NULL
ORDER BY rank ASC
  /**
   * Adds the "HTTP Archive + BigQuery" menu to the active spreadsheet.
   * The menu has a single "Run Query" entry bound to the function named
   * "runQuery".
   * NOTE(review): presumably invoked by the Apps Script onOpen trigger - confirm.
   */
  function onOpen() {
    var menuItems = [];
    menuItems.push({name: "Run Query", functionName: "runQuery"});

    var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
    spreadsheet.addMenu("HTTP Archive + BigQuery", menuItems);
  }

/**
 * Runs a percentile trend query against the HTTP Archive pages tables and
 * writes the result into the active sheet.
 *
 * The expression to analyse is read from cell F2 of the active sheet. For
 * each month in `dates` one sub-select computes the approximate median, 75th
 * and 90th percentile of that expression; the sub-selects are combined with
 * the legacy-SQL comma union. Result rows are written starting at cell A2.
 *
 * Errors raised by the BigQuery service are logged, shown in a message box,
 * and abort the run.
 *
 * NOTE(review): this uses the getter-style BigQuery advanced-service API
 * (getJobComplete(), getRows(), getF(), getV()) and the
 * Jobs.query(projectId, sql) argument order exactly as the original did -
 * confirm they match the service version enabled for this script.
 */
function runQuery() {
  var projectNumber = 'httparchive';

  var sheet = SpreadsheetApp.getActiveSheet();
  // F2 is spliced verbatim into the SQL text, so it must hold a column name
  // (or expression) that is valid inside QUANTILES().
  var c = sheet.getRange('F2').getValue();
  if (c === '' || c === null || c === undefined) {
    Browser.msgBox('Cell F2 must contain the column to analyse.');
    return;
  }

  var dates = ['2013_06','2013_05','2013_04','2013_03','2013_02','2013_01',
               '2012_12','2012_11','2012_10','2012_09','2012_08','2012_07',
               '2012_06','2012_05','2012_04','2012_03','2012_02','2012_01'
              ];

  // One sub-select per month. QUANTILES(x,101) yields 101 boundaries (minimum
  // first) and NTH() is 1-based, so the k-th percentile is at index k + 1.
  var subqueries = [];
  for (var d = 0; d < dates.length; d++) {
    subqueries.push(
        '(SELECT "' + dates[d] + '" date,' +
        ' NTH(51, quantiles(' + c + ',101)) median,' +
        ' NTH(76, quantiles(' + c + ',101)) seventy_fifth,' +
        ' NTH(91, quantiles(' + c + ',101)) ninetieth' +
        ' FROM [httparchive:runs.' + dates[d] + '_01_pages])');
  }
  // join() avoids the dangling comma a plain append would leave after the
  // last sub-select.
  var sql = 'SELECT date, median, seventy_fifth, ninetieth FROM ' +
      subqueries.join(',');

  var queryResults;

  // Inserts a Query Job
  try {
    queryResults = BigQuery.Jobs.query(projectNumber, sql);
  }
  catch (err) {
    Logger.log(err);
    Browser.msgBox(err);
    return;
  }

  // Poll until the job is complete. The job id comes from the response
  // itself; the original referenced an undefined `queryJob` variable here.
  while (!queryResults.getJobComplete()) {
    try {
      queryResults = BigQuery.Jobs.getQueryResults(
          projectNumber, queryResults.getJobReference().getJobId());
    }
    catch (err) {
      Logger.log(err);
      Browser.msgBox(err);
      return;
    }
  }

  var tableRows = queryResults.getRows() || [];
  if (tableRows.length === 0) {
    Logger.log('Query returned no rows.');
    Browser.msgBox('Query returned no rows.');
    return;
  }

  // Copy every cell of every returned row into a 2-D array.
  var resultValues = [];
  for (var i = 0; i < tableRows.length; i++) {
    var cols = tableRows[i].getF();
    var rowValues = [];
    for (var j = 0; j < cols.length; j++) {
      rowValues.push(cols[j].getV());
    }
    resultValues.push(rowValues);
  }

  // Write the values starting from cell A2. The range is sized from the rows
  // actually received so it always matches the array passed to setValues().
  sheet.getRange(2, 1, resultValues.length, resultValues[0].length)
      .setValues(resultValues);
}

以上是关于 JavaScript HTTP Archive 数据集的 BigQuery 查询示例的主要内容,如果未能解决你的问题,请参考以下文章:

JavaScript前端插件

JavaScript---闭包和作用域链

转:HAR(HTTP Archive)规范

Javascript学习-闭包

js操作文件FileSystemObject对象http://www.cnblogs.com/suiqirui19872005/archive/2007/06/03/769431.html

nodeJS中的包 npm install http://www.cnblogs.com/xiaohuochai/archive/2017/05/20/6882027.html