sql [SQL查询片段]用于在命令行或通过R和其他工具使用SQL的快速代码段#tags:sql,R,text processing,命令li

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了sql [SQL查询片段]用于在命令行或通过R和其他工具使用SQL的快速代码段#tags:sql,R,text processing,命令li相关的知识,希望对你有一定的参考价值。

-- Normalized available pages according to the swap_free field in kstat.
-- Each sample's swap_free is centred on the table-wide mean and scaled by the
-- table-wide standard deviation (a z-score), rounded to one decimal place;
-- the result counts how many samples fall on each rounded z-score.
-- NOTE(review): stdev() is not a built-in SQLite aggregate; presumably it is
-- supplied by a loadable extension or by the host tool -- confirm.
SELECT
    ROUND((mem.swap_free - stats.swpav) / stats.swpsd, 1) AS "Free Swap Z-score",
    COUNT(*) AS "Count"
FROM (
    -- One-row summary of the whole table.
    SELECT
        avg(swap_free) AS swpav,
        stdev(swap_free) AS swpsd
    FROM mem
) AS stats
-- Explicit cross join: every sample is paired with the single summary row.
CROSS JOIN mem
GROUP BY 1;

-- Similar to above, but for the freemem field in kstat.
-- For every sample the ratio freemem / physmem is centred on the table-wide
-- mean ratio and scaled by the table-wide standard deviation of that ratio,
-- then rounded to one decimal place; samples are counted per rounded value.
-- The output column keeps its original label although the value is a z-score
-- of the ratio rather than the raw ratio.
-- NOTE(review): stdev() is not a built-in SQLite aggregate; presumably it is
-- supplied by a loadable extension or by the host tool -- confirm.
SELECT
    ROUND(
        (CAST(mem.freemem AS double) / mem.physmem - ratio_stats.avgFreeRatio)
        / ratio_stats.sdFreeRatio,
        1
    ) AS "Ratio Free Memory to Total",
    COUNT(*) AS "Count"
FROM (
    -- One-row summary; the cast forces floating-point division.
    SELECT
        avg(freemem / CAST(physmem AS double)) AS avgFreeRatio,
        stdev(freemem / CAST(physmem AS double)) AS sdFreeRatio
    FROM mem
) AS ratio_stats
-- Explicit cross join: every sample is paired with the single summary row.
CROSS JOIN mem
GROUP BY 1;
-- Table holding one row of I/O statistics per device sample.
-- NOTE(review): column names look like the fields of extended iostat output;
-- confirm against the tool that produces the data.
CREATE TABLE IF NOT EXISTS "iostat" (
  "r_per_sec" float,
  "w_per_sec" float,
  "kbR_per_sec" float,
  "kbW_per_sec" float,
  "wait" float,
  "actv" float,
  "wsvc_t" float,
  "asvc_t" float,
  "wait_pct" integer,
  "busy_pct" integer,
  -- Previously declared without a type; text is assumed because the column
  -- appears to hold a device name -- confirm.
  "device" text
);
-- Histogram of pp_kernel: values are quantized into equal-width bins by
-- dividing by the bin width and multiplying back, then counted per bin.
WITH bucket AS (
    -- Bin width, computed once instead of once per use:
    --   (max - min) / (count(*) / (max - min))
    -- NOTE(review): pp_kernel is declared integer in the mem schema, so this is
    -- integer division. When the row count is smaller than (max - min) the
    -- inner quotient is 0 and the width becomes NULL in SQLite -- confirm the
    -- data always has more rows than its value range.
    SELECT
        (max(pp_kernel) - min(pp_kernel))
        / (count(*) / (max(pp_kernel) - min(pp_kernel))) AS width
    FROM mem
),
binned AS (
    SELECT
        -- Bucket floor.
        round(mem.pp_kernel / bucket.width * bucket.width) AS bf,
        -- Bucket ceiling: floor plus one bin width.
        round(mem.pp_kernel / bucket.width) * bucket.width + bucket.width AS bc
    FROM mem
    CROSS JOIN bucket
)
SELECT
    bf,
    count(*) AS count,
    -- Single-quoted format string: double-quoted string literals are a SQLite
    -- compatibility quirk that can be disabled at build or run time.
    printf('%d to %d', bf, bc) AS range
FROM binned
GROUP BY bf, printf('%d to %d', bf, bc)
ORDER BY bf;
-- Random sample of mem rows. The inner query shuffles all timestamps and keeps
-- a fixed number of them (100 here); the outer query returns the rows whose
-- timestamp was picked. Timestamps must be unique per row for the sample size
-- to be meaningful.
SELECT
    timestamp,
    uptime,
    pp_kernel
FROM mem
WHERE timestamp IN (
    SELECT picked.timestamp
    FROM mem AS picked
    ORDER BY RANDOM()
    LIMIT 100
)
-- begin
-- Groups arcusage samples by free-memory range and reports, per range, the
-- sample count and the averages of ARC size, total-minus-kernel memory and
-- free memory. Every quantity is shifted right by 30 bits before use
-- (presumably bytes to whole GB -- confirm units).
-- Alternatives tried previously: group by free_memory >> 30; order by time.
select
    count(1),
    avg(arc_size >> 30) as size_gb,
    avg((total_memory - kernel_memory) >> 30) as tot_minus_kern_gb,
    avg(free_memory >> 30) as free_gb,
    free_range
from (
    select
        arc_size,
        total_memory,
        kernel_memory,
        free_memory,
        -- Branches are tested top-down, so each one only needs its lower
        -- bound; 'x' is reached only when free_memory is NULL.
        case
            when (free_memory >> 30) >= 20 then 'above 20GB'
            when (free_memory >> 30) >= 15 then 'between 15 and 20GB'
            when (free_memory >> 30) >= 10 then 'between 10 and 15GB'
            when (free_memory >> 30) >= 5 then 'between 5 and 10GB'
            when (free_memory >> 30) >= 1 then 'between 1 and 5GB'
            when (free_memory >> 30) < 1 then 'below 1GB'
            else 'x'
        end as free_range
    from arcusage
) as ranged
group by free_range
order by count(1)
--- end

-- begin
-- Groups arcusage samples by ARC-size range and reports, per range, the sample
-- count and the averages of ARC size, total-minus-kernel memory and free
-- memory. Every quantity is shifted right by 30 bits before use (presumably
-- bytes to whole GB -- confirm units).
-- Fixes relative to the earlier version:
--   * avg_free_gb is computed from free_memory (it duplicated avg_arc_size_gb);
--   * exactly 30 GB lands in the top range instead of falling through to 'x'.
-- Alternatives tried previously: group by free_memory >> 30; order by time.
select
    count(1),
    avg(arc_size >> 30) as avg_arc_size_gb,
    avg((total_memory - kernel_memory) >> 30) as avg_tot_minus_kern_gb,
    avg(free_memory >> 30) as avg_free_gb,
    -- Output column keeps its original name, arc_size.
    arc_size_range as arc_size
from (
    select
        arc_size,
        total_memory,
        kernel_memory,
        free_memory,
        -- The range label is built once here under its own name so that the
        -- grouping key cannot be confused with the raw arc_size column.
        -- 'x' is reached only when arc_size is NULL.
        case
            when (arc_size >> 30) >= 30 then 'above 30GB'
            when (arc_size >> 30) >= 20 then 'between 20 and 30GB'
            when (arc_size >> 30) >= 15 then 'between 15 and 20GB'
            when (arc_size >> 30) >= 10 then 'between 10 and 15GB'
            when (arc_size >> 30) >= 5 then 'between 5 and 10GB'
            when (arc_size >> 30) >= 1 then 'between 1 and 5GB'
            when (arc_size >> 30) < 1 then 'below 1GB'
            else 'x'
        end as arc_size_range
    from arcusage
) as ranged
group by arc_size_range
order by count(1);
-- end
# The commands below run SQL directly against ./queuestat.csv with the q tool.
# NOTE(review): -d, presumably sets a comma delimiter, -H treats the first line
# as a header and -O prints a header on output -- confirm against q's help.

# Count rows for pool 'p01' grouped by (rqav/10)*10 and pipe the result to
# chart as a line plot.
# NOTE(review): the divide-then-multiply only bins values into steps of 10 if
# rqav is handled as an integer -- confirm.
q -d, -H -O "select (rqav/10)*10, count(*) as count from ./queuestat.csv where pool='p01' group by 1 order by 1" | chart line ,

# Record count plus the averages of rqav and wqav for pool 'p01'. The query has
# no GROUP BY, so it returns a single row and the trailing "order by 1" has
# nothing to sort.
q -d, -H -O \
"select count(*) as records, avg(rqav) as runq_average, 
 avg(wqav) as waitq_average from ./queuestat.csv where pool='p01' order by 1"

# A histogram reporting the frequency of each run queue size (rqctav) for pool
# 'p01', piped to chart as a line plot with labelled axes.
q -d, -H -O "select rqctav, count(rqctav) as count from ./queuestat.csv where pool='p01' group by rqctav order by 1" | chart -x "Run Queue Size" -y "Frequency" line ,

# Get basic stats about the range of the data, median and mean, per pool:
# count, mean, quartiles (Q1, median, Q3), the interquartile range, and the
# whisker bounds at Q1 - 1.5*IQR and Q3 + 1.5*IQR.
# NOTE(review): percentile() is not a core SQLite function; presumably provided
# by q -- confirm.
q -d, -H -O "SELECT
    pool,
    count(*) as count,
    avg(rqav) as MEAN,
    percentile(rqav, 0.75) - percentile(rqav, 0.25) as IQR, 
    percentile(rqav, 0.25) as Q1,
    percentile(rqav, 0.50) as MEDIAN,
    percentile(rqav, 0.75) as Q3,
    percentile(rqav, 0.25) - (1.5 * (percentile(rqav, 0.75) - 
    percentile(rqav, 0.25))) as LOWER_WSKR,
    percentile(rqav, 0.75) + (1.5 * (percentile(rqav, 0.75) - 
    percentile(rqav, 0.25))) as UPPER_WSKR
    FROM ./queuestat.csv GROUP BY 1 ORDER BY 1"
Example header from D script: bw-tput-iops-actv-time-hist-csv.d
  1: sdname
  2: mpxiowwn
  3: actvtm
  4: rangerlat
  5: rangewlat
  6: totbytes
  7: tput
  8: maxtput
  9: ctrd
 10: ctwr
 11: aviosz
 12: rangeiosz
 13: aviops
 14: maxiops
 15: avbw
 16: rangebw
 17: bwlt1k
 18: bw10k
 19: bw100k
 20: bw1m
 21: timegt1000ms
 22: time100ms
 23: time50ms
 24: time25ms
 25: time10ms
 26: timelt1ms
 27: cterr
```
# Per-mpxiowwn breakdown of ./data1.csv: write and read share of all I/Os, the
# share of I/Os in each latency bucket, and the total I/O count (ctrd + ctwr).
# Rows are sorted by the 10th output column, ioct.
# The column name is written bare: inside a double-quoted shell string,
# backticks are command substitution, so `mpxiowwn` would be executed as a
# command instead of reaching textql as a quoted identifier.
textql -output-header -header -sql \
"select mpxiowwn as name,
    round(cast(sum(ctwr) as float) / cast(sum(ctrd+ctwr) as float), 3) as w_pct,
    round(cast(sum(ctrd) as float) / cast(sum(ctrd+ctwr) as float), 3) as r_pct,
    round(cast(sum(timegt1000ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as timegt1000ms_pct,
    round(cast(sum(time100ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as time100ms_pct,
    round(cast(sum(time50ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as time50ms_pct,
    round(cast(sum(time25ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as time25ms_pct,
    round(cast(sum(time10ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as time10ms_pct,
    round(cast(sum(timelt1ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as timelt1ms_pct,
    sum(ctrd+ctwr) as ioct
    from data1 where aviops > 0
    group by 1 order by 10" ./data1.csv
```    

Example header from D script: bw-tput-iops-actv-time-hist-csv.d
  1: sdname
  2: mpxiowwn
  3: actvtm
  4: rangerlat
  5: rangewlat
  6: totbytes
  7: tput
  8: maxtput
  9: ctrd
 10: ctwr
 11: aviosz
 12: rangeiosz
 13: iops
 14: maxiops
 15: avKBps
 16: rangeKBps
 17: ratelt1MBps
 18: rate10MBps
 19: rate100MBps
 20: rate1GBps
 21: timegt1000ms
 22: time100ms
 23: time50ms
 24: time25ms
 25: time10ms
 26: timelt1ms
 27: iosztiny
 28: iosz4k
 29: iosz8k
 30: iosz16k
 31: iosz32k
 32: iosz64k
 33: ioszbig
 34: cterr

## Get pretty-printed output
```
# Per-mpxiowwn breakdown of ./data2.csv: write and read share of all I/Os, the
# share of I/Os in each I/O-size bucket, total bytes, total I/O count, summed
# active time and average bandwidth. Rows are sorted by the 14th output
# column, av_bw.
# Two fixes relative to the earlier version:
#   * the column name is written bare, because backticks inside a double-quoted
#     shell string are command substitution and would run `mpxiowwn`;
#   * av_bw averages avKBps, since the data2 header listed for this command has
#     no avbw column (that name belongs to the older data1 header).
~/go/bin/textql -output-header -header -pretty -sql \
"select mpxiowwn as name,
    round(cast(sum(ctwr) as float) / cast(sum(ctrd+ctwr) as float), 3) as w_pct,
    round(cast(sum(ctrd) as float) / cast(sum(ctrd+ctwr) as float), 3) as r_pct,
    round(cast(sum(iosztiny) as float) / cast(sum(ctrd+ctwr) as float), 3) as iosztiny_pct,
    round(cast(sum(iosz4k) as float) / cast(sum(ctrd+ctwr) as float), 3) as iosz4k_pct,
    round(cast(sum(iosz8k) as float) / cast(sum(ctrd+ctwr) as float), 3) as iosz8k_pct,
    round(cast(sum(iosz16k) as float) / cast(sum(ctrd+ctwr) as float), 3) as iosz16k_pct,
    round(cast(sum(iosz32k) as float) / cast(sum(ctrd+ctwr) as float), 3) as iosz32k_pct,
    round(cast(sum(iosz64k) as float) / cast(sum(ctrd+ctwr) as float), 3) as iosz64k_pct,
    round(cast(sum(ioszbig) as float) / cast(sum(ctrd+ctwr) as float), 3) as ioszbig_pct,
    sum(totbytes) as bytes,
    sum(ctrd+ctwr) as ioct,
    sum(actvtm) as actv_time,
    avg(avKBps) as av_bw
    from data2 where iops > 0
    group by 1 order by 14" ./data2.csv
```

## Get pretty-printed output
```
 # Per-mpxiowwn latency summary of ./data2.csv: write and read share of all
 # I/Os, the share of I/Os in each latency bucket, total bytes shifted right by
 # 20 bits (tot_MB), summed active time, average avKBps, average iops
 # truncated to an integer, and average I/O size.
 # The column name is written bare: inside a double-quoted shell string,
 # backticks are command substitution, so `mpxiowwn` would be executed as a
 # command instead of reaching textql as a quoted identifier.
 # NOTE(review): with the timegt1000ms column commented out, positional
 # "order by 10" sorts by actv_time -- confirm that is the intended sort key.
 ~/go/bin/textql -output-header -header -pretty -sql \
"select mpxiowwn as name,
    round(cast(sum(ctwr) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%w',
    round(cast(sum(ctrd) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%r',
    -- round(cast(sum(timegt1000ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%tgt1000ms',
    round(cast(sum(time100ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%t100ms',
    round(cast(sum(time50ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%t50ms',
    round(cast(sum(time25ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%t25ms',
    round(cast(sum(time10ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%t10ms',
    round(cast(sum(timelt1ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%tlt1ms',
    sum(totbytes) >> 20 as tot_MB,
    sum(actvtm) as actv_time,
    avg(avKBps),
    cast(avg(iops) as integer) as iops,
    avg(aviosz) as av_iosz
    from data2 where iops > 0
    group by 1 order by 10" ./data2.csv
```
-- Pairs every gdp row with the pop rows for the same country.
-- Both tables come from the World Bank example SQLite file shipped with the
-- "Modeling with Data" code examples.
SELECT *
FROM gdp AS g
INNER JOIN pop AS p
    ON g.country = p.Country
-- The kspgs program generates CSV-structured output, which converts nicely to
-- a SQL table with the following schema. Every column is an integer and
-- "timestamp" identifies the sample.
CREATE TABLE IF NOT EXISTS "mem" (
  "timestamp" integer PRIMARY KEY,
  "uptime" integer,
  "runtime" integer,
  "interval" integer,
  "availrmem" integer,
  "desfree" integer,
  "desscan" integer,
  "econtig" integer,
  "fastscan" integer,
  "freemem" integer,
  "kernelbase" integer,
  "lotsfree" integer,
  "minfree" integer,
  "nalloc" integer,
  "nalloc_calls" integer,
  -- Was declared as "integer integer"; the duplicated type token is removed.
  "nfree" integer,
  "nfree_calls" integer,
  "nscan" integer,
  "pagesfree" integer,
  "pageslocked" integer,
  "pagestotal" integer,
  "physmem" integer,
  "pp_kernel" integer,
  "slowscan" integer,
  "swap_alloc" integer,
  "swap_avail" integer,
  "swap_free" integer,
  "swap_resv" integer,
  "free" integer
);


-- Secondary indexes on "mem". Ascending order is the default, so it is not
-- spelled out.
CREATE INDEX IF NOT EXISTS idx_availrmem
    ON "mem" ("availrmem");

CREATE INDEX IF NOT EXISTS idx_freemem
    ON "mem" ("freemem");

-- NOTE(review): "timestamp" is already the table's PRIMARY KEY, so this unique
-- index is presumably redundant -- confirm before relying on or removing it.
CREATE UNIQUE INDEX IF NOT EXISTS idx_timestamp
    ON "mem" ("timestamp");

CREATE INDEX IF NOT EXISTS idx_pp_kernel
    ON "mem" ("pp_kernel");

以上是关于sql [SQL查询片段]用于在命令行或通过R和其他工具使用SQL的快速代码段#tags:sql,R,text processing,命令li的主要内容,如果未能解决你的问题,请参考以下文章

SQL 查询 - 如何使用 group by 获取 2 行或更多行

求SQL关键字和其语法(好的话,给100分)

sql查询语句

当有更多行或查询变得更复杂时,分区视图会读取所有表? SQL Server 2008 错误?

在 CRAN R 中使用 BETWEEN 命令进行 SQL 查询以选择日期

通过 R 在 SQL 查询中粘贴值