[SQL query snippets] Quick snippets for using SQL at the command line, or via R and other tools
tags: sql, R, text processing, command line
-- Distribution of normalized (z-score) values of available swap pages,
-- taken from the swap_free field in kstat.
SELECT
ROUND((swap_free - swpav) / swpsd, 1) as "Free Swap Z-score",
COUNT(*) as "Count"
FROM (
SELECT avg(swap_free) as swpav, stdev(swap_free) as swpsd
FROM mem
) a, mem
GROUP BY 1
-- Similar to the above, but normalizes the ratio of the freemem field in kstat to physmem.
SELECT
ROUND((CAST(freemem AS double) / physmem
- subq.avgFreeRatio) / subq.sdFreeRatio, 1) as "Free Memory Ratio Z-score",
COUNT(*) as "Count"
FROM (
SELECT
avg(freemem / CAST(physmem AS double)) as avgFreeRatio,
stdev(freemem / CAST(physmem AS double)) as sdFreeRatio
FROM mem
) subq, mem
GROUP BY 1
CREATE TABLE IF NOT EXISTS "iostat" (
"r_per_sec" float,
"w_per_sec" float,
"kbR_per_sec" float,
"kbW_per_sec" float,
"wait" float,
"actv" float,
"wsvc_t" float,
"asvc_t" float,
"wait_pct" integer,
"busy_pct" integer,
"device"
);
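-- A minimal example query against the table above (an assumption, not from the
-- original): per-device averages of service times and busy percentage, assuming
-- one row per device per sampling interval.
SELECT
device,
avg(wsvc_t) AS avg_wsvc_t,
avg(asvc_t) AS avg_asvc_t,
avg(busy_pct) AS avg_busy_pct
FROM iostat
GROUP BY device
ORDER BY avg_busy_pct DESC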
SELECT
bf,
count(*) AS count,
printf("%d to %d", bf, bc) AS range
FROM (
-- Subquery creates ranges of values, effectively quantizing unique values into
-- bins, which we accomplish here through division and multiplication by the
-- same value. The seemingly crazy-looking
-- (SELECT (max(pp_kernel) - min(pp_kernel)) / (count(*) /
-- (max(pp_kernel) - min(pp_kernel))) FROM mem) is how we compute the bucket
-- width (the value range divided by the number of buckets).
SELECT
-- Compute bucket floor
round(pp_kernel/
(SELECT (max(pp_kernel) - min(pp_kernel)) / (count(*) /
(max(pp_kernel) - min(pp_kernel)))
FROM mem) *
(SELECT (max(pp_kernel) - min(pp_kernel)) /
(count(*) / (max(pp_kernel) - min(pp_kernel)))
FROM mem)) AS bf,
-- Compute bucket ceiling
round(pp_kernel/
(SELECT (max(pp_kernel) - min(pp_kernel)) / (count(*) /
(max(pp_kernel) - min(pp_kernel)))
FROM mem)) *
(SELECT (max(pp_kernel) - min(pp_kernel)) /
(count(*) / (max(pp_kernel) - min(pp_kernel)))
FROM mem) +
(SELECT (max(pp_kernel) - min(pp_kernel)) / (count(*) /
(max(pp_kernel) - min(pp_kernel)))
FROM mem) AS bc
FROM mem
) a
group by 1, 3
order by 1
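-- A minimal equivalent sketch (an assumption, not from the original): with a
-- SQLite version that supports common table expressions, the bucket width can
-- be computed once instead of repeating the same subquery several times.
WITH width AS (
SELECT (max(pp_kernel) - min(pp_kernel)) /
(count(*) / (max(pp_kernel) - min(pp_kernel))) AS w
FROM mem
)
SELECT
(pp_kernel / w) * w AS bf,                            -- bucket floor (integer division)
count(*) AS count,
printf("%d to %d", (pp_kernel / w) * w,
(pp_kernel / w) * w + w) AS range                     -- floor .. floor + width
FROM mem, width
GROUP BY 1, 3
ORDER BY 1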
-- Use a subquery to draw a random sample: order the timestamps randomly, limit
-- them to a fixed number (100 in this case), and then select the rows from the
-- dataset whose timestamps match. The column selected on must hold unique values
-- (timestamp is the table's primary key here), otherwise the results are not sane.
SELECT timestamp, uptime, pp_kernel
FROM mem
WHERE timestamp IN (SELECT timestamp FROM mem ORDER BY RANDOM() LIMIT 100)
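-- A minimal alternative sketch (an assumption, not from the original): sample
-- roughly 1% of rows by filtering on RANDOM() directly; unlike the LIMIT-based
-- approach above, the number of rows returned varies from run to run.
SELECT timestamp, uptime, pp_kernel
FROM mem
WHERE (abs(random()) % 100) = 0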
-- begin
select
count(1),
--(time*1e-9),
avg(arc_size >> 30) as size_gb,
--arc_metadata_size >> 30 as md_size_gb,
avg((total_memory - kernel_memory) >> 30) as tot_minus_kern_gb,
avg(free_memory >> 30) as free_gb,
case
when (free_memory >> 30) >= 20 then 'above 20GB'
when (free_memory >> 30) >= 15 and (free_memory >> 30) < 20 then 'between 15 and 20GB'
when (free_memory >> 30) >= 10 and (free_memory >> 30) < 15 then 'between 10 and 15GB'
when (free_memory >> 30) >= 5 and (free_memory >> 30) < 10 then 'between 5 and 10GB'
when (free_memory >> 30) >= 1 and (free_memory >> 30) < 5 then 'between 1 and 5GB'
when (free_memory >> 30) < 1 then 'below 1GB'
else 'x' end as free_range
from arcusage
group by case
when (free_memory >> 30) >= 20 then 'above 20GB'
when (free_memory >> 30) >= 15 and (free_memory >> 30) < 20 then 'between 15 and 20GB'
when (free_memory >> 30) >= 10 and (free_memory >> 30) < 15 then 'between 10 and 15GB'
when (free_memory >> 30) >= 5 and (free_memory >> 30) < 10 then 'between 5 and 10GB'
when (free_memory >> 30) >= 1 and (free_memory >> 30) < 5 then 'between 1 and 5GB'
when (free_memory >> 30) < 1 then 'below 1GB'
else 'x' end
--group by free_memory >> 30
--order by time
order by 1
--- end
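-- A minimal simplification sketch (an assumption, not from the original):
-- SQLite allows GROUP BY to reference an output column alias, so the CASE
-- expression need not be repeated in the GROUP BY clause.
select
count(1),
avg(free_memory >> 30) as avg_free_gb,
case
when (free_memory >> 30) >= 20 then 'above 20GB'
when (free_memory >> 30) >= 1 then 'between 1 and 20GB'
else 'below 1GB'
end as free_range
from arcusage
group by free_range
order by 1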
-- begin
select
count(1),
--(time*1e-9),
avg(arc_size >> 30) as avg_arc_size_gb,
--arc_metadata_size >> 30 as md_size_gb,
avg((total_memory - kernel_memory) >> 30) as avg_tot_minus_kern_gb,
avg(free_memory >> 30) as avg_free_gb,
case
when (arc_size >> 30) > 30 then 'above 30GB'
when (arc_size >> 30) >= 20 and (arc_size >> 30) < 30 then 'between 20 and 30GB'
when (arc_size >> 30) >= 15 and (arc_size >> 30) < 20 then 'between 15 and 20GB'
when (arc_size >> 30) >= 10 and (arc_size >> 30) < 15 then 'between 10 and 15GB'
when (arc_size >> 30) >= 5 and (arc_size >> 30) < 10 then 'between 5 and 10GB'
when (arc_size >> 30) >= 1 and (arc_size >> 30) < 5 then 'between 1 and 5GB'
when (arc_size >> 30) < 1 then 'below 1GB'
else 'x' end as arc_size_range
from arcusage
group by case
when (arc_size >> 30) > 30 then 'above 30GB'
when (arc_size >> 30) >= 20 and (arc_size >> 30) < 30 then 'between 20 and 30GB'
when (arc_size >> 30) >= 15 and (arc_size >> 30) < 20 then 'between 15 and 20GB'
when (arc_size >> 30) >= 10 and (arc_size >> 30) < 15 then 'between 10 and 15GB'
when (arc_size >> 30) >= 5 and (arc_size >> 30) < 10 then 'between 5 and 10GB'
when (arc_size >> 30) >= 1 and (arc_size >> 30) < 5 then 'between 1 and 5GB'
when (arc_size >> 30) < 1 then 'below 1GB'
else 'x' end
--group by free_memory >> 30
--order by time
order by 1
-- end
q -d, -H -O "select (rqav/10)*10, count(*) as count from ./queuestat.csv where pool='p01' group by 1 order by 1" | chart line ,
q -d, -H -O \
"select count(*) as records, avg(rqav) as runq_average,
avg(wqav) as waitq_average from ./queuestat.csv where pool='p01' order by 1"
# A histogram reporting frequency of sizes of Run Queue
q -d, -H -O "select rqctav, count(rqctav) as count from ./queuestat.csv where pool='p01' group by rqctav order by 1" | chart -x "Run Queue Size" -y "Frequency" line ,
# Get basic stats about the range of the data, median and mean
q -d, -H -O "SELECT
pool,
count(*) as count,
avg(rqav) as MEAN,
percentile(rqav, 0.75) - percentile(rqav, 0.25) as IQR,
percentile(rqav, 0.25) as Q1,
percentile(rqav, 0.50) as MEDIAN,
percentile(rqav, 0.75) as Q3,
percentile(rqav, 0.25) - (1.5 * (percentile(rqav, 0.75) -
percentile(rqav, 0.25))) as LOWER_WSKR,
percentile(rqav, 0.75) + (1.5 * (percentile(rqav, 0.75) -
percentile(rqav, 0.25))) as UPPER_WSKR
FROM ./queuestat.csv GROUP BY 1 ORDER BY 1"
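# A hypothetical follow-up (assumes the same q tool and the percentile aggregate
# already relied on above): per-pool median run queue size across the whole dataset.
q -d, -H -O "select pool, count(*) as count, percentile(rqav, 0.50) as median_rqav from ./queuestat.csv group by 1 order by 1"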
## Example header from D script: bw-tput-iops-actv-time-hist-csv.d
1: sdname
2: mpxiowwn
3: actvtm
4: rangerlat
5: rangewlat
6: totbytes
7: tput
8: maxtput
9: ctrd
10: ctwr
11: aviosz
12: rangeiosz
13: aviops
14: maxiops
15: avbw
16: rangebw
17: bwlt1k
18: bw10k
19: bw100k
20: bw1m
21: timegt1000ms
22: time100ms
23: time50ms
24: time25ms
25: time10ms
26: timelt1ms
27: cterr
```
textql -output-header -header -sql \
"select `mpxiowwn` as name,
round(cast(sum(ctwr) as float) / cast(sum(ctrd+ctwr) as float), 3) as w_pct,
round(cast(sum(ctrd) as float) / cast(sum(ctrd+ctwr) as float), 3) as r_pct,
round(cast(sum(timegt1000ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as timegt1000ms_pct,
round(cast(sum(time100ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as time100ms_pct,
round(cast(sum(time50ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as time50ms_pct,
round(cast(sum(time25ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as time25ms_pct,
round(cast(sum(time10ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as time10ms_pct,
round(cast(sum(timelt1ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as timelt1ms_pct,
sum(ctrd+ctwr) as ioct
from data1 where aviops > 0
group by 1 order by 10" ./data1.csv
```
## Example header from D script: bw-tput-iops-actv-time-hist-csv.d
1: sdname
2: mpxiowwn
3: actvtm
4: rangerlat
5: rangewlat
6: totbytes
7: tput
8: maxtput
9: ctrd
10: ctwr
11: aviosz
12: rangeiosz
13: iops
14: maxiops
15: avKBps
16: rangeKBps
17: ratelt1MBps
18: rate10MBps
19: rate100MBps
20: rate1GBps
21: timegt1000ms
22: time100ms
23: time50ms
24: time25ms
25: time10ms
26: timelt1ms
27: iosztiny
28: iosz4k
29: iosz8k
30: iosz16k
31: iosz32k
32: iosz64k
33: ioszbig
34: cterr
## Get pretty-printed output (I/O size distribution)
```
~/go/bin/textql -output-header -header -pretty -sql \
"select `mpxiowwn` as name,
round(cast(sum(ctwr) as float) / cast(sum(ctrd+ctwr) as float), 3) as w_pct,
round(cast(sum(ctrd) as float) / cast(sum(ctrd+ctwr) as float), 3) as r_pct,
round(cast(sum(iosztiny) as float) / cast(sum(ctrd+ctwr) as float), 3) as iosztiny_pct,
round(cast(sum(iosz4k) as float) / cast(sum(ctrd+ctwr) as float), 3) as iosz4k_pct,
round(cast(sum(iosz8k) as float) / cast(sum(ctrd+ctwr) as float), 3) as iosz8k_pct,
round(cast(sum(iosz16k) as float) / cast(sum(ctrd+ctwr) as float), 3) as iosz16k_pct,
round(cast(sum(iosz32k) as float) / cast(sum(ctrd+ctwr) as float), 3) as iosz32k_pct,
round(cast(sum(iosz64k) as float) / cast(sum(ctrd+ctwr) as float), 3) as iosz64k_pct,
round(cast(sum(ioszbig) as float) / cast(sum(ctrd+ctwr) as float), 3) as ioszbig_pct,
sum(totbytes) as bytes,
sum(ctrd+ctwr) as ioct,
sum(actvtm) as actv_time,
avg(avKBps) as av_bw
from data2 where iops > 0
group by 1 order by 14" ./data2.csv
```
## Get pretty-printed output (latency distribution)
```
~/go/bin/textql -output-header -header -pretty -sql \
"select `mpxiowwn` as name,
round(cast(sum(ctwr) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%w',
round(cast(sum(ctrd) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%r',
-- round(cast(sum(timegt1000ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%tgt1000ms',
round(cast(sum(time100ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%t100ms',
round(cast(sum(time50ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%t50ms',
round(cast(sum(time25ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%t25ms',
round(cast(sum(time10ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%t10ms',
round(cast(sum(timelt1ms) as float) / cast(sum(ctrd+ctwr) as float), 3) as '%tlt1ms',
sum(totbytes) >> 20 as tot_MB,
sum(actvtm) as actv_time,
avg(avKBps) as av_KBps,
cast(avg(iops) as integer) as iops,
avg(aviosz) as av_iosz
from data2 where iops > 0
group by 1 order by 10" ./data2.csv
```
-- This query uses the World Bank data, one of the example SQLite files supplied
-- with the Modeling with Data code examples.
select *
from gdp as first
inner join pop as second
on first.country = second.Country
-- The kspgs program generates CSV-structured output, which converts nicely into
-- a SQLite database with the following schema.
CREATE TABLE IF NOT EXISTS "mem" (
"timestamp" integer PRIMARY KEY,
"uptime" integer,
"runtime" integer,
"interval" integer,
"availrmem" integer,
"desfree" integer,
"desscan" integer,
"econtig" integer,
"fastscan" integer,
"freemem" integer,
"kernelbase" integer,
"lotsfree" integer,
"minfree" integer,
"nalloc" integer,
"nalloc_calls" integer,
"nfree" integer integer,
"nfree_calls" integer,
"nscan" integer,
"pagesfree" integer,
"pageslocked" integer,
"pagestotal" integer,
"physmem" integer,
"pp_kernel" integer,
"slowscan" integer,
"swap_alloc" integer,
"swap_avail" integer,
"swap_free" integer,
"swap_resv" integer,
"free" integer
);
CREATE INDEX IF NOT EXISTS idx_availrmem ON "mem" ("availrmem" ASC);
CREATE INDEX IF NOT EXISTS idx_freemem ON "mem" ("freemem" ASC);
CREATE UNIQUE INDEX IF NOT EXISTS idx_timestamp ON "mem" ("timestamp" ASC);
CREATE INDEX IF NOT EXISTS idx_pp_kernel ON "mem" ("pp_kernel" ASC);
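# A minimal import sketch (an assumption, not from the original; kspgs.csv is a
# hypothetical file name): load the kspgs CSV output into a SQLite database that
# already contains the schema and indexes above. The --skip 1 option (to drop the
# CSV header row) needs a reasonably recent sqlite3; with older versions, strip
# the header beforehand.
sqlite3 mem.db <<'EOF'
.mode csv
.import --skip 1 kspgs.csv mem
EOF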