-- Check how many rows are in the dataset
-- (returns the total number of records in the table).
-- Replace table_name with the table to inspect.
SELECT COUNT(*) FROM table_name;
-- Select random rows
-- (returns a random sample of records from the table).
-- https://gist.github.com/404hub/11f55753ac23dc876a6e70461672b470
-- General form (PostgreSQL-style syntax):
--   TABLESAMPLE sampling_method ( argument [, ...] ) [ REPEATABLE ( seed ) ]
-- where sampling_method is SYSTEM or BERNOULLI and the argument is the
-- percentage of the table to sample (0-100). Add REPEATABLE (seed) to get
-- the same sample on repeated runs against unchanged data.
-- The example below samples roughly 10% of the rows; use SYSTEM (10) instead
-- for faster, block-level sampling.
SELECT select_expression
FROM table_name
TABLESAMPLE BERNOULLI (10);
-- SYSTEM does block/page-level sampling (fast, but rows may be clustered);
-- BERNOULLI does a sequential scan and picks each row independently.
-- Calculating percentiles
-- (computes the boundary value of each percentile bucket).
-- https://gist.github.com/404hub/bdf81448887359c97f565418157ed1d2
-- Fast method using a window function:
--   1. The inner query orders the rows by column_of_table and uses NTILE to
--      assign each row to one of percentile_num_you_want buckets
--      (e.g. 100 for percentiles, 4 for quartiles).
--   2. The outer query reports the largest value in each bucket, i.e. the
--      upper bound of that percentile.
-- Replace table_name, column_of_table and percentile_num_you_want before running.
SELECT
    MAX(buckets.column_of_table) AS percentile_value,
    buckets.percentile
FROM (
    SELECT
        column_of_table,
        -- Explicit alias so the outer query does not depend on the engine's
        -- implicit name for the window-function column.
        NTILE(percentile_num_you_want) OVER (ORDER BY column_of_table) AS percentile
    FROM table_name
) AS buckets
GROUP BY buckets.percentile
ORDER BY buckets.percentile;