ck使用小技巧

mysql使用小技巧
pg使用小技巧
ck使用小技巧
SQL函数 — ClickHouse官方文档

ck 使用的时候会有很多坑,或者说是使用小技巧(临时先放着)

表:创建/插入/删除

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
select * from aaa;
drop table aaa;

create table aaa
(
id String comment 'id',
stats_date Date comment '统计日期',
date_created DateTime comment '创建日期'
) engine = MergeTree ORDER BY (id);



-- 新增
INSERT INTO aaa (id, stats_date, date_created) VALUES
('001', '2024-02-19', '2024-02-19 08:00:00'),
('002', '2024-02-20', '2024-02-20 09:00:00'),
('003', '2024-02-21', '2024-02-21 10:00:00');

-- 更新
ALTER TABLE t_ads_scd UPDATE statis_dt='2023-07-13' where sale_amt=39; -- 更新带有条件
ALTER TABLE t_ads_scd UPDATE store_name='abc' where 1=1; -- 更新所有

-- 删除
ALTER TABLE t_ads_scd DELETE WHERE statis_dt='2023-07-19'; -- 删除

聚合函数

1
2
3
4
5
6
7
-- 查询字段 可group,也可max/min/any
select tenant_id ... group by tenant_id
select max(tenant_id) ...
select any(tenant_id) ...

-- count 多个参数去重
countDistinctIf(tuple(prod.store_id, prod.product_id), prod.sales_amt_actual>0)

数值计算

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
-- 四舍五入   保留几位小数   设置默认值
round(toDecimal128(ifNull(toString(sales_amt_actual_sum), '0'), 5)) sales,-- 默认0,四舍五入取整
round(toDecimal128(ifNull(toString(target_value_sum), '0'), 5)) target,
round(intDivOrZero(sales, target), 3) finish_rate-- 四舍五入,保留3位小数 #.#%



-- 除法,被除数可以为 0
-- 防止损失精度 要把分子分母都转为 Decimal 类型,精度 5位小数
SELECT
-- toDecimal128(xxx, 5)
-- toDecimal128(sum(xxx), 5)
-- intDivOrZero(xxx, xxx)
-- intDivOrZero(xxx, toDecimal128(sum(xxx),5))
-- intDivOrZero(toDecimal128(sum(xxx),5), toDecimal128(sum(xxx),5))
sum(retail_tot_amt_actual) sales,
count(retail_id) retail_count,
intDivOrZero(toDecimal128(sales,5), retail_count) atv,
intDivOrZero(toDecimal128(sum(retail_tot_qty),5), retail_count) upt,
intDivOrZero(toDecimal128(sales,5), toDecimal128(sum(retail_tot_amt_list),5)) md

字符串拼接

1
2
-- 两个字符串拼接 => "a b"
SELECT concat('a', ' ', 'b')

字符串替换/分割/一行变多行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
-- SQL解释:下面 xxx 为占位符
-- position String ===> (288,532.4489361702127) (原始数据)
-- replaceRegexpAll(xxx, '\(|\)', '')) String ===> 288,532.4489361702127 (去除小括号)
-- splitByChar(',', cast(xxx as String))) String 数组 ===> ['288','532.4489361702127'] (字符串分割,转为 String数组 ===> 先转 String 防止报错误:Nested type Array(String) cannot be inside Nullable type)
-- arrayElement(xxx, 1) String ===> 288 (获取数组第1个元素)
-- arrayElement(xxx, 2) String ===> 532.4489361702127 (获取数组第2个元素)
SELECT
-- SQL解释:下面 xxx 为占位符
-- path_region_ids String ===> {3529,3531,3530,3531,3529} (原始数据)
-- replaceRegexpAll(xxx, '{|}', '')) String ===> 3529,3531,3530,3531,3529 (去除大括号)
-- splitByChar(',', xxx)) String 数组 ===> ['3529','3531','3530','3531','3529'] (字符串分割,转为 String数组)
-- arraySlice(xxx, 1, 4) String 数组 ===> ['3529','3531','3530','3531'] (从第1个元素开始,包含第1个元素,数组长度保留4)
-- arrayDistinct(xxx) String 数组 ===> ['3529','3531','3530'] (去重,为了更少的与右表进行关联)
-- arrayJoin(xxx) String 数组变 3 行,元素类型为 String
-- toInt16(xxx) 元素类型 String => int (为了方便join ===> t_store_region_config.region_path_id)
toInt16(
arrayJoin(
arrayDistinct(
arraySlice(
splitByChar(',', replaceRegexpAll(path_region_ids,'{|}','')) as arr_src,-- 原数组 ['3529','3531','3530','3531','3529']
1,4
) as arr_split-- 数组元素,最多4个 ['3529','3531','3530','3531']
) as arr_distinct-- 数组元素去重 ['3529','3531','3530']
) as arr_e_str-- 数组元素 字符串
) as arr_e_int,-- 数组元素 int
arr_split,
sum(activity_count) count
FROM t_store_region_path
GROUP BY arr_split
ORDER BY arr_split

json解析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
select * from system.functions where name like '%visitParamExtractString%';
select * from system.functions where name like '%visit%';
-- visitParamExtractInt null=0 否则int




-- ifNull(fetch_target_json, '[]') -- 字符串默认 '[]'
-- JSONExtractArrayRaw -- 转为json数组
-- arrayJoin(...) AS json -- 一行变多行
-- visitParamExtractString(json, 'featureCode') AS feature_code -- 获取json中的某个key对应值
-- toDecimal128(ifNull(visitParamExtractString(json, 'targetValue'), '0'), 5) AS target_value -- 获取对应值,默认 '0' 再转为 Decimal
-- AND feature_code='sales' -- 过滤
SELECT
store_id,
SUM(target_value) target_value_sum
FROM
(
SELECT
rel_id AS store_id,
arrayJoin(
JSONExtractArrayRaw(
ifNull(fetch_target_json, '[]')
)
) AS json,
visitParamExtractString(json, 'featureCode') AS feature_code,
toDecimal128(ifNull(visitParamExtractString(json, 'targetValue'), '0'), 5) AS target_value
FROM
t_sc_feature_daily_target_info
WHERE
tenant_id = $tenant_id$
AND rel_type = 'STORE'
AND rel_id IN (SELECT store_id FROM t_ads_store_scd WHERE tenant_id=$tenant_id$ AND if_create_sc = 1)
-- AND stats_date BETWEEN '2023-01-01' AND '2023-01-09'
AND stats_date BETWEEN toStartOfMonth(now()) AND yesterday()
AND feature_code='sales'
)
GROUP BY store_id ORDER BY store_id

map函数

1
2
3
4
5
6
7
8
9
10
11
12
select * from system.functions where name like '%map%';

-- select toJSONString(tuple('sales_amt_actual', sales_amt_actual, 'pct', pct)) json from source
-- toDecimal128(ifNull(visitParamExtractString(json, 'sales_amt_actual'), '0'), 5)

select visitParamExtractInt(json, 'sales_amt_actual') sales_amt_actual from (
select
toJSONString(map('sales_amt_actual', sales_amt_actual, 'pct', pct, 'age', 18)) json,
-- visitParamExtractInt(json, 'sales_amt_actual') sales_amt_actual,-- Cyclic aliases.
visitParamExtractInt(json, 'age') age
from source
)

日期

日期函数 — ClickHouse官方文档
类型转换 — ClickHouse官方文档
CK时间日期函数 — CSDN

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
-- 相关日期函数(这里能查出来的,官方文档不一定有)
select * from system.functions where name like '%Date%';
-- toDateTime toDateTimeOrNull 后者代替前者,避免toHour报错

-- 字符串 + 小时数
-- AND billtime between addHours(today(), 7) AND addHours(today(), 12)
AND billtime between addHours(toDate($report_date$), 7) AND addHours(toDate($report_date$), 12)

-- DateTime 类型 => 字符串"yyyy-MM-dd HH:mm:ss"
SELECT formatDateTime(billtime, '%F %T')
formatDateTime(rdd.billtime, '%H:%M') AS billtime,-- 订单时间 格式 "HH:mm"
-- formatDateTime(rdd.billtime, '%F %T') AS billtime,
SELECT toString(billtime)

-- dt Date类型
-- billtime 字符串"HH:mm:ss"
-- dt + billtime => 生成 DateTime 类型
SELECT toDateTime(concat(formatDateTime(dt, '%F'), ' ', billtime))


SELECT now() -- 2022-11-24 14:46:48.000
SELECT today() -- 2022-11-24
select yesterday() -- 2022-11-23
select toStartOfMonth(now()) -- 2022-11-01 当月1号

select toStartOfDay(now()) -- 2022-11-24 00:00:00.000
select toStartOfDay(today()) -- 2022-11-24 00:00:00.000
select toStartOfDay(yesterday()) -- 2022-11-23 00:00:00.000
select toDateTime(now()) -- 2022-11-24 14:51:42.000
select toDateTime(today()) -- 2022-11-24 00:00:00.000
select toDateTime(yesterday()) -- 2022-11-23 00:00:00.000
select formatDateTime(now(), '%F %T') -- 2022-11-24 14:56:03
select formatDateTime(now(), '%F') -- 2022-11-24
select formatDateTime(today(), '%F') -- 2022-11-24
select formatDateTime(yesterday(), '%F') -- 2022-11-23
select toString(now()) -- 2022-11-24 14:59:17
select toString(today()) -- 2022-11-24
select toString(yesterday()) -- 2022-11-23

-- 小时
-- 字符串拼接 'yyyy-MM-dd HH:mm:ss' concat('2023-01-01', ' ', '12:13:14')
-- 'yyyy-MM-dd HH:mm:ss' => DateTime toDateTime('2023-01-01 12:13:14')
-- DateTime => 小时数 Uint8 toHour(toDateTime('2023-01-01 12:13:14'))
SELECT
-- ...
concat('2023-01-01', ' ', first_order_time) billtime,
FROM t_ads_store_daily_stats
WHERE
-- ...
AND toHour(toDateTime(billtime)) BETWEEN 7 AND 10

排名函数

好多人说,为什么ck没有排名函数,其实有的,网上都没有给出相关案例。
window-functions — ClickHouse

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
-- 排名函数
-- 2022-10-26 09:49:21
SELECT
value,
rank() over win as rk -- 1 2 2 4
-- dense_rank() over win as rk -- 1 2 2 3
-- row_number() over win as rk -- 1 2 3 4
FROM (
SELECT arrayJoin([11, 22, 22, 33]) value
) window win as (order by value desc)
-- ) window win as (partition by store_id order by value desc) 按照店铺分组
ORDER BY rk
SETTINGS allow_experimental_window_functions = 1
-- value|rk|
-- -----+--+
-- 33| 1|
-- 22| 2|
-- 22| 2|
-- 11| 4|

-- 等同于上面的结果,只是写法不同而已
SELECT
value,
rank() over (order by value) rk
FROM (
SELECT arrayJoin([11, 22, 22, 33]) value
)
SETTINGS allow_experimental_window_functions = 1
--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------

-- 再举一个例子:按照学生考试成绩排名

-- 1. 创建表
-- drop table student; -- 删除表
CREATE TABLE student
(
`id` UInt32,
`name` String,
`english_score` UInt32,
`math_score` UInt32
)
ENGINE = MergeTree
ORDER BY id;

-- 2. 插入5条数据(分数随机) ---- 大牛 二蛋 三驴 四毛 五虎
insert into student values
(1, '大牛', 90, 85),
(2, '二蛋', 85, 100),
(3, '三驴', 80, 85),
(4, '四毛', 80, 95),
(5, '五虎', 75, 90);

-- 3. 查询一下
select * from student;

-- 4. 排名:英语成绩,并排序
SELECT
*,
rank() over win as rk_english
FROM (
SELECT * FROM student
) window win as ( order by english_score desc)
ORDER BY rk_english
SETTINGS allow_experimental_window_functions = 1
;
-- +--+----+-------------+----------+----------+
-- |id|name|english_score|math_score|rk_english|
-- +--+----+-------------+----------+----------+
-- |1 |大牛 |90 |85 |1 |
-- |2 |二蛋 |85 |100 |2 |
-- |3 |三驴 |80 |85 |3 |
-- |4 |四毛 |80 |95 |3 |
-- |5 |五虎 |75 |90 |5 |
-- +--+----+-------------+----------+----------+

-- 5. 排名:英语成绩、数据成绩
SELECT
*,
rank() over win_english as rk_english,
rank() over win_math as rk_math,
''
FROM (
SELECT * FROM student
)
window
win_english as ( order by english_score desc),
win_math as ( order by math_score desc)
ORDER BY id
SETTINGS allow_experimental_window_functions = 1
;
-- +--+----+-------------+----------+----------+-------+--+
-- |id|name|english_score|math_score|rk_english|rk_math|''|
-- +--+----+-------------+----------+----------+-------+--+
-- |1 |大牛 |90 |85 |1 |4 | |
-- |2 |二蛋 |85 |100 |2 |1 | |
-- |3 |三驴 |80 |85 |3 |4 | |
-- |4 |四毛 |80 |95 |3 |2 | |
-- |5 |五虎 |75 |90 |5 |3 | |
-- +--+----+-------------+----------+----------+-------+--+

-- 6. 排名:英语成绩、数据成绩(推荐:效果同上,写法更简洁)
SELECT
*,
rank() over (order by english_score desc) as rk_english,
rank() over (order by math_score desc) as rk_math,
''
FROM (
SELECT * FROM student
)
ORDER BY id
SETTINGS allow_experimental_window_functions = 1
;
-- +--+----+-------------+----------+----------+-------+--+
-- |id|name|english_score|math_score|rk_english|rk_math|''|
-- +--+----+-------------+----------+----------+-------+--+
-- |1 |大牛 |90 |85 |1 |4 | |
-- |2 |二蛋 |85 |100 |2 |1 | |
-- |3 |三驴 |80 |85 |3 |4 | |
-- |4 |四毛 |80 |95 |3 |2 | |
-- |5 |五虎 |75 |90 |5 |3 | |
-- +--+----+-------------+----------+----------+-------+--+

WITH语法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
-- 2023-01-13...14.02.10 ck 多with语法.txt
WITH target_info AS
(
select
rel_id,
tenant_id,
sum(toDecimal64OrZero(visitParamExtractString(arrayJoin(JSONExtractArrayRaw(cast(fetch_target_json as String))), 'targetValue'), 4)) AS targetValue
from
t_sc_feature_month_target_info
where
visitParamExtractString(arrayJoin(JSONExtractArrayRaw(cast(fetch_target_json as String))), 'featureCode') = 'sales'
and stats_date = '2022-12-01'
and tenant_id = 'lanling'
and rel_type = 'STORE'
and del_flag = 0
and rel_id in (6 ,7 ,8 ,9 ,10 ,11 ,12 ,13 ,14 ,15 ,16 ,17)
group by
rel_id, tenant_id, stats_date
),
store_daily_real_time AS
(
SELECT
sales_amt_actual,
tenant_id,
store_id
FROM
t_ads_store_month_total_stats
where stats_date = '2022-12-07' and tenant_id = 'lanling'
and store_id in (6 ,7 ,8 ,9 ,10 ,11 ,12 ,13 ,14 ,15 ,16 ,17)
)
SELECT
store_id,
targetValue,
sales_amt_actual,
intDivOrZero(toDecimal128(sales_amt_actual,5), toDecimal128(targetValue,5)) sales_achieving_rate
FROM
target_info
right JOIN store_daily_real_time
ON target_info.rel_id = store_daily_real_time.store_id and target_info.tenant_id = store_daily_real_time.tenant_id;

未分类

上周一

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
-- 实际使用
-- [上周一, 上周日]
and stats_date
between
addDays(toStartOfWeek(toDate($reportDate$), 1), -7)
and
addDays(toStartOfWeek(toDate($reportDate$), 1), -1)


-- dbeaver
-- 2023-07-26 15:04:22 上周一

-- 参考 CK 日期函数 https://clickhouse.com/docs/zh/sql-reference/functions/date-time-functions

select
toDate('2023-07-26') a1,-- 字符串转 Date
addWeeks(a1, 1) a2,-- Date + 7天
addWeeks(a1, -1) a3,-- Date - 7天
toStartOfWeek(a1, 1) a4,-- Date 所在周的开始 默认0周日为第一天 1周一为第一天
'---',
addDays(toStartOfWeek(toDate('2023-07-26'), 1), 0) bs1,-- 2023-07-24 当前周一
addDays(toStartOfWeek(toDate('2023-07-26'), 1), 6) bs2,-- 2023-07-30 当前周日

addDays(toStartOfWeek(toDate('2023-07-26'), 1), -7) b1,-- 2023-07-17 上周一
addDays(toStartOfWeek(toDate('2023-07-26'), 1), -1) b2,-- 2023-07-23 上周日
'';

平均首单时间

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
-- 实际使用
-- 时间取平均值
-- "HH:mm:ss" => 拼接日期 => 转 DateTime => 时间戳
-- 平均时间戳 => 取整 => 转 DateTime => 取字符串 "HH:mm"
select
store_id, formatDateTime(FROM_UNIXTIME(toInt32(avg(first_order_time))), '%H:%M') avg_first_bill_time
from (
select store_id, toInt32(toDateTime(concat($reportDate$, ' ', first_order_time))) first_order_time

-- dbeaver
-- 2023-07-26 15:30:27
-- 平均首单时间
select
store_id,
avg(first_order_time) t1, -- 有小数 3173=1690325920.5 3164=1690344791.5
round(avg(first_order_time), 0) t2, -- 四舍五入 取整
FROM_UNIXTIME(toInt32(round(avg(first_order_time), 0))) t3,

toInt32(avg(first_order_time)) t22, -- 舍去小数 取整
FROM_UNIXTIME(toInt32(avg(first_order_time))) t33,
formatDateTime(t33, '%H:%M') t44,
-- %F YYYY-MM-DD,相当于%Y-%m-%d
-- %Y 年
-- %m 月份为十进制数(01-12)
-- %d 月中的一天,零填充(01-31)

-- %T HH:MM:SS,相当于%H:%M:%S
-- %H 24小时格式(00-23)
-- %M 分钟(00-59)
-- %S 秒 (00-59)
''
from a2
group by store_id

case when

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
-- 实际使用
case when fb.billtime is null or toString(fb.billtime)='1970-01-01 08:00:00' then '未开单' else formatDateTime(fb.billtime, '%H:%M') end billtime, -- 首单时间 'HH:mm'

-- dbeaver
-- 2023-07-26 17:24:59
-- case when 使用


select
'a' a,
case when a='a' then '1' else '2' end b1,
case when a='aaaa' then '1' else '2' end b2,
case when a='aaaa' then '1' else null end b3,
''


-- 也可以 'case when 条件 then 值 end 别名'

左关联默认值

1
2
3
4
5
6
7
8
9
10
-- 默认值显示 0
ifNull(toString(cc.customer_cnt), '0') customer_cnt

-- 左关联为 0
-- 左关联会把 fb.h 设置为0
where fb.h is null or fb.h=0 or cc.h<=fb.h

-- 左关联时间 DateTime '1970-01-01 08:00:00'
-- 左关联会把 fb.billtime 设置为 DateTime '1970-01-01 08:00:00'
case when fb.billtime is null or toString(fb.billtime)='1970-01-01 08:00:00' then '未开单' else formatDateTime(fb.billtime, '%H:%M') end billtime, -- 首单时间 'HH:mm'

ifNull 字符串默认值

1
2
3
4
5
-- 默认字符串 0
ifNull(toString(cc.customer_cnt), '0') customer_cnt

-- 默认空数组
ifNull(fetch_target_json, '[]')

自定义函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
select * from system.functions where name like '%Div%'
select * from system.functions where name like 'minus'
select * from system.functions where name like '%visitParamExtractString%';
select * from system.functions where name like '%visit%';
select * from system.functions where name like '%JSON%';
select * from system.functions where name like '%map%';
select * from system.functions where name like '%Array%';
select * from system.functions where name like '%date%';
select * from system.functions where name like '%Date%';

select * from system.functions where name like '%count%';
select * from system.functions where name like '%amoros%';
select * from system.functions where name like 'amorosDiv';
-- create function amorosDiv as (a,b) -> if(b=0, null, intDivOrZero(a,b))
-- drop function amorosDiv

select distinct origin from system.functions;-- System SQLUserDefined
select * from system.functions;-- 查询所有函数
select * from system.functions where origin='System';-- 系统函数
select * from system.functions where origin='SQLUserDefined';-- 自定义函数

集群

1
2
3
4
5
6
7
-- 删除函数 - 集群
DROP FUNCTION amorosDiv ON CLUSTER cluster_single_shard;

-- 创建函数 - 集群
CREATE FUNCTION amorosDiv ON CLUSTER cluster_single_shard AS (a, b) -> if(b = 0, NULL, intDivOrZero(toDecimal128(a, 5), toDecimal128(b, 5)));

需要添加集群参数

分组取第一条

1
2
3
4
5
6
7
8
9
with xxx as (
...
), rk as (
select * from (
select *, row_number() over (partition by store_id order by sales_amt_actual desc) AS rk
from p
SETTINGS allow_experimental_window_functions = 1
) tmp where rk <= 5
)

合并数组/数组拆分

1
2
3
4
5
with l as (
...
), post as (
select user_id, arrayStringConcat(groupUniqArray(post_name), ',') post_name from matomo_user_org group by user_id
)

arrayFilter/arrayMap

多字段相加

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
-- null + 0   结果为 null

select
...

-- 0+大货+儿童+潮牌 销售额
0
<if test="$where.brand_code$!=null && $where.brand_code$.contains('1101')">
+if(a is null, 0, a)
</if>
<if test="$where.brand_code$!=null && $where.brand_code$.contains('1102')">
+if(b is null, 0, b)
</if>
<if test="$where.brand_code$!=null && $where.brand_code$.contains('1103')">
+if(c is null, 0, c)
</if>
from xxx