SSB Benchmark性能测试
Star Schema Benchmark(以下简称SSB)是一个基于TPC-H的轻量级数据仓库场景下的星型模型测试集。该测试集为比较各类OLAP产品的基本性能指标提供了便利。本文为您介绍云原生数据仓库 AnalyticDB PostgreSQL 版在SSB 100 GB测试集上的性能表现。
前提条件
云原生数据仓库 AnalyticDB PostgreSQL 版实例和ECS实例位于同一VPC,详情请参见创建实例、创建ECS。
云原生数据仓库 AnalyticDB PostgreSQL 版实例和OSS存储空间Bucket位于同一地域,详情请参见创建存储空间。
已将ECS实例的私网IP地址添加到云原生数据仓库 AnalyticDB PostgreSQL 版实例的白名单中,详情请参见设置白名单。
已在ECS上安装psql,如需安装请参见psql。
使用阿里云账号或具备
AliyunGPDBFullAccess、AliyunECSFullAccess和AliyunOSSFullAccess权限的RAM用户,并为其创建AccessKey ID和AccessKey Secret。详情请参见RAM用户、创建AccessKey。
测试环境
云原生数据仓库 AnalyticDB PostgreSQL 版实例规格 | ECS实例规格 |
|
|
测试数据生成
本次测试使用SSB提供的toolkit生成SSB数据。
完成工具的编译。
git clone https://github.com/vadimtk/ssb-dbgen.git cd ssb-dbgen make生成100 GB测试数据。
FOR((i=1;i<=16;i++)); DO ./dbgen -s 100 -S $i -C 16 -f -T l & IF [ $(($i%16)) -eq 0 ] THEN wait echo $i FI DONE ./dbgen -s 100 -T p ./dbgen -s 100 -T c ./dbgen -s 100 -T s使用ossutil将测试数据上传至OSS存储空间。ossutil工具的安装与使用,请参见命令行工具ossutil。
./ossutil64 cp -r <测试数据所在的路径> <OSS Bucket路径> --exclude "*.tbl*"示例如下。
./ossutil64 cp -r /mnt/dbqgen/ oss://testBucketName --exclude "*.tbl*"创建数据表。
-- 创建对应表 CREATE TABLE customer --客户信息表 ( C_CUSTKEY int, C_NAME text, C_ADDRESS text, C_CITY text, C_NATION text, C_REGION text, C_PHONE text, C_MKTSEGMENT text ) USING beam WITH(compresstype=auto, compresslevel=1) DISTRIBUTED BY(C_CUSTKEY) ORDER BY (C_CUSTKEY); CREATE TABLE lineorder --商品订单明细表 ( LO_ORDERKEY bigint, LO_LINENUMBER int, LO_CUSTKEY int, LO_PARTKEY int, LO_SUPPKEY int, LO_ORDERDATE Date, LO_ORDERPRIORITY text, LO_SHIPPRIORITY int, LO_QUANTITY int, LO_EXTENDEDPRICE int, LO_ORDTOTALPRICE int, LO_DISCOUNT int, LO_REVENUE int, LO_SUPPLYCOST int, LO_TAX int, LO_COMMITDATE Date, LO_SHIPMODE text ) USING beam WITH(compresstype=auto,compresslevel=1) DISTRIBUTED BY(LO_ORDERKEY) ORDER BY (LO_ORDERDATE, LO_ORDERKEY); CREATE TABLE part --零件信息表 ( P_PARTKEY int, P_NAME text, P_MFGR text, P_CATEGORY text, P_BRAND text, P_COLOR text, P_TYPE text, P_SIZE int, P_CONTAINER text ) USING beam WITH(compresstype=auto,compresslevel=1) DISTRIBUTED BY(P_PARTKEY) ORDER BY (P_PARTKEY); CREATE TABLE supplier --供应商信息表 ( S_SUPPKEY int, S_NAME text, S_ADDRESS text, S_CITY text, S_NATION text, S_REGION text, S_PHONE text ) USING beam WITH(compresstype=auto,compresslevel=1) DISTRIBUTED BY(S_SUPPKEY) ORDER BY (S_SUPPKEY);创建OSS Foreign Table。利用OSS Foreign Table将OSS上的测试数据导入步骤4创建的表。
Endpoint和Bucket需指定为测试数据所在的OSS Bucket名称和Endpoint。获取Endpoint和Bucket请参见获取OSS Bucket信息。ID和Key即为阿里云账号或RAM用户的AccessKey ID和AccessKey Secret。获取AccessKey ID和AccessKey Secret请参见创建AccessKey。
CREATE SERVER oss_serv FOREIGN DATA WRAPPER oss_fdw OPTIONS ( endpoint 'oss-cn-hang****.aliyuncs.com', bucket 'testBucketName' ); CREATE USER MAPPING FOR PUBLIC SERVER oss_serv OPTIONS ( id 'STS.****************', key 'yourAccessKeySecret' ); CREATE FOREIGN TABLE ext_customer ( C_CUSTKEY int, C_NAME text, C_ADDRESS text, C_CITY text, C_NATION text, C_REGION text, C_PHONE text, C_MKTSEGMENT text ) server oss_serv options ( filepath 'ssb100g/customer.tbl', format 'csv', DELIMITER '|' ); CREATE FOREIGN TABLE ext_lineorder ( LO_ORDERKEY bigint, LO_LINENUMBER int, LO_CUSTKEY int, LO_PARTKEY int, LO_SUPPKEY int, LO_ORDERDATE Date, LO_ORDERPRIORITY text, LO_SHIPPRIORITY int, LO_QUANTITY int, LO_EXTENDEDPRICE int, LO_ORDTOTALPRICE int, LO_DISCOUNT int, LO_REVENUE int, LO_SUPPLYCOST int, LO_TAX int, LO_COMMITDATE Date, LO_SHIPMODE text ) server oss_serv options ( prefix 'ssb100g/lineorder_', format 'csv', DELIMITER '|' ); CREATE FOREIGN TABLE ext_part ( P_PARTKEY int, P_NAME text, P_MFGR text, P_CATEGORY text, P_BRAND text, P_COLOR text, P_TYPE text, P_SIZE int, P_CONTAINER text ) server oss_serv options ( filepath 'ssb100g/part.tbl', format 'csv', DELIMITER '|' ); CREATE FOREIGN TABLE ext_supplier ( S_SUPPKEY int, S_NAME text, S_ADDRESS text, S_CITY text, S_NATION text, S_REGION text, S_PHONE text ) server oss_serv options ( filepath 'ssb100g/supplier.tbl', format 'csv', DELIMITER '|' ); --导入数据 INSERT INTO part SELECT * FROM ext_part; INSERT INTO supplier SELECT * FROM ext_supplier; INSERT INTO customer SELECT * FROM ext_customer; INSERT INTO lineorder SELECT * FROM ext_lineorder; --删除外表 DROP EXTERNAL TABLE ext_part; DROP EXTERNAL TABLE ext_supplier; DROP EXTERNAL TABLE ext_customer; DROP EXTERNAL TABLE ext_lineorder;关联以上四张基础表生成flat表的数据。
CREATE TABLE lineorder_flat ( LO_ORDERKEY bigint NOT NULL, LO_LINENUMBER int NOT NULL, LO_CUSTKEY int NOT NULL, LO_PARTKEY int NOT NULL, LO_SUPPKEY int NOT NULL, LO_ORDERDATE Date NOT NULL, LO_ORDERPRIORITY text NOT NULL, LO_SHIPPRIORITY int NOT NULL, LO_QUANTITY int NOT NULL, LO_EXTENDEDPRICE int NOT NULL, LO_ORDTOTALPRICE int NOT NULL, LO_DISCOUNT int NOT NULL, LO_REVENUE int NOT NULL, LO_SUPPLYCOST int NOT NULL, LO_TAX int NOT NULL, LO_COMMITDATE Date NOT NULL, LO_SHIPMODE text NOT NULL, C_NAME text NOT NULL, C_ADDRESS text NOT NULL, C_CITY text NOT NULL, C_NATION text NOT NULL, C_REGION text NOT NULL, C_PHONE text NOT NULL, C_MKTSEGMENT text NOT NULL, S_NAME text NOT NULL, S_ADDRESS text NOT NULL, S_CITY text NOT NULL, S_NATION text NOT NULL, S_REGION text NOT NULL, S_PHONE text NOT NULL, P_NAME text NOT NULL, P_MFGR text NOT NULL, P_CATEGORY text NOT NULL, P_BRAND text NOT NULL, P_COLOR text NOT NULL, P_TYPE text NOT NULL, P_SIZE int NOT NULL, P_CONTAINER text NOT NULL ) USING beam WITH(compresstype=auto, compresslevel=1) DISTRIBUTED BY(LO_ORDERKEY) ORDER BY(LO_ORDERDATE, LO_ORDERKEY); INSERT INTO lineorder_flat SELECT l.LO_ORDERKEY AS LO_ORDERKEY, l.LO_LINENUMBER AS LO_LINENUMBER, l.LO_CUSTKEY AS LO_CUSTKEY, l.LO_PARTKEY AS LO_PARTKEY, l.LO_SUPPKEY AS LO_SUPPKEY, l.LO_ORDERDATE AS LO_ORDERDATE, l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY, l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY, l.LO_QUANTITY AS LO_QUANTITY, l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE, l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE, l.LO_DISCOUNT AS LO_DISCOUNT, l.LO_REVENUE AS LO_REVENUE, l.LO_SUPPLYCOST AS LO_SUPPLYCOST, l.LO_TAX AS LO_TAX, l.LO_COMMITDATE AS LO_COMMITDATE, l.LO_SHIPMODE AS LO_SHIPMODE, c.C_NAME AS C_NAME, c.C_ADDRESS AS C_ADDRESS, c.C_CITY AS C_CITY, c.C_NATION AS C_NATION, c.C_REGION AS C_REGION, c.C_PHONE AS C_PHONE, c.C_MKTSEGMENT AS C_MKTSEGMENT, s.S_NAME AS S_NAME, s.S_ADDRESS AS S_ADDRESS, s.S_CITY AS S_CITY, s.S_NATION AS S_NATION, s.S_REGION AS S_REGION, s.S_PHONE AS S_PHONE, p.P_NAME AS P_NAME, p.P_MFGR AS P_MFGR, p.P_CATEGORY AS P_CATEGORY, p.P_BRAND AS P_BRAND, p.P_COLOR AS P_COLOR, p.P_TYPE AS P_TYPE, p.P_SIZE AS P_SIZE, p.P_CONTAINER AS P_CONTAINER FROM lineorder AS l INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;在执行SQL后,后台会对该表采集统计信息。统计信息采集完成后,可开始测试查询。如果以下查询不返回结果,说明统计信息已采集完成。
SELECT datname, query, state FROM pg_stat_activity WHERE state <> 'idle' AND backend_type LIKE '%autovacuum%';
测试查询
\timing
--Q1.1
SELECT SUM(lo_extendedprice * lo_discount) AS revenue
FROM lineorder_flat
WHERE lo_orderdate >= '1993-01-01' AND lo_orderdate <= '1993-12-31'
AND lo_discount BETWEEN 1 AND 3 AND lo_quantity < 25;
--Q1.2
SELECT SUM(lo_extendedprice * lo_discount) AS revenue
FROM lineorder_flat
WHERE lo_orderdate >= '1994-01-01' AND lo_orderdate <= '1994-01-31'
AND lo_discount BETWEEN 4 AND 6 AND lo_quantity BETWEEN 26 AND 35;
--Q1.3
SELECT SUM(lo_extendedprice * lo_discount) AS revenue
FROM lineorder_flat
WHERE extract(week FROM lo_orderdate) = 6 AND lo_orderdate >= '1994-01-01'
AND lo_orderdate <= '1994-12-31' AND lo_discount BETWEEN 5 AND 7
AND lo_quantity BETWEEN 26 AND 35;
--Q2.1
SELECT SUM(lo_revenue), extract(YEAR FROM lo_orderdate) AS YEAR, p_brand
FROM lineorder_flat
WHERE p_category = 'MFGR#12' AND s_region = 'AMERICA'
GROUP BY YEAR, p_brand
ORDER BY YEAR, p_brand
LIMIT 100;
--Q2.2
SELECT SUM(lo_revenue), extract(YEAR FROM lo_orderdate) AS YEAR, p_brand
FROM lineorder_flat
WHERE p_brand >= 'MFGR#2221' AND p_brand <= 'MFGR#2228' AND s_region = 'ASIA'
GROUP BY YEAR, p_brand
ORDER BY YEAR, p_brand
LIMIT 100;
--Q2.3
SELECT SUM(lo_revenue), extract(YEAR FROM lo_orderdate) AS YEAR, p_brand
FROM lineorder_flat
WHERE p_brand = 'MFGR#2239' AND s_region = 'EUROPE'
GROUP BY YEAR, p_brand
ORDER BY YEAR, p_brand
LIMIT 100;
--Q3.1
SELECT c_nation, s_nation, extract(YEAR FROM lo_orderdate) AS YEAR,
SUM(lo_revenue) AS revenue
FROM lineorder_flat
WHERE c_region = 'ASIA' AND s_region = 'ASIA' AND lo_orderdate >= '1992-01-01'
AND lo_orderdate <= '1997-12-31'
GROUP BY c_nation, s_nation, YEAR
ORDER BY YEAR ASC, revenue DESC
LIMIT 100;
--Q3.2
SELECT c_city, s_city, extract(YEAR FROM lo_orderdate) AS YEAR,
SUM(lo_revenue) AS revenue
FROM lineorder_flat
WHERE c_nation = 'UNITED STATES' AND s_nation = 'UNITED STATES'
AND lo_orderdate >= '1992-01-01' AND lo_orderdate <= '1997-12-31'
GROUP BY c_city, s_city, YEAR
ORDER BY YEAR ASC, revenue DESC
LIMIT 100;
--Q3.3
SELECT c_city, s_city, extract(YEAR FROM lo_orderdate) AS YEAR,
SUM(lo_revenue) AS revenue
FROM lineorder_flat
WHERE c_city IN ( 'UNITED KI1' ,'UNITED KI5')
AND s_city IN ( 'UNITED KI1' ,'UNITED KI5')
AND lo_orderdate >= '1992-01-01' AND lo_orderdate <= '1997-12-31'
GROUP BY c_city, s_city, YEAR
ORDER BY YEAR ASC, revenue DESC
LIMIT 100;
--Q3.4
SELECT c_city, s_city, extract(YEAR FROM lo_orderdate) AS YEAR,
SUM(lo_revenue) AS revenue
FROM lineorder_flat
WHERE c_city IN ('UNITED KI1', 'UNITED KI5')
AND s_city IN ( 'UNITED KI1', 'UNITED KI5')
AND lo_orderdate >= '1997-12-01' AND lo_orderdate <= '1997-12-31'
GROUP BY c_city, s_city, YEAR
ORDER BY YEAR ASC, revenue DESC
LIMIT 100;
--Q4.1
SELECT EXTRACT(YEAR FROM lo_orderdate) AS YEAR, c_nation,
SUM(lo_revenue - lo_supplycost) AS profit
FROM lineorder_flat
WHERE c_region = 'AMERICA' AND s_region = 'AMERICA'
AND p_mfgr IN ('MFGR#1', 'MFGR#2')
GROUP BY YEAR, c_nation
ORDER BY YEAR ASC,c_nation ASC LIMIT 100;
--Q4.2
SELECT extract(YEAR FROM lo_orderdate) AS YEAR, s_nation, p_category,
SUM(lo_revenue - lo_supplycost) AS profit
FROM lineorder_flat
WHERE c_region = 'AMERICA' AND s_region = 'AMERICA'
AND lo_orderdate >= '1997-01-01' AND lo_orderdate <= '1998-12-31'
AND p_mfgr IN ( 'MFGR#1' , 'MFGR#2')
GROUP BY YEAR, s_nation, p_category
ORDER BY YEAR ASC, s_nation ASC, p_category ASC
LIMIT 100;
--Q4.3
SELECT extract(YEAR FROM lo_orderdate) AS YEAR, s_city, p_brand,
SUM(lo_revenue - lo_supplycost) AS profit
FROM lineorder_flat
WHERE s_nation = 'UNITED STATES' AND lo_orderdate >= '1997-01-01'
AND lo_orderdate <= '1998-12-31' AND p_category = 'MFGR#14'
GROUP BY YEAR, s_city, p_brand
ORDER BY YEAR ASC, s_city ASC, p_brand ASC
LIMIT 100;测试结果
测试查询所需时间如下。
Query | Time(ms) |
SUM | 6755.965 |
Q1.1 | 198.882 |
Q1.2 | 131.133 |
Q1.3 | 166.825 |
Q2.1 | 716.363 |
Q2.2 | 690.485 |
Q2.3 | 413.219 |
Q3.1 | 1119.772 |
Q3.2 | 772.267 |
Q3.3 | 441.492 |
Q3.4 | 43.653 |
Q4.1 | 1315.484 |
Q4.2 | 491.881 |
Q4.3 | 254.509 |