CREATE TABLE创建分区表与复制表
本文介绍云原生数据仓库 AnalyticDB MySQL 版的CREATE TABLE
建表语法。您将了解到如何创建分区表和复制表,以及如何定义表的分布键、分区键、索引、生命周期、冷热数据分层等。
语法
CREATE TABLE [IF NOT EXISTS] table_name
({column_name column_type [column_attributes] [ column_constraints ] [COMMENT 'column_comment']
| table_constraints}
[, ... ])
[table_attribute]
[partition_options]
[storage_policy]
[block_size]
[engine]
[rt_engine]
[table_properties]
[AS query_expr]
[COMMENT 'table_comment']
column_attributes:
[DEFAULT {constant | CURRENT_TIMESTAMP}]
[AUTO_INCREMENT]
column_constraints:
[{NOT NULL|NULL} ]
[PRIMARY KEY]
table_constraints:
[{INDEX|KEY} [index_name] (column_name,...)]
[{INDEX|KEY} [index_name] (column_name->'$[*]')]
[FULLTEXT [INDEX|KEY] [index_name] (column_name) [index_option]] [,...]
[PRIMARY KEY [index_name] (column_name,...)]
[CLUSTERED KEY [index_name] (column_name,...)]
[[CONSTRAINT [symbol]] FOREIGN KEY (fk_column_name) REFERENCES pk_table_name (pk_column_name)][,...]
[ANN INDEX [index_name] (column_name,...) [index_option]] [,...]
table_attribute:
DISTRIBUTED BY HASH(column_name,...) | DISTRIBUTED BY BROADCAST
partition_options:
PARTITION BY
{VALUE(column_name) | VALUE(DATE_FORMAT(column_name, 'format')) | VALUE(FROM_UNIXTIME(column_name, 'format'))}
LIFECYCLE N
storage_policy:
STORAGE_POLICY= {'HOT'|'COLD'|'MIXED' {hot_partition_count=N}}
block_size:
BLOCK_SIZE= VALUE
engine:
ENGINE= 'XUANWU|XUANWU_V2'
参数
table_name、column_name、column_type、COMMENT
示例
新建分区表并设置生命周期
新建普通表customer,login_time
、customer_id
、phone_num
为复合主键, customer_id
为分布键,login_time
为分区键,分区的生命周期为30。
所有分区,按分区键login_time
的值(例如,20231202,20231201等)从大到小排序,仅保留分区键值最大的30个分区,当第31个分区数据写入时,自动删除最小的第1个分区。
假设,第1天login_time的值为20231201,第二天login_time的值为20231202,依次类推,第30天login_time的值为20231230。当第31天login_time为20231231的数据写入时,最小的分区(即'20231201'分区)数据将会被自动删除,从而实现只保留最近30天的数据。
CREATE TABLE customer (
customer_id BIGINT NOT NULL COMMENT '顾客ID',
customer_name VARCHAR NOT NULL COMMENT '顾客姓名',
phone_num BIGINT NOT NULL COMMENT '电话',
city_name VARCHAR NOT NULL COMMENT '所属城市',
sex INT NOT NULL COMMENT '性别',
id_number VARCHAR NOT NULL COMMENT '身份证号码',
home_address VARCHAR NOT NULL COMMENT '家庭住址',
office_address VARCHAR NOT NULL COMMENT '办公地址',
age INT NOT NULL COMMENT '年龄',
login_time TIMESTAMP NOT NULL COMMENT '登录时间',
PRIMARY KEY (login_time,customer_id,phone_num)
)
DISTRIBUTED BY HASH(customer_id)
PARTITION BY VALUE(DATE_FORMAT(login_time, '%Y%m%d')) LIFECYCLE 30
COMMENT '客户信息表';
新建表(未定义分布键)
未定义分布键,自动将主键作为分布键
表定义了主键但未定义分布键,AnalyticDB for MySQL默认将主键作为分布键。
CREATE TABLE orders (
order_id BIGINT NOT NULL COMMENT '订单ID',
customer_id INT NOT NULL COMMENT '顾客ID',
order_status VARCHAR(1) NOT NULL COMMENT '订单状态',
total_price DECIMAL(15, 2) NOT NULL COMMENT '订单金额',
order_date DATE NOT NULL COMMENT '订单日期',
PRIMARY KEY(order_id,order_date)
);
查询建表语句,可以看到主键order_id和order_date被采纳为分布键。
SHOW CREATE TABLE orders;
+---------+-----------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+---------+-----------------------------------------------------------------------------------------------------------------------------------------------+
| orders | CREATE TABLE `orders` ( |
| | `order_id` bigint NOT NULL COMMENT '订单ID', |
| | `customer_id` int NOT NULL COMMENT '顾客ID', |
| | `order_status` varchar(1) NOT NULL COMMENT '订单状态', |
| | `total_price` decimal(15, 2) NOT NULL COMMENT '订单金额', |
| | `order_date` date NOT NULL COMMENT '订单日期', |
| | PRIMARY KEY (`order_id`,`order_date`) |
| | ) DISTRIBUTED BY HASH(`order_id`,`order_date`) INDEX_ALL='Y' STORAGE_POLICY='HOT' ENGINE='XUANWU' TABLE_PROPERTIES='{"format":"columnstore"}' |
+---------+-----------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.04 sec)
未定义主键,自动增加主键
表未定义主键,也未定义分布键,AnalyticDB for MySQL将添加一个列__adb_auto_id__
作为主键和分布键。
CREATE TABLE orders_new (
order_id BIGINT NOT NULL COMMENT '订单ID',
customer_id INT NOT NULL COMMENT '顾客ID',
order_status VARCHAR(1) NOT NULL COMMENT '订单状态',
total_price DECIMAL(15, 2) NOT NULL COMMENT '订单金额',
order_date DATE NOT NULL COMMENT '订单日期'
);
查询建表语句,可以看到表中自动增加一个自增列__adb_auto_id__,该自增列作为表的主键和分布键。
SHOW CREATE TABLE orders_new;
+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------+
| orders_new | CREATE TABLE `orders_new` ( |
| | `__adb_auto_id__` bigint AUTO_INCREMENT, |
| | `order_id` bigint NOT NULL COMMENT '订单ID', |
| | `customer_id` int NOT NULL COMMENT '顾客ID', |
| | `order_status` varchar(1) NOT NULL COMMENT '订单状态', |
| | `total_price` decimal(15, 2) NOT NULL COMMENT '订单金额', |
| | `order_date` date NOT NULL COMMENT '订单日期', |
| | PRIMARY KEY (`__adb_auto_id__`) |
| | ) DISTRIBUTED BY HASH(`__adb_auto_id__`) INDEX_ALL='Y' STORAGE_POLICY='HOT' ENGINE='XUANWU' TABLE_PROPERTIES='{"format":"columnstore"}' |
+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.04 sec)
新建表(未定义分区键)
新建表supplier,supplier_id
为自增列,分布键为supplier_id
,按照supplier_id
值进行HASH分片。
CREATE TABLE supplier (
supplier_id BIGINT AUTO_INCREMENT PRIMARY KEY,
supplier_name VARCHAR,
address INT,
phone VARCHAR
)
DISTRIBUTED BY HASH(supplier_id);
定义冷热数据存储策略
定义冷(COLD)存储策略
CREATE TABLE item (
order_id BIGINT NOT NULL,
item_id INT NOT NULL,
quantity DECIMAL(15, 2) NOT NULL,
discount DECIMAL(15, 2) NOT NULL,
shipdate DATE NOT NULL,
PRIMARY KEY (order_id,item_id,shipdate)
)
DISTRIBUTED BY HASH(item_id)
PARTITION BY VALUE(date_format(shipdate, '%Y%m')) LIFECYCLE 200
STORAGE_POLICY='COLD';
定义热(HOT)存储策略
CREATE TABLE item (
order_id BIGINT NOT NULL,
item_id INT NOT NULL,
quantity DECIMAL(15, 2) NOT NULL,
discount DECIMAL(15, 2) NOT NULL,
shipdate DECIMAL NOT NULL,
PRIMARY KEY (order_id,item_id,shipdate)
)
DISTRIBUTED BY HASH(item_id)
PARTITION BY VALUE(date_format(shipdate, '%Y%m')) LIFECYCLE 200
STORAGE_POLICY='HOT';
定义混合(MIXED)存储策略,同时指定热分区数量为16个
CREATE TABLE item (
order_id BIGINT NOT NULL,
item_id INT NOT NULL,
quantity DECIMAL(15, 2) NOT NULL,
discount DECIMAL(15, 2) NOT NULL,
shipdate DATE NOT NULL,
PRIMARY KEY (order_id,item_id,shipdate)
)
DISTRIBUTED BY HASH(item_id)
PARTITION BY VALUE(date_format(shipdate, '%Y%m')) LIFECYCLE 200
STORAGE_POLICY='MIXED' HOT_PARTITION_COUNT=16;
定义全文索引
为content列创建全文索引,索引名称为fidx_c。
CREATE TABLE fulltext_tb (
id INT,
content VARCHAR,
keyword VARCHAR,
FULLTEXT INDEX fidx_c(content),
PRIMARY KEY (id)
)
DISTRIBUTED BY HASH(id);
关于创建和变更全文索引的更多内容,请参见创建全文索引。
关于全文检索,请参见全文检索。
定义向量索引
定义short_feature、float_feature为向量列,类型是array<float>,向量维数为4。
根据short_feature创建向量索引short_feature_index,根据float_feature创建向量索引float_feature_index。
CREATE TABLE fact_tb (
xid BIGINT NOT NULL,
cid BIGINT NOT NULL,
uid VARCHAR NOT NULL,
vid VARCHAR NOT NULL,
wid VARCHAR NOT NULL,
short_feature array<smallint>(4),
float_feature array<float>(4),
ann index short_feature_index(short_feature),
ann index float_feature_index(float_feature),
PRIMARY KEY (xid, cid, vid)
)
DISTRIBUTED BY HASH(xid) PARTITION BY VALUE(cid) LIFECYCLE 4;
更多关于向量索引和向量检索的内容,请参见向量检索。
定义外键索引
新增一个名为store_returns
的表,通过使用外键语法FOREIGN KEY
将sr_item_sk
列和customer
表的主键列customer_id
关联起来。
CREATE TABLE store_returns (
sr_sale_id BIGINT NOT NULL PRIMARY KEY,
sr_store_sk BIGINT,
sr_item_sk BIGINT NOT NULL,
FOREIGN KEY (sr_item_sk) REFERENCES customer (customer_id)
);
定义JSON Array索引
为vj列创建JSON Array索引,索引名称为idx_vj。
CREATE TABLE json(
id INT,
vj JSON,
INDEX idx_vj(vj->'$[*]')
)
DISTRIBUTED BY HASH(id);
关于创建和变更JSON Array索引的更多内容,请参见创建JSON Array索引和JSON Array索引。
常见问题
列属性和列约束
分布键、分区键与生命周期
索引
列存
其他
常见报错
相关文档
向表中写入数据,请参见INSERT INTO。
将查询结果写入或覆盖写入,请参见INSERT SELECT FROM或INSERT OVERWRITE SELECT。
将RDS、MaxCompute、OSS或其他数据源的数据导入,请参见数据导入。