CREATE TABLE创建分区表与复制表
本文介绍云原生数据仓库 AnalyticDB MySQL 版的CREATE TABLE建表语法。您将了解到如何创建分区表和复制表,以及如何定义表的分布键、分区键、索引、生命周期、冷热数据分层等。
语法
CREATE TABLE [IF NOT EXISTS] table_name
  ({column_name column_type [column_attributes] [ column_constraints ] [COMMENT 'column_comment']
  | table_constraints}
  [, ... ])
  [table_attribute]
  [partition_options]
  [index_all]
  [storage_policy]
  [block_size]
  [engine]
  [table_properties]
  [AS query_expr]
  [COMMENT 'table_comment']
column_attributes:
  [DEFAULT {constant | CURRENT_TIMESTAMP}]
  [AUTO_INCREMENT]
column_constraints:
  [{NOT NULL|NULL} ]
  [PRIMARY KEY]
table_constraints:
  [{INDEX|KEY} [index_name] (column_name|column_name->'$.json_path'|column_name->'$[*]')][,...]
  [FULLTEXT [INDEX|KEY] [index_name] (column_name) [index_option]] [,...]
  [PRIMARY KEY [index_name] (column_name,...)]
  [CLUSTERED KEY [index_name] (column_name[ASC|DESC],...) ]
  [[CONSTRAINT [symbol]] FOREIGN KEY (fk_column_name) REFERENCES pk_table_name (pk_column_name)][,...]
  [ANN INDEX [index_name] (column_name,...) [index_option]] [,...]
table_attribute:
  DISTRIBUTED BY HASH(column_name,...) | DISTRIBUTED BY BROADCAST
partition_options:
  PARTITION BY 
        {VALUE(column_name) | VALUE(DATE_FORMAT(column_name, 'format')) | VALUE(FROM_UNIXTIME(column_name, 'format'))}
  LIFECYCLE N
  
 index_all:
 INDEX_ALL= 'Y|N'
storage_policy:
  STORAGE_POLICY= {'HOT'|'COLD'|'MIXED' {hot_partition_count=N}}
block_size:
  BLOCK_SIZE= VALUE
engine:
  ENGINE= 'XUANWU|XUANWU_V2'AnalyticDB for MySQL内表默认采用zstd压缩算法。
参数
table_name、column_name、column_type、COMMENT
示例
新建分区表,按日期自动分区
新建分区表sales,该分区表按sale_time的日期值自动分区。
CREATE TABLE sales (
  sale_id BIGINT NOT NULL COMMENT '订单ID',
  customer_id VARCHAR NOT NULL COMMENT '顾客ID',
  phone_num BIGINT NOT NULL COMMENT '顾客联系电话',
  revenue DECIMAL(15, 2) COMMENT '订单金额',
  sale_time TIMESTAMP NOT NULL COMMENT '订单时间',
  PRIMARY KEY (sale_time,sale_id)
 )
DISTRIBUTED BY HASH(sale_id)
PARTITION BY VALUE(DATE_FORMAT(sale_time, '%Y%m%d'));                   新建分区表并设置生命周期
新建分区表customer,login_time、customer_id、phone_num为复合主键, customer_id为分布键,login_time为分区键,分区的生命周期为30。
所有分区,按分区键login_time的值(例如,20231202,20231201等)从大到小排序,仅保留分区键值最大的30个分区,当第31个分区数据写入时,自动删除最小的第1个分区。
假设,第1天login_time的值为20231201,第二天login_time的值为20231202,依次类推,第30天login_time的值为20231230。当第31天login_time为20231231的数据写入时,最小的分区(即'20231201'分区)数据将会被自动删除,从而实现只保留最近30天的数据。
CREATE TABLE customer (
  customer_id BIGINT NOT NULL COMMENT '顾客ID',
  customer_name VARCHAR NOT NULL COMMENT '顾客姓名',
  phone_num BIGINT NOT NULL COMMENT '电话',
  city_name VARCHAR NOT NULL COMMENT '所属城市',
  sex INT NOT NULL COMMENT '性别',
  id_number VARCHAR NOT NULL COMMENT '身份证号码',
  home_address VARCHAR NOT NULL COMMENT '家庭住址',
  office_address VARCHAR NOT NULL COMMENT '办公地址',
  age INT NOT NULL COMMENT '年龄',
  login_time TIMESTAMP NOT NULL COMMENT '登录时间',
  PRIMARY KEY (login_time,customer_id,phone_num)
 )
DISTRIBUTED BY HASH(customer_id)
PARTITION BY VALUE(DATE_FORMAT(login_time, '%Y%m%d')) LIFECYCLE 30
COMMENT '客户信息表';                   新建非分区表
未定义分布键和分区键,系统自动将主键作为分布键
表定义了主键但未定义分布键,AnalyticDB for MySQL默认将主键作为分布键。
CREATE TABLE orders (
  order_id BIGINT NOT NULL COMMENT '订单ID',
  customer_id INT NOT NULL COMMENT '顾客ID',
  order_status VARCHAR(1) NOT NULL COMMENT '订单状态',
  total_price DECIMAL(15, 2) NOT NULL COMMENT '订单金额',
  order_date DATE NOT NULL COMMENT '订单日期',
  PRIMARY KEY(order_id,order_date)
);查询建表语句,可以看到主键order_id和order_date被采纳为分布键。
SHOW CREATE TABLE orders;+---------+-----------------------------------------------------------------------------------------------------------------------------------------------+
| Table   | Create Table                                                                                                                                  | 
+---------+-----------------------------------------------------------------------------------------------------------------------------------------------+
| orders  | CREATE TABLE `orders` (                                                                                                                       |
|         | `order_id` bigint NOT NULL COMMENT '订单ID',                                                                                                   |
|         | `customer_id` int NOT NULL COMMENT '顾客ID',                                                                                                   |
|         | `order_status` varchar(1) NOT NULL COMMENT '订单状态',                                                                                         | 
|         | `total_price` decimal(15, 2) NOT NULL COMMENT '订单金额',                                                                                      |
|         | `order_date` date NOT NULL COMMENT '订单日期',                                                                                                 |
|         | PRIMARY KEY (`order_id`,`order_date`)                                                                                                         |
|         | ) DISTRIBUTED BY HASH(`order_id`,`order_date`) INDEX_ALL='Y' STORAGE_POLICY='HOT' ENGINE='XUANWU' TABLE_PROPERTIES='{"format":"columnstore"}'  |
+---------+-----------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.04 sec)未定义主键和分布键,系统自动增加主键和分布键
表未定义主键,也未定义分布键,AnalyticDB for MySQL将添加一个列__adb_auto_id__作为主键和分布键。
CREATE TABLE orders_new (
  order_id BIGINT NOT NULL COMMENT '订单ID',
  customer_id INT NOT NULL COMMENT '顾客ID',
  order_status VARCHAR(1) NOT NULL COMMENT '订单状态',
  total_price DECIMAL(15, 2) NOT NULL COMMENT '订单金额',
  order_date DATE NOT NULL COMMENT '订单日期'
);查询建表语句,可以看到表中自动增加一个自增列__adb_auto_id__,该自增列作为表的主键和分布键。
SHOW CREATE TABLE orders_new;+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------+
| Table       | Create Table                                                                                                                                  | 
+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------+
| orders_new  | CREATE TABLE `orders_new` (                                                                                                                   |
|             | `__adb_auto_id__` bigint AUTO_INCREMENT,                                                                                                      |
|             | `order_id` bigint NOT NULL COMMENT '订单ID',                                                                                                   |
|             | `customer_id` int NOT NULL COMMENT '顾客ID',                                                                                                   |
|             | `order_status` varchar(1) NOT NULL COMMENT '订单状态',                                                                                         | 
|             | `total_price` decimal(15, 2) NOT NULL COMMENT '订单金额',                                                                                      |
|             | `order_date` date NOT NULL COMMENT '订单日期',                                                                                                 |
|             | PRIMARY KEY (`__adb_auto_id__`)                                                                                                               |
|             | ) DISTRIBUTED BY HASH(`__adb_auto_id__`) INDEX_ALL='Y' STORAGE_POLICY='HOT' ENGINE='XUANWU' TABLE_PROPERTIES='{"format":"columnstore"}'        |
+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.04 sec)定义主键和分布键,但未定义分区键
新建表supplier,supplier_id为自增列,分布键为supplier_id,按照supplier_id值进行HASH分片。
CREATE TABLE supplier (
  supplier_id BIGINT AUTO_INCREMENT PRIMARY KEY,
  supplier_name VARCHAR,
  address INT,
  phone VARCHAR
) 
DISTRIBUTED BY HASH(supplier_id);定义冷热数据存储策略
定义冷(COLD)存储策略
CREATE TABLE item (
  order_id BIGINT NOT NULL,
  item_id INT NOT NULL,
  quantity DECIMAL(15, 2) NOT NULL,
  discount DECIMAL(15, 2) NOT NULL,
  shipdate DATE NOT NULL,
  PRIMARY KEY (order_id,item_id,shipdate)
) 
DISTRIBUTED BY HASH(item_id) 
PARTITION BY VALUE(date_format(shipdate, '%Y%m')) LIFECYCLE 200 
STORAGE_POLICY='COLD';定义热(HOT)存储策略
CREATE TABLE item (
  order_id BIGINT NOT NULL,
  item_id INT NOT NULL,
  quantity DECIMAL(15, 2) NOT NULL,
  discount DECIMAL(15, 2) NOT NULL,
  shipdate DATE NOT NULL,
  PRIMARY KEY (order_id,item_id,shipdate)
) 
DISTRIBUTED BY HASH(item_id) 
PARTITION BY VALUE(date_format(shipdate, '%Y%m')) LIFECYCLE 200 
STORAGE_POLICY='HOT';定义混合(MIXED)存储策略,同时指定热分区数量为16个
CREATE TABLE item (
  order_id BIGINT NOT NULL,
  item_id INT NOT NULL,
  quantity DECIMAL(15, 2) NOT NULL,
  discount DECIMAL(15, 2) NOT NULL,
  shipdate DATE NOT NULL,
  PRIMARY KEY (order_id,item_id,shipdate)
) 
DISTRIBUTED BY HASH(item_id) 
PARTITION BY VALUE(date_format(shipdate, '%Y%m')) LIFECYCLE 200  
STORAGE_POLICY='MIXED' HOT_PARTITION_COUNT=16;对部分列创建普通索引
仅对id列和date列创建普通索引,其他列不创建索引。
CREATE TABLE index_tb (
  id INT,
  sales DECIMAL(15, 2),
  date DATE,
  INDEX (id),
  INDEX (date),
  PRIMARY KEY (id)
) 
DISTRIBUTED BY HASH(id);定义聚集索引
为quantity列创建聚集索引,索引名称为clustered_index。
CREATE TABLE clustered (
  product_id INT,
  product_name VARCHAR,
  quantity INT,        
  price DECIMAL(10, 2),
  CLUSTERED KEY INDEX clustered_index(quantity)
)
DISTRIBUTED BY HASH(product_id);定义全文索引
为content列创建全文索引,索引名称为fidx_c。
CREATE TABLE fulltext_tb (
  id INT,
  content VARCHAR,
  keyword VARCHAR,
  FULLTEXT INDEX fidx_c(content),
  PRIMARY KEY (id)
) 
DISTRIBUTED BY HASH(id);关于创建和变更全文索引的更多内容,请参见创建全文索引。
关于全文检索,请参见全文检索。
定义向量索引
定义short_feature、float_feature为向量列,类型是array<float>,向量维数为4。
根据short_feature创建向量索引short_feature_index,根据float_feature创建向量索引float_feature_index。
CREATE TABLE fact_tb (  
  xid BIGINT NOT NULL,  
  cid BIGINT NOT NULL,  
  uid VARCHAR NOT NULL,  
  vid VARCHAR NOT NULL,  
  wid VARCHAR NOT NULL,  
  short_feature array<smallint>(4),  
  float_feature array<float>(4),  
  ann index short_feature_index(short_feature), 
  ann index float_feature_index(float_feature),  
  PRIMARY KEY (xid, cid, vid)
) 
DISTRIBUTED BY HASH(xid) PARTITION BY VALUE(cid) LIFECYCLE 4;更多关于向量索引和向量检索的内容,请参见向量检索。
定义外键索引
新增一个名为store_returns的表,通过使用外键语法FOREIGN KEY将sr_item_sk列和customer表的主键列customer_id关联起来。
CREATE TABLE store_returns (
  sr_sale_id BIGINT NOT NULL PRIMARY KEY,
  sr_store_sk BIGINT,
  sr_item_sk BIGINT NOT NULL,
  FOREIGN KEY (sr_item_sk) REFERENCES customer (customer_id)
);定义JSON Array索引
为vj列创建JSON Array索引,索引名称为idx_vj。
CREATE TABLE json(
  id INT,
  vj JSON,
  INDEX idx_vj(vj->'$[*]')
)
DISTRIBUTED BY HASH(id);关于创建和变更JSON Array索引的更多内容,请参见创建JSON Array索引和JSON Array索引。
常见问题
列属性和列约束
分布键、分区键与生命周期
索引
列存
其他
常见报错
相关文档
- 向表中写入数据,请参见INSERT INTO。 
- 将查询结果写入或覆盖写入,请参见INSERT SELECT FROM或INSERT OVERWRITE SELECT。 
- 将RDS、MaxCompute、OSS或其他数据源的数据导入,请参见数据导入。