本文介绍如何通过EMR OpenAPI创建一个Hadoop半托管集群。
集群基本信息
需要创建的集群基本信息如下:
- 可用区:华东1(杭州)可用区I。
- 付费类型:按量付费。
- 集群名称:emr_openapi_demo。
- 开启高可用,详细实例:
- 2个Master,每台Master ecs.g5.xlarge,系统盘为1*120 GB的ESSD,数据盘为1*80 GB的高效云盘。
- 2个Core,每台Core ecs.g5.2xlarge,系统盘为1*120 GB的ESSD,数据盘为4*80 GB的高效云盘。
- 2个Task,每台Task ecs.c5.2xlarge,系统盘为1*120 GB的ESSD,数据盘为4*80 GB的高效云盘。
- 开启挂载公网。
- 设置登录密码。
- 自定义组件配置。
| 组件名 | 配置文件 | Key | Value |
| --- | --- | --- | --- |
| Yarn | yarn-site.xml | yarn_nodemanager_heapsize | 1234 |
| Hive | hive-site.xml | hive.metastore.warehouse.dir | /user/hive/warehouse_emr |
EMR OpenAPI示例
- Python
#!/usr/bin/env python
#coding=utf-8
#
# Creates the "emr_openapi_demo" cluster through the EMR CreateClusterV2 OpenAPI:
# a HADOOP cluster with DepositType HALF_MANAGED, high availability enabled,
# pay-as-you-go (PostPaid) billing, a public IP, and custom YARN/Hive settings.
#
# '<accessKeyId>' and '<accessSecret>' are placeholders, and the vsw-/sg-/vpc-
# IDs are masked sample values; replace all of them before running.

from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkemr.request.v20160408.CreateClusterV2Request import CreateClusterV2Request

client = AcsClient('<accessKeyId>', '<accessSecret>', 'cn-hangzhou')

request = CreateClusterV2Request()
request.set_accept_format('json')

# Basic cluster information.
request.set_Name("emr_openapi_demo")
request.set_ZoneId("cn-hangzhou-i")
request.set_EmrVer("EMR-3.23.0")
request.set_ClusterType("HADOOP")

# Host groups: every group uses a 120 GB ESSD system disk and 80 GB
# ultra (CLOUD_EFFICIENCY) data disks; only the instance type and the number
# of data disks differ between groups.
request.set_HostGroups([
    {
        "HostGroupName": "master_group",
        "HostGroupType": "MASTER",
        "ChargeType": "PostPaid",
        "NodeCount": 2,
        "InstanceType": "ecs.g5.xlarge",
        "DiskType": "CLOUD_EFFICIENCY",
        "DiskCapacity": 80,
        "DiskCount": 1,
        "SysDiskType": "CLOUD_ESSD",
        "SysDiskCapacity": 120,
        "VSwitchId": "vsw-bp11t4amri1iuj*****"
    },
    {
        "HostGroupName": "core_group",
        "HostGroupType": "CORE",
        "ChargeType": "PostPaid",
        "NodeCount": 2,
        "InstanceType": "ecs.g5.2xlarge",
        "DiskType": "CLOUD_EFFICIENCY",
        "DiskCapacity": 80,
        "DiskCount": 4,
        "SysDiskType": "CLOUD_ESSD",
        "SysDiskCapacity": 120,
        "VSwitchId": "vsw-bp11t4amri1iuj*****"
    },
    {
        "HostGroupName": "task_group",
        "HostGroupType": "TASK",
        "ChargeType": "PostPaid",
        "NodeCount": 2,
        # The cluster spec calls for ecs.c5.2xlarge task nodes.
        "InstanceType": "ecs.c5.2xlarge",
        "DiskType": "CLOUD_EFFICIENCY",
        "DiskCapacity": 80,
        "DiskCount": 4,
        "SysDiskType": "CLOUD_ESSD",
        "SysDiskCapacity": 120,
        "VSwitchId": "vsw-bp11t4amri1iuj*****"
    }
])

# Network, security group and billing.
request.set_SecurityGroupId("sg-bp13cqwumhn7x*****")
request.set_IsOpenPublicIp(True)
request.set_SecurityGroupName("newgroup")
request.set_ChargeType("PostPaid")
request.set_VpcId("vpc-bp1gjl3y9fezuk*****")
request.set_VSwitchId("vsw-bp11t4amri1iuj*****")
request.set_NetType("vpc")
request.set_UserDefinedEmrEcsRole("AliyunEmrEcsDefaultRole")

# High availability, login and deployment mode.
request.set_HighAvailabilityEnable(True)
request.set_IoOptimized(True)
request.set_SshEnable(True)
request.set_MasterPwd("EMRtest12345!")
request.set_DepositType("HALF_MANAGED")
request.set_MachineType("ECS")

# Custom component configuration applied at creation time.
request.set_Configs([
    {
        "ServiceName": "YARN",
        "FileName": "yarn-site",
        "ConfigKey": "yarn_nodemanager_heapsize",
        "ConfigValue": "1234"
    },
    {
        "ServiceName": "HIVE",
        "FileName": "hive-site",
        "ConfigKey": "hive.metastore.warehouse.dir",
        "ConfigValue": "/user/hive/warehouse_emr"
    }
])

response = client.do_action_with_exception(request)
# python2: print(response)
print(str(response, encoding='utf-8'))
- Java
import com.aliyuncs.DefaultAcsClient; import com.aliyuncs.IAcsClient; import com.aliyuncs.exceptions.ClientException; import com.aliyuncs.exceptions.ServerException; import com.aliyuncs.profile.DefaultProfile; import com.google.gson.Gson; import java.util.*; import com.aliyuncs.emr.model.v20160408.*; public class CreateClusterV2 { public static void main(String[] args) { DefaultProfile profile = DefaultProfile.getProfile("cn-hangzhou", "<accessKeyId>", "<accessSecret>"); IAcsClient client = new DefaultAcsClient(profile); CreateClusterV2Request request = new CreateClusterV2Request(); request.setRegionId("cn-hangzhou"); request.setName("emr_openapi_demo"); request.setZoneId("cn-hangzhou-i"); request.setEmrVer("EMR-3.23.0"); request.setClusterType("HADOOP"); List<CreateClusterV2Request.HostGroup> hostGroupList = new ArrayList<CreateClusterV2Request.HostGroup>(); CreateClusterV2Request.HostGroup hostGroup1 = new CreateClusterV2Request.HostGroup(); hostGroup1.setHostGroupName("master_group"); hostGroup1.setHostGroupType("MASTER"); hostGroup1.setChargeType("PostPaid"); hostGroup1.setNodeCount(2); hostGroup1.setInstanceType("ecs.g5.xlarge"); hostGroup1.setDiskType("CLOUD_EFFICIENCY"); hostGroup1.setDiskCapacity(80); hostGroup1.setDiskCount(1); hostGroup1.setSysDiskType("CLOUD_ESSD"); hostGroup1.setSysDiskCapacity(120); hostGroup1.setVSwitchId("vsw-bp11t4amri1iuj*****"); hostGroupList.add(hostGroup1); CreateClusterV2Request.HostGroup hostGroup2 = new CreateClusterV2Request.HostGroup(); hostGroup2.setHostGroupName("core_group"); hostGroup2.setHostGroupType("CORE"); hostGroup2.setChargeType("PostPaid"); hostGroup2.setNodeCount(2); hostGroup2.setInstanceType("ecs.g5.2xlarge"); hostGroup2.setDiskType("CLOUD_EFFICIENCY"); hostGroup2.setDiskCapacity(80); hostGroup2.setDiskCount(4); hostGroup2.setSysDiskType("CLOUD_ESSD"); hostGroup2.setSysDiskCapacity(120); hostGroup2.setVSwitchId("vsw-bp11t4amri1iuj*****"); hostGroupList.add(hostGroup2); CreateClusterV2Request.HostGroup 
hostGroup3 = new CreateClusterV2Request.HostGroup(); hostGroup3.setHostGroupName("task_group"); hostGroup3.setHostGroupType("TASK"); hostGroup3.setChargeType("PostPaid"); hostGroup3.setNodeCount(2); hostGroup3.setInstanceType("ecs.c5.xlarge"); hostGroup3.setDiskType("CLOUD_EFFICIENCY"); hostGroup3.setDiskCapacity(80); hostGroup3.setDiskCount(4); hostGroup3.setSysDiskType("CLOUD_ESSD"); hostGroup3.setSysDiskCapacity(120); hostGroup3.setVSwitchId("vsw-bp11t4amri1iuj*****"); hostGroupList.add(hostGroup3); request.setHostGroups(hostGroupList); request.setSecurityGroupId("sg-bp13cqwumhn7x*****"); request.setIsOpenPublicIp(true); request.setSecurityGroupName("newgroup"); request.setChargeType("PostPaid"); request.setVpcId("vpc-bp1gjl3y9fezukq*****"); request.setVSwitchId("vsw-bp11t4amri1iuj*****"); request.setNetType("vpc"); request.setUserDefinedEmrEcsRole("AliyunEmrEcsDefaultRole"); request.setHighAvailabilityEnable(true); request.setIoOptimized(true); request.setSshEnable(true); request.setMasterPwd("EMRtest12345!"); request.setDepositType("HALF_MANAGED"); request.setMachineType("ECS"); List<CreateClusterV2Request.Config> configList = new ArrayList<CreateClusterV2Request.Config>(); CreateClusterV2Request.Config config1 = new CreateClusterV2Request.Config(); config1.setServiceName("YARN"); config1.setFileName("yarn-site"); config1.setConfigKey("yarn_nodemanager_heapsize"); config1.setConfigValue("1234"); configList.add(config1); CreateClusterV2Request.Config config2 = new CreateClusterV2Request.Config(); config2.setServiceName("HIVE"); config2.setFileName("hive-site"); config2.setConfigKey("hive.metastore.warehouse.dir"); config2.setConfigValue("/user/hive/warehouse_emr"); configList.add(config2); request.setConfigs(configList); try { CreateClusterV2Response response = client.getAcsResponse(request); System.out.println(new Gson().toJson(response)); } catch (ServerException e) { e.printStackTrace(); } catch (ClientException e) { System.out.println("ErrCode:" + 
e.getErrCode()); System.out.println("ErrMsg:" + e.getErrMsg()); System.out.println("RequestId:" + e.getRequestId()); } } }
更多配置
如果所需集群有其它的配置项,请参见创建集群,单击调试,可根据输入的配置项自动生成对应的代码。
说明
配置里有两组信息是和付费类型相关的:
- 当机器组类型为Master和Core时,付费类型以ChargeType或Period为准。
- 当机器组类型为Task时,付费类型以HostGroup.N.ChargeType或HostGroup.N.Period为准。
- 创建包年包月集群参数描述如下。
| 参数 | 描述 |
| --- | --- |
| ChargeType | 付费类型,PrePaid为包年包月。 |
| Period | 包年包月时间(包月数有:1、2、3、4、5、6、7、8、9、12、24、36)。当ChargeType为PrePaid时,必填。 |
| HostGroup.N.ChargeType | 机器组机器付费类型。 |
| HostGroup.N.Period | 包年包月时间(包月数有:1、2、3、4、5、6、7、8、9、12、24、36)。当HostGroup.N.ChargeType为PrePaid时,必填。 |

- 创建Gateway集群参数描述如下。
Gateway实例在Gateway集群中创建,需添加HostGroup.N.HostGroupType=GATEWAY的实例定义。
| 参数 | 描述 |
| --- | --- |
| ClusterType | 当创建Gateway集群时,取值为GATEWAY。 |
| HostGroup.N.HostGroupType | Gateway实例,取值为GATEWAY。 |
| RelatedClusterId | 当前是Gateway集群时,其关联的主集群ID。 |