获取实验详情。
调试
您可以在OpenAPI Explorer中直接运行该接口,免去您计算签名的困扰。运行成功后,OpenAPI Explorer可以自动生成SDK代码示例。
授权信息
下表是API对应的授权信息,可以在RAM权限策略语句的`Action`元素中使用,用来给RAM用户或RAM角色授予调用此API的权限。具体说明如下:
- 操作:是指具体的权限点。
- 访问级别:是指每个操作的访问级别,取值为写入(Write)、读取(Read)或列出(List)。
- 资源类型:是指操作中支持授权的资源类型。具体说明如下:
  - 对于必选的资源类型,在前面加 * 表示。
  - 对于不支持资源级授权的操作,用“全部资源”表示。
- 条件关键字:是指云产品自身定义的条件关键字。
- 关联操作:是指成功执行操作所需要的其他权限。操作者必须同时具备关联操作的权限,操作才能成功。
操作 | 访问级别 | 资源类型 | 条件关键字 | 关联操作 |
---|---|---|---|---|
eflo:GetExperiment | get | *Experiment acs:eflo:{#regionId}:{#accountId}:experiment/{#ExperimentId} | | 无 |
请求参数
名称 | 类型 | 必填 | 描述 | 示例值 |
---|---|---|---|---|
ExperimentId | long | 是 | 实验 ID | 234 |
ResourceGroupId | string | 否 | 资源组 ID | rg-kdsfjascjfm3 |
返回参数
示例
正常返回示例
JSON格式
{
"Data": {
"Task": {
"TaskId": 167420,
"CreateTime": 0,
"UpdateTime": 0,
"StartTime": 0,
"EndTime": 0,
"Params": {
"key": {}
},
"Scene": "baseline",
"Status": "success"
},
"Workload": {
"WorkloadId": 13,
"WorkloadName": "test",
"WorkloadDescription": "test",
"WorkloadType": "AI",
"Family": "AI",
"Scene": "NLP-LLM",
"Scope": "common",
"JobKind": "PyTorchJob",
"DefaultCpuPerWorker": 90,
"DefaultGpuPerWorker": 8,
"DefaultMemoryPerWorker": 500,
"DefaultShareMemory": 500,
"ParamSettings": [
{
"ParamName": "ITERATION",
"ParamDesc": "迭代数",
"ParamValue": 100,
"DefaultValue": 100,
"ParamRegex": "[0-9]+",
"ParamType": "number"
}
],
"StaticConfig": {
"FrameWork": "pyTorch",
"SoftwareStack": "python",
"Os": "linux",
"Parameters": "7B"
},
"VersionId": 1
},
"Resource": {
"ResourceId": 189,
"ResourceName": "ecs.r8y.4xlarge",
"CpuCoreLimit": 90,
"GpuLimit": 8,
"MemoryLimit": 500,
"MaxGpu": 8,
"MaxCpuCore": 90,
"MaxMemory": 500,
"UserAccessParam": {
"AccessId": "dev",
"AccessKey": "test",
"WorkspaceId": 123434542498,
"Endpoint": "test"
},
"MachineType": {
"MemoryInfo": "32x 64GB DDR4 3200 Memory",
"Type": "Public",
"BondNum": 5,
"NodeCount": 1,
"CpuInfo": "2x Intel Icelake 8369B 32C CPU",
"NetworkInfo": "1x 100Gbps DP NIC for VPC \\n 4x 100Gbps DP RoCE NIC",
"GpuInfo": "8x NVIDIA SXM4 80GB A100 GPU",
"DiskInfo": "2x 480GB SATA SSD \\n 4x 3.84TB NVMe SSD",
"NetworkMode": 2,
"Name": "efg1.nvga1n"
},
"ResourceNodes": [
{
"NodeName": "InputCheck"
}
]
},
"ExperimentId": 1726882991828689000,
"CreateTime": 0,
"UpdateTime": 0,
"ExperimentName": "test",
"ExperimentType": "AI",
"ResourceName": "cifnews-guoyuan",
"WorkloadName": "test",
"StartTime": "2024-11-29 02:16:35",
"EndTime": "2024-11-29 02:26:35",
"Status": "RUNNING",
"Results": {
"ExperimentId": 1748274952976261000,
"Duration": 764,
"SecondsPerIteration": 1000,
"SamplesPerSecond": 10,
"Mfu": 54.2,
"WarningWorker": [
{
"ExperimentId": 9,
"Hostname": "whza008403",
"PodName": "fluxserv-6fc89b45cf-w8wq6",
"GpuNum": 8,
"GpuName": "8x OAM 810 GPU",
"WarningFlag": true,
"WarningMsg": "存在慢节点",
"ErrorFlag": true,
"ErrorMsg": "错误信息",
"Tflops": 14,
"SamplesPerSecond": 15
}
],
"ErrorWorker": [
{
"ExperimentId": 97,
"Hostname": "60.188.98.209",
"PodName": "hzs-forge-sdxl-online-7ff4d86444-pc95h",
"GpuNum": 8,
"GpuName": "8x OAM 810 GPU",
"WarningFlag": false,
"WarningMsg": "无",
"ErrorFlag": true,
"ErrorMsg": "Connection reset",
"Tflops": 12,
"SamplesPerSecond": 23
}
],
"WarningBoundList": [
{
"Iteration": 10,
"Upper": 56,
"Lower": 14
}
],
"TaskIndividualResultList": [
{
"ExperimentId": 48,
"Hostname": "p-jt-waf-app1",
"PodName": "fluxserv-6fc89b45cf-w8wq6",
"GpuNum": 8,
"GpuName": "8x OAM 810 GPU",
"WarningFlag": false,
"WarningMsg": "无",
"ErrorFlag": false,
"ErrorMsg": "无",
"Tflops": 16,
"SamplesPerSecond": 28
}
],
"TaskIndividualResultMap": {
"key": [
{
"ExperimentId": 54,
"Hostname": "p-jt-waf-app1",
"PodName": "fluxserv-6fc89b45cf-w8wq6",
"GpuNum": 8,
"GpuName": "8x OAM 810 GPU",
"WarningFlag": false,
"WarningMsg": "无",
"ErrorFlag": false,
"ErrorMsg": "无",
"Tflops": 45,
"SamplesPerSecond": 23
}
]
}
},
"SetParams": {
"key": {}
},
"GetParams": {
"key": {}
},
"EnvParams": {
"CpuPerWorker": 90,
"GpuPerWorker": 8,
"MemoryPerWorker": 500,
"ShareMemory": 500,
"WorkerNum": 1,
"CudaVersion": "1.0.0",
"NCCLVersion": "1.0.0",
"GpuDriverVersion": "1.0.0",
"PyTorchVersion": "1.0.0",
"ResourceNodes": [
{
"NodeName": "p-jt-waf-app1",
"TotalCPU": 90,
"TotalGPU": 8,
"TotalMemory": 500,
"RequestCPU": 90,
"RequestGPU": 8,
"RequestMemory": 500
}
],
"ExtendParam": {
"key": {}
}
}
},
"RequestId": "E67E2E4C-2B47-5C55-AA17-1D771E070AEF",
"AccessDeniedDetail": {},
"TotalCount": 0
}
错误码
HTTP status code | 错误码 | 错误信息 | 描述 |
---|---|---|---|
400 | NotFound | not found. | 数据不存在 |
访问错误中心查看更多错误码。