全球疫情分析
本文介绍使用Jupyter Lab进行全球疫情数据分析的操作方法。
说明
由于疫情数据源暂时无法直接模拟,本示例仅供您在数据科学分析场景中参考使用。
前提条件
已创建RAM用户并完成授权。具体操作,请参见创建RAM用户并完成授权。
已配置环境变量ALIBABA_CLOUD_ACCESS_KEY_ID和ALIBABA_CLOUD_ACCESS_KEY_SECRET。具体操作,请参见配置环境变量。
重要:阿里云账号的AccessKey拥有所有API的访问权限,建议您使用RAM用户的AccessKey进行API访问或日常运维。
强烈建议不要把AccessKey ID和AccessKey Secret保存到工程代码里,否则可能导致AccessKey泄露,威胁您账号下所有资源的安全。
步骤一:初始化日志服务Client
LogClient是日志服务的Python客户端,用于管理Project、Logstore等日志服务资源。使用Python SDK发起日志服务请求,您需要初始化一个Client实例。示例代码如下所示:
# Setup basic client
# !pip install -U matplotlib
import time
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import aliyun.log as sls
# Service endpoint of Log Service.
endpoint = 'cn-beijing.log.aliyuncs.com'
# This example reads the AccessKey ID and AccessKey Secret from environment
# variables; each falls back to an empty string when the variable is unset.
accessId = os.getenv('ALIBABA_CLOUD_ACCESS_KEY_ID', '')
accessKey = os.getenv('ALIBABA_CLOUD_ACCESS_KEY_SECRET', '')
# Project name.
project = 'YOUR_SLS_PROJECT'
# MetricStore name.
metricstore = 'YOUR_SLS_METRICSTORE'
# Logstore used to store the inspection results.
sink_logstore = "YOUR_SLS_LOGSTORE_FOR_RESULTS_WRITE"
# Task name.
task_name = 'YOUR_TASK_NAME'
# Create the LogClient.
client = sls.LogClient(endpoint, accessId, accessKey)
步骤二:获取疫情数据
示例代码如下:
import time
import json
import pandas as pd
import matplotlib.pyplot as plt
project = "YOUR_SLS_PROJECT"
logstore = "YOUR_SLS_LOGSTORE"


def query_logstore(query, stime, etime):
    """Run a query against the configured Logstore and return a DataFrame.

    Uses the module-level ``client``, ``project`` and ``logstore``.

    Args:
        query: Query/analysis statement forwarded to ``client.get_log_all``.
        stime: Start of the query time range, forwarded unchanged.
        etime: End of the query time range, forwarded unchanged.

    Returns:
        A ``pandas.DataFrame`` with one row per returned log, built from
        each log's ``get_contents()`` result. The frame is empty when no
        logs are returned.
    """
    # get_log_all is iterated response by response; flatten the logs of
    # every response into one list of content mappings.
    datas = [
        log.get_contents()
        for response in client.get_log_all(project, logstore, stime, etime, query=query)
        for log in response.logs
    ]
    # Convert the collected rows to a pandas DataFrame.
    return pd.DataFrame(datas)
步骤三:查询和分析疫情数据
全球疫情热力图
示例代码如下:
# Install the dependency first if it is missing:
# !pip3 install folium --user
# `folium` itself must be imported: folium.Map is used below, and importing
# only folium.plugins.HeatMap does not bind the name `folium`.
import folium
from folium.plugins import HeatMap

# Query window: the last 7 days, as Unix timestamps in seconds.
etime = int(time.time())
stime = etime - 86400 * 7
query = ''' Type : "Country/Region Cases" or Type : "Province/State Cases" | select LatLng[1] as lat, LatLng[2] as lng, Confirmed from (select split(LatLng, ',') as LatLng, Confirmed from log l right join (select max(version) as version from log) r on l.version = r.version where LatLng is not null and LatLng like '%,%' limit 1000)'''
df_ncp_global = query_logstore(query, stime, etime)

# Convert the three queried columns to float before plotting.
df_ncp_global['lat'] = df_ncp_global['lat'].astype(float)
df_ncp_global['lng'] = df_ncp_global['lng'].astype(float)
df_ncp_global['Confirmed'] = df_ncp_global['Confirmed'].astype(float)

# Select the columns explicitly so each row is [lat, lng, Confirmed]
# regardless of the column order of the returned DataFrame.
data = df_ncp_global[['lat', 'lng', 'Confirmed']].values.tolist()

city_map = folium.Map(location=(35.4, -100.56), zoom_start=4)
city_map.add_child(HeatMap(data=data))
# Last expression of the cell: the notebook renders the map.
city_map
全球累计确诊数按国家和地区排行
示例代码如下:
# Query window: the last 7 days. `etime` is defined here so the cell does not
# depend on a previously executed cell having set it.
etime = int(time.time())
stime = etime - 86400 * 7
query = ''' Type : "Country/Region Cases" | select min(num) as num, case when num <= 10 then "Country/Region" else 'Other' end as "Country/Region", sum(Confirmed) as "Total Confirmed" from ( select row_number() over () AS num, "Country/Region", Confirmed from (select "Country/Region", Confirmed from log l right join (select max(version) as version from log) r on l.version = r.version order by Confirmed desc)) group by "Country/Region" order by num '''
df_ncp_country = query_logstore(query, stime, etime)

# Convert the totals to int before plotting them as bar lengths.
df_ncp_country['Total Confirmed'] = df_ncp_country['Total Confirmed'].astype(int)
plt.barh(df_ncp_country['Country/Region'], df_ncp_country['Total Confirmed'])
# Last expression of the cell: the notebook renders the table.
df_ncp_country
全球疫情态势
示例代码如下:
# Fetch the latest data version seen in the last 7 days.
etime = int(time.time())
stime = etime - 7 * 86400
query = ''' * and type: "Global Cases" | select max(version) as lastest_version '''
df_ncp_data_version = query_logstore(query, stime, etime)
# The query selects one aggregate column; take the first cell of the result.
lastest_version = df_ncp_data_version.to_numpy()[0][0]
print(lastest_version)
全球累计确诊数趋势
示例代码如下:
etime = int(time.time()) stime = etime - 86400 * 7 query = f'''* and version: "{lastest_version}" and type: "Global Cases" ''' df_ncp_global = query_logstore(query, stime, etime) datas = [] def g(x): ret = json.loads(x) for k,v in ret.items(): datas.append({'ds' : k, 'month' : ''.join(k.split('-')[0:2]), 'value' : v}) df_ncp_global['Confirmed Trend'].apply(g) df_global_trend = pd.DataFrame(datas) df_global_month_trend = df_global_trend.groupby(['month']).agg({'value': max}).reset_index() %matplotlib inline import matplotlib.pyplot as plt figure = plt.figure(figsize=(18,5),dpi=98) plt.title(u"NCP Global Cases - Confirmed Trend", fontproperties='SimHei',fontsize = 15) plt.plot(df_global_month_trend['month'], df_global_month_trend['value'], label='value', color = 'r') plt.legend(loc='best')
国家累计确诊数趋势
示例代码如下:
etime = int(time.time()) stime = etime - 86400 * 7 country_region = 'China' query = f'''* and version: "{lastest_version}" and type: "Country/Region Cases" ''' df_ncp_country = query_logstore(query, stime, etime) datas = [] def g(x): ret = json.loads(x['Confirmed Trend']) for k,v in ret.items(): datas.append({'Country/Region' : x['Country/Region'] ,'ds' : k, 'month' : ''.join(k.split('-')[0:2]), 'value' : v}) df_ncp_country[['Confirmed Trend','Country/Region']].apply(g,axis=1) df_country_moth_trend = pd.DataFrame(datas) df_country_moth_trend = df_country_moth_trend.groupby(['Country/Region','month']).agg({'value' : 'max'}).reset_index() df_country_trend_spec = df_country_moth_trend[df_country_moth_trend['Country/Region']==country_region] %matplotlib inline import matplotlib.pyplot as plt figure = plt.figure(figsize=(18,5),dpi=98) plt.title(f"NCP {country_region} Cases - Confirmed Trend", fontproperties='SimHei',fontsize = 15) plt.plot(df_country_trend_spec['month'], df_country_trend_spec['value'], label='value', color = 'r') plt.legend(loc='best')
确诊患者行程信息
示例代码如下:
# Query window: the last 7 days, as Unix timestamps in seconds.
etime = int(time.time())
stime = etime - 7 * 86400
# Travel records of the latest data version; the query strips the scheme from
# `url` and aliases the output columns.
query = ''' type: travel_detail | select start_time as "开始时间", end_time as "结束时间", travel_type as "出行类型", travel_no as "车次/车牌/航班/场所名", travel_sub_no as "车厢", travel_detail as "出行描述", start_pos as "出发站", end_pos as "到达站", case when strpos(url, 'https://') = 1 then substr(url, 9) when strpos(url, 'http://') = 1 then substr(url, 8) else url end as url, author as "线索来源", '详情' as "新闻" from log l right join (select max(version) as version from log) r on l.version = r.version order by start_time desc limit 50000 '''
df_ncp_trace = query_logstore(query, stime, etime)
# Last expression of the cell: the notebook renders the table.
df_ncp_trace