通过本篇文档,用户可以更好地体验和了解OpenSearch行业算法版推出的定制排序模型功能。
操作步骤
在特征管理中建立如下字段特征(以system_item表为例;如果需要的特征不在system_item表中,可以先将外部MaxCompute表注册进来)。例如,为title字段分别建立三个特征:原值(custom_title)、分词后生成的lookup特征(custom_title_match)、分词后统计的词数量(custom_title_len)。其他字段类似,可根据业务需要增加。以CTR中使用的字段为例:
结合system_internal表中的内置特征,以及上步中创建的字段特征,下面进行特征生成(特征工程):
以目前CTR常用的特征生成为例,可以通过OpenAPI(CreateFunctionResource - 创建算法资源)批量注册:
其中ResourceType选择feature_generator,Data填以下内容(注意:每个input features中以custom_开头的特征需要提前准备好,如缺失请在第一步中添加):
[
{
"input": {
"features": [
{
"type": "user",
"name": "system_raw_q_ultra"
},
{
"type": "item",
"name": "system_item_id"
}
]
},
"generator": "combo",
"output": "comb_q_nid"
},
{
"input": {
"features": [
{
"type": "user",
"name": "system_user_id"
},
{
"type": "item",
"name": "system_item_id"
}
]
},
"generator": "combo",
"output": "comb_uid_nid"
},
{
"input": {
"features": [
{
"type": "user",
"name": "system_user_id"
},
{
"type": "item",
"name": "custom_tags"
}
]
},
"generator": "combo",
"output": "comb_uid_tags"
},
{
"input": {
"features": [
{
"type": "user",
"name": "system_raw_q_ultra"
},
{
"type": "item",
"name": "custom_tags"
}
]
},
"generator": "combo",
"output": "comb_q_tags"
},
{
"input": {
"features": [
{
"type": "user",
"name": "system_exp_time"
}
]
},
"generator": "id",
"output": "exp_time"
},
{
"input": {
"features": [
{
"type": "user",
"name": "system_terms2"
}
]
},
"generator": "id",
"output": "terms2"
},
{
"input": {
"features": [
{
"type": "user",
"name": "system_raw_q_ultra"
}
]
},
"generator": "id",
"output": "raw_q_ultra"
},
{
"input": {
"features": [
{
"type": "user",
"name": "system_user_id"
}
]
},
"generator": "id",
"output": "user_id"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_item_id"
}
]
},
"generator": "id",
"output": "item_id"
},
{
"input": {
"features": [
{
"type": "item",
"name": "custom_description"
}
]
},
"generator": "id",
"output": "description"
},
{
"input": {
"features": [
{
"type": "item",
"name": "custom_desc_len"
}
]
},
"generator": "id",
"output": "desc_len"
},
{
"input": {
"features": [
{
"type": "item",
"name": "custom_title"
}
]
},
"generator": "id",
"output": "title"
},
{
"input": {
"features": [
{
"type": "item",
"name": "custom_title_len"
}
]
},
"generator": "id",
"output": "title_len"
},
{
"input": {
"features": [
{
"type": "item",
"name": "custom_category"
}
]
},
"generator": "id",
"output": "category"
},
{
"input": {
"features": [
{
"type": "item",
"name": "custom_tags"
}
]
},
"generator": "id",
"output": "tags"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_all_nid_ctr_30"
}
]
},
"generator": "id",
"output": "all_nid_ctr_30"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_all_nid_ctr_7"
}
]
},
"generator": "id",
"output": "all_nid_ctr_7"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_all_nid_ctr_1"
}
]
},
"generator": "id",
"output": "all_nid_ctr_1"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_all_nid_pv_30"
}
]
},
"generator": "id",
"output": "all_nid_pv_30"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_all_nid_pv_7"
}
]
},
"generator": "id",
"output": "all_nid_pv_7"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_all_nid_pv_1"
}
]
},
"generator": "id",
"output": "all_nid_pv_1"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_all_nid_ipv_30"
}
]
},
"generator": "id",
"output": "all_nid_ipv_30"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_all_nid_ipv_7"
}
]
},
"generator": "id",
"output": "all_nid_ipv_7"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_all_nid_ipv_1"
}
]
},
"generator": "id",
"output": "all_nid_ipv_1"
},
{
"input": {
"features": [
{
"role": "map",
"type": "item",
"name": "custom_title_match"
},
{
"role": "key",
"type": "user",
"name": "system_terms2"
}
]
},
"generator": "lookup",
"output": "term_title_match"
},
{
"input": {
"features": [
{
"role": "map",
"type": "item",
"name": "custom_desc_match"
},
{
"role": "key",
"type": "user",
"name": "system_terms2"
}
]
},
"generator": "lookup",
"output": "term_desc_match"
},
{
"input": {
"features": [
{
"role": "map",
"type": "item",
"name": "custom_tags_match"
},
{
"role": "key",
"type": "user",
"name": "system_terms2"
}
]
},
"generator": "lookup",
"output": "term_tags_match"
},
{
"input": {
"features": [
{
"role": "map",
"type": "item",
"name": "system_qterm_match_decay"
},
{
"role": "key",
"type": "user",
"name": "system_terms2"
}
]
},
"generator": "lookup",
"output": "term_os_kw_match"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_query_cnt"
}
]
},
"generator": "id",
"output": "opensearch_query_cnt"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_qterm_cnt"
}
]
},
"generator": "id",
"output": "opensearch_qterm_cnt"
},
{
"input": {
"features": [
{
"role": "map",
"type": "item",
"name": "system_query_ctr_decay"
},
{
"role": "key",
"type": "user",
"name": "system_raw_q_ultra"
}
]
},
"generator": "lookup",
"output": "os_q_ctr_decay"
},
{
"input": {
"features": [
{
"role": "map",
"type": "item",
"name": "system_qterm_ctr_decay"
},
{
"role": "key",
"type": "user",
"name": "system_terms2"
}
]
},
"generator": "lookup",
"output": "os_term_ctr_decay"
},
{
"input": {
"features": [
{
"role": "map",
"type": "item",
"name": "system_query_ctr_decay"
},
{
"role": "key",
"type": "user",
"name": "system_raw_q_ultra"
}
]
},
"generator": "lookup",
"output": "os_q_ctr_decay_nokey"
},
{
"input": {
"features": [
{
"role": "map",
"type": "item",
"name": "system_qterm_ctr_decay"
},
{
"role": "key",
"type": "user",
"name": "system_terms2"
}
]
},
"generator": "lookup",
"output": "os_term_ctr_decay_nokey"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_query_seq_decay"
}
]
},
"generator": "id",
"output": "os_q_seq_decay"
},
{
"input": {
"features": [
{
"type": "item",
"name": "system_qterm_seq_decay"
}
]
},
"generator": "id",
"output": "os_term_seq_decay"
},
{
"input": {
"features": [
{
"role": "query",
"type": "user",
"name": "system_terms2"
},
{
"role": "title",
"type": "item",
"name": "system_qterm_seq_decay"
}
],
"method": "query_common_ratio"
},
"generator": "overlap",
"output": "os_qterm_q_common_ratio"
},
{
"input": {
"features": [
{
"role": "query",
"type": "user",
"name": "system_terms2"
},
{
"role": "title",
"type": "item",
"name": "system_qterm_seq_decay"
}
],
"method": "title_common_ratio"
},
"generator": "overlap",
"output": "os_qterm_title_common_ratio"
},
{
"input": {
"features": [
{
"role": "query",
"type": "user",
"name": "system_terms2"
},
{
"role": "title",
"type": "item",
"name": "custom_title"
}
],
"method": "query_common_ratio"
},
"generator": "overlap",
"output": "title_q_common_ratio"
},
{
"input": {
"features": [
{
"role": "query",
"type": "user",
"name": "system_terms2"
},
{
"role": "title",
"type": "item",
"name": "custom_title"
}
],
"method": "title_common_ratio"
},
"generator": "overlap",
"output": "title_title_common_ratio"
},
{
"input": {
"features": [
{
"role": "query",
"type": "user",
"name": "system_terms2"
},
{
"role": "title",
"type": "item",
"name": "custom_description"
}
],
"method": "query_common_ratio"
},
"generator": "overlap",
"output": "desc_q_common_ratio"
},
{
"input": {
"features": [
{
"role": "query",
"type": "user",
"name": "system_terms2"
},
{
"role": "title",
"type": "item",
"name": "custom_description"
}
],
"method": "title_common_ratio"
},
"generator": "overlap",
"output": "desc_title_common_ratio"
},
{
"input": {
"features": [
{
"type": "user",
"name": "system_term_seq_length"
}
],
"dimension": 1
},
"generator": "raw",
"output": "term_seq_length"
}
]
创建完成后,可以在对应页面上进行编辑:
至此特征准备完成,具体如何使用特征需要在模型代码中进行指定。
创建模型描述:
以快速入门为基础,修改需要使用的特征列表,一般使用embedding_columns。
在自定义排序模型中指定需要使用的特征描述和模型描述
文档内容是否对您有帮助?