Py学习  »  Elasticsearch

Elasticsearch按自定义项权重排序

Niko Gamulin • 4 年前 • 838 次点击  

我已经存储了包含status属性的文档。我想按状态优先级(而不是按字母顺序)对文档进行排序。我按照前面的答案编写了以下函数,但仍然不能按预期工作;文档按状态名称(按字母顺序)排序:

/**
 * Wraps a query in an Elasticsearch `function_score` query whose score is
 * derived from a predefined status priority instead of text relevance.
 *
 * Each status in the priority list gets a filter function with a weight of
 * `(position + 1) * 50`. With `boost_mode: "replace"` the score of the wrapped
 * query is discarded and only the function score is kept.
 * NOTE(review): the weights only influence ordering when results are sorted
 * by `_score`; an explicit `sort` clause on the status field elsewhere in the
 * request would override it — confirm against the caller.
 *
 * @param {object} [query] - Query to wrap. Falls back to `match_all` when
 *   null or undefined.
 * @param {string} [order] - Pass `'desc'` to reverse the priority list; any
 *   other value keeps the default order.
 * @returns {object} A `function_score` query object.
 */
function getESSortingByStatusQuery(query, order) {
    const statusOrder = ['BLUE', 'RED', 'BLACK', 'YELLOW', 'GREEN'];
    if (order === 'desc') {
        statusOrder.reverse();
    }

    // The map callback index is the item's position, so no indexOf lookup is needed.
    const functions = statusOrder.map((item, idx) => ({
        filter: { match: { statusColor: item } },
        weight: (idx + 1) * 50,
    }));

    // Use the caller's query; previously it was ignored in favour of match_all.
    const baseQuery = query == null ? { match_all: {} } : query;

    return {
        function_score: {
            query: baseQuery,
            boost: '5',
            functions: functions,
            score_mode: 'multiply',
            boost_mode: 'replace',
        },
    };
}

如果有人建议根据属性的预定义优先级(在本例中是status)对项目进行排序,我将不胜感激。

Python社区是高质量的Python/Django开发社区
本文地址:http://www.python88.com/topic/51223
 
838 次点击  
文章 [ 2 ]  |  最新文章 4 年前
Wijayanga Wijekoon
Reply   •   1 楼
Wijayanga Wijekoon    4 年前
Here's a code sample for retrieving the result. I think this will help you. If you don't want to get entire documents in the result, you can filter the returned fields using includes.

GET testindex/_search
{
  "_source": {
"includes": [
  "filed1"
]
},
  "aggs": {
    "emp_figures": {
      "terms": {
        "field": "status"
      }
    }
}
}

This is the sample result you should retrieve 
{
  "took": 11,
  "timed_out": false,
  "_shards": {
    "total": 2,
    "successful": 2,
    "failed": 0
  },
  "hits": {
    "total": 84968,
    "max_score": 1,
    "hits": [
      {
        "_index": "test",
        "_type": "type",
        "_id": "0001",
        "_score": 1,
        "_source": {
          "filed1": "color1"
        }
      },
      {
        "_index": "test",
        "_type": "type",
        "_id": "0002",
        "_score": 1,
        "_source": {
          "filed1": "color2"
        }
      }
    ]
  }
}
Kamal
Reply   •   2 楼
Kamal    4 年前

下面是一个自定义排序脚本(custom sort script)的示例,我想这就是你要找的。我已经添加了示例映射、文档、查询和响应。

映射:

PUT color_index
{
  "mappings": {
    "properties": {
      "color":{
        "type": "keyword"
      },
      "product":{
        "type": "text"
      }
    }
  }
}

示例文档:

POST color_index/_doc/1
{
  "color": "BLUE",
  "product": "adidas and nike"
}

POST color_index/_doc/2
{
  "color": "GREEN",
  "product": "adidas and nike and puma"
}

POST color_index/_doc/3
{
  "color": "GREEN",
  "product": "adidas and nike"
}

POST color_index/_doc/4
{
  "color": "RED",
  "product": "nike"
}

POST color_index/_doc/5
{
  "color": "RED",
  "product": "adidas and nike"
}

查询:

POST color_index/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "query_string": {
            "default_field": "*",
            "query": "adidas OR nike"
          }
        }
      ]
    }
  },
  "sort": [
    { "_score": { "order": "desc"} },          <---- First sort by score
    { "_script": {                             <---- Second sort by Colors
            "type": "number",
            "script": {
                "lang": "painless",
                "source": "if(params.scores.containsKey(doc['color'].value)) { return params.scores[doc['color'].value];} return 100000;",
                "params": {
                    "scores": {
                        "BLUE": 0,
                        "RED": 1,
                        "BLACK": 2,
                        "YELLOW": 3,
                        "GREEN": 4
                    }
                }
            },
            "order": "asc"
        }

    }
  ]
}

首先,它将返回按分数排序的文档,然后将第二个排序逻辑应用于该结果。

对于第二个排序(即脚本排序),请注意我是如何在 params 的 scores 部分中为每种颜色指定数值的。您需要相应地构造查询。

其工作原理的逻辑位于 source 部分,我认为它不言自明;其中我使用了 doc['color'].value,因为 color 是我应用自定义排序逻辑的字段。

回应:

{
  "took" : 5,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 5,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [
      {
        "_index" : "color_index",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 0.5159407,
        "_source" : {
          "color" : "BLUE",
          "product" : "adidas and nike"
        },
        "sort" : [
          0.5159407,                     <--- This value is score(desc by nature)
          0.0                            <--- This value comes from script sort as its BLUE and I've used value 0 in the script which is in 'asc' order
        ]
      },
      {
        "_index" : "color_index",
        "_type" : "_doc",
        "_id" : "5",
        "_score" : 0.5159407,
        "_source" : {
          "color" : "RED",
          "product" : "adidas and nike"
        },
        "sort" : [
          0.5159407,
          1.0
        ]
      },
      {
        "_index" : "color_index",
        "_type" : "_doc",
        "_id" : "3",
        "_score" : 0.5159407,
        "_source" : {
          "color" : "GREEN",
          "product" : "adidas and nike"
        },
        "sort" : [
          0.5159407,
          4.0
        ]
      },
      {
        "_index" : "color_index",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 0.40538198,
        "_source" : {
          "color" : "GREEN",
          "product" : "adidas and nike and puma"
        },
        "sort" : [
          0.40538198,
          4.0
        ]
      },
      {
        "_index" : "color_index",
        "_type" : "_doc",
        "_id" : "4",
        "_score" : 0.10189847,
        "_source" : {
          "color" : "RED",
          "product" : "nike"
        },
        "sort" : [
          0.10189847,
          1.0
        ]
      }
    ]
  }
}

注意前三个文档:它们的 product 值完全相同(因此 _score 也相同),但 color 不同。可以看到,我们先按 _score 排序,然后在分数相同的文档之间再按 color 排序。

如果有帮助,请告诉我!