Py学习  »  Elasticsearch

在 Elasticsearch 中,有没有办法让最短(最接近)的词排在结果的最前面?

somethingyouwant • 5 年前 • 780 次点击  

我的索引中有这样的词:“kem,kemi,kemah,kemer,kemerburgaz,kemang,kembs,kemnay,kempley,kempsey,kemerovo”。

当我搜索“kem”时,我希望“kemi”排在最前面,因为它是最接近的词(kem + i = kemi)。但实际结果并不是我想要的那样。

索引:

{
"settings": {
    "number_of_shards": 1,
    "analysis": {
    "filter": {
        "autocomplete_filter": {
        "type": "edge_ngram",
        "min_gram": 2,
        "max_gram": 15
        }
    },
    "analyzer": {
        "autocomplete": { 
        "type": "custom",
        "tokenizer": "standard",
        "filter": [
            "lowercase",
            "autocomplete_filter"
        ]
        }
    }
    }
},
"mappings": {
    "_doc": {
    "properties": {
        "name": {
            "fields": {
                "keyword": {
                    "type": "keyword"
                }
            },
        "type": "text",
        "similarity": "classic",
        "analyzer": "autocomplete", 
        "search_analyzer": "standard" 
        },
        "id": {
            "type": "keyword"
        },
        "slug": {
            "type": "keyword"
        },
        "type": {
            "type": "keyword"
        }
    }
    }
}
}

查询:

{
"from" : 0, "size" : 10,
"query": {
    "bool": {
    "must": [
        {
        "match": {
            "name": "Kem"
        }
        }
    ],
    "should": [
        {
        "term": {
            "name.keyword": {
            "value": "Kem"            
            }
        }
        }
    ]
    }
}
}

结果:

{
"took" : 6,
"timed_out" : false,
"_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
},
"hits" : {
    "total" : 143,
    "max_score" : 20.795834,
    "hits" : [
    {
        "_index" : "destinations",
        "_type" : "_doc",
        "_id" : "lPL8Y2YBqxTX_xwrZlGc",
        "_score" : 20.795834,
        "_source" : {
        "id" : "c6317201",
        "name" : "Kem",
        "slug" : "yurtdisi/karelya-cumhuriyeti/kem"
        }
    },
    {
        "_index" : "destinations",
        "_type" : "_doc",
        "_id" : "se78Y2YBqxTX_xwrVFIU",
        "_score" : 8.61574,
        "_source" : {
        "id" : "c121023",
        "name" : "Kemah",
        "slug" : "yurtdisi/houston-ve-civari/kemah"
        }
    },
    {
        "_index" : "destinations",
        "_type" : "_doc",
        "_id" : "ze78Y2YBqxTX_xwrVFo5",
        "_score" : 8.61574,
        "_source" : {
        "id" : "c1783",
        "name" : "Kemerovo",
        "slug" : "yurtdisi/kemerovo-oblasti/kemerovo"
        }
    },
    {
        "_index" : "destinations",
        "_type" : "_doc",
        "_id" : "xe78Y2YBqxTX_xwrVFs9",
        "_score" : 8.61574,
        "_source" : {
        "id" : "c1786",
        "name" : "Kemi",
        "slug" : "yurtdisi/rovaniemi/kemi"
        }
    },
    {
        "_index" : "destinations",
        "_type" : "_doc",
        "_id" : "Tu78Y2YBqxTX_xwrVG-X",
        "_score" : 8.61574,
        "_source" : {
        "id" : "c1900",
        "name" : "Kempsey",
        "slug" : "yurtdisi/new-south-wales/kempsey"
        }
    },
    {
        "_index" : "destinations",
        "_type" : "_doc",
        "_id" : "Bu78Y2YBqxTX_xwrVILt",
        "_score" : 8.61574,
        "_source" : {
        "id" : "c3000010982",
        "name" : "Kempley",
        "slug" : "yurtdisi/dymock/kempley"
        }
    },
    {
        "_index" : "destinations",
        "_type" : "_doc",
        "_id" : "B-78Y2YBqxTX_xwrVILt",
        "_score" : 8.61574,
        "_source" : {
        "id" : "c3000010983",
        "name" : "Kemnay",
        "slug" : "yurtdisi/inverurie/kemnay"
        }
    },
    {
        "_index" : "destinations",
        "_type" : "_doc",
        "_id" : "CO78Y2YBqxTX_xwrVIb_",
        "_score" : 8.61574,
        "_source" : {
        "id" : "c3000013079",
        "name" : "Kemerburgaz",
        "slug" : "eyup/kemerburgaz"
        }
    },
    {
        "_index" : "destinations",
        "_type" : "_doc",
        "_id" : "-fL8Y2YBqxTX_xwrZQxf",
        "_score" : 8.61574,
        "_source" : {
        "id" : "c6190744",
        "name" : "Kembs",
        "slug" : "yurtdisi/haut-rhin-bolge/kembs"
        }
    },
    {
        "_index" : "destinations",
        "_type" : "_doc",
        "_id" : "xfL8Y2YBqxTX_xwrZSG-",
        "_score" : 8.61574,
        "_source" : {
        "id" : "c6216986",
        "name" : "Kemang",
        "slug" : "yurtdisi/cakarta/kemang"
        }
    }
    ]
}
}

现在它们的得分都一样,我猜是因为每个词都包含“kem”。而且无论我使用 match 还是 match_phrase(短语匹配)查询,结果都是一样的。

Python社区是高质量的Python/Django开发社区
本文地址:http://www.python88.com/topic/30450
 
780 次点击  
文章 [ 1 ]  |  最新文章 5 年前
Tim
Reply   •   1 楼
Tim    5 年前

从您的示例来看,您似乎希望在得分相同时按名称长度对结果排序。您可以通过脚本排序(script sort)来实现。

POST your_index/_doc/_search
{
  "from": 0,
  "size": 10,
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "name": "Kem"
          }
        }
      ],
      "should": [
        {
          "term": {
            "name.keyword": {
              "value": "Kem"
            }
          }
        }
      ]
    }
  },
  "sort": [
    {
      "_score": {"order": "desc"}
    },
    {
      "_script": {
        "script": "doc['name.keyword'].value.length()",
        "type": "number",
        "order": "asc"
      }
    },
    {
      "name.keyword": {"order": "asc"}
    }
  ]
}