Elasticsearch---DSL搜索实践

Domain Specific Language
特定领域语言,基于JSON格式的数据查询,查询更灵活,有利于复杂查询

一、普通url路径参数搜索

  • 数据准备
1.建立名字为 shop 的索引
2.手动建立mappings
POST        http://192.168.2.223:9200/shop/_mapping
{
    "properties": {
        "id": {
            "type": "long"
        },
        "age": {
            "type": "integer"
        },
        "username": {
            "type": "keyword"
        },
        "nickname": {
            "type": "text",
            "analyzer": "ik_max_word"
        },
        "money": {
            "type": "float"
        },
        "desc": {
            "type": "text",
            "analyzer": "ik_max_word"
        },
        "sex": {
            "type": "byte"
        },
        "birthday": {
            "type": "date"
        },
        "face": {
            "type": "text",
            "index": false
        }
    }
}
3.添加数据

POST   http://192.168.2.223:9200/shop/_doc/1001
{
"id": 1011,
"age": 31,
"username": "sprder",
"nickname": "皮特帕克",
"money": 180.8,
"desc": "它是一个超级英雄",
"sex": 1,
"birthday": "1989-08-14",
"face": "https://www.zhouhong.com/static/img/index/logo.png"
}
{
"id": 1008,
"age": 19,
"username": "zhoujiang",
"nickname": "周江",
"money": 1056.8,
"desc": "周江大学毕业后,进了阿里",
"sex": 1,
"birthday": "1995-06-14",
"face": "https://www.zhouhong.com/static/img/index/logo.png"
}
{
"id": 1007,
"age": 19,
"username": "msgame",
"nickname": "gamexbox",
"money": 1056.8,
"desc": "明天去进货,最近微软处理很多游戏机,还要买xbox游戏卡带",
"sex": 1,
"birthday": "1985-05-14",
"face": "https://www.zhouhong.com/static/img/index/logo.png"    
}
{
"id": 1003,
"age": 20,
"username": "bigFace",
"nickname": "飞翔的巨鹰",
"money": 66.8,
"desc": "周江和导游坐飞机去海外旅游,去了新马泰和欧洲",
"sex": 1,
"birthday": "1996-01-14",
"face": "https://www.zhouhong.com/static/img/index/logo.png"    
}
{
"id": 1002,
"age": 19,
"username": "zhouhong",
"nickname": "周红",
"money": 77.8,
"desc": "今天上下班都很堵,车流量很大",
"sex": 1,
"birthday": "1993-01-24",
"face": "https://www.zhouhong.com/static/img/index/logo.png"    
}
{
 "id": 1012,
"age": 31,
"username": "super hero",
"nickname": "super hero",
"money": 188.8,
"desc": "BatMan, GreenArrow, SpiderMan, IronMan... are all Super Hero",
"sex": 1,
"birthday": "1980-08-14",
"face": "https://www.zhouhong.com/static/img/index/logo.png"   
}
{
"id": 1010,
"age": 30,
"username": "tata",
"nickname": "隔壁老王",
"money": 100.8,
"desc": "隔壁老外去国外出差,带给我很多好吃的",
"sex": 1,
"birthday": "1988-07-14",
"face": "https://www.zhouhong.com/static/img/index/logo.png"    
}
{
 "id": 1009,
"age": 22,
"username": "shaonian",
"nickname": "骚年轮",
"money": 96.8,
"desc": "骚年在大学毕业后,考研究生去了",
"sex": 1,
"birthday": "1998-07-14",
"face": "https://www.zhouhong.com/static/img/index/logo.png"   
}
{
"id": 1006,
"age": 19,
"username": "zhouhong",
"nickname": "我叫周红",
"money": 156.8,
"desc": "我叫周红,今年20岁,是一名毕业生,我在琦䯲星球做演讲",
"sex": 1,
"birthday": "1993-04-14",
"face": "https://www.zhouhong.com/static/img/index/logo.png"    
}
{
"id": 1005,
"age": 25,
"username": "gotoplay",
"nickname": "ps游戏机",
"money": 155.8,
"desc": "今年生日,女友送了我一台play station游戏机,非常好玩,非常不错",
"sex": 1,
"birthday": "1989-03-14",
"face": "https://www.zhouhong.com/static/img/index/logo.png"    
}
{
 "id": 1004,
"age": 22,
"username": "flyfish",
"nickname": "水中鱼",
"money": 55.8,
"desc": "昨天周红在学校的池塘里,看到有很多鱼在游泳",
"sex": 0,
"birthday": "1988-02-14",
"face": "https://www.zhouhong.com/static/img/index/logo.png"   
}
{
 "id": 1001,
"age": 18,
"username": "zhoujiang",
"nickname": "周江",
"money": 88.8,
"desc": "周江在大学学习java和前端",
"sex": 0,
"birthday": "1992-12-24",
"face": "https://www.zhouhong.com/static/img/index/logo.png"   
}
 4、普通检索:
  • http://192.168.2.223:9200/shop/_search?q=desc:周红&q=age:20(注意:重复的 q 参数只有最后一个生效,下面的结果实际只按 age:20 匹配;若要同时满足两个条件,应写成 q=desc:周红 AND age:20)

{
    "took": 8,
    "timed_out": false,
    "_shards": {
        "total": 3,
        "successful": 3,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 1,
        "hits": [
            {
                "_index": "shop",
                "_type": "_doc",
                "_id": "1003",
                "_score": 1,
                "_source": {
                    "id": 1003,
                    "age": 20,
                    "username": "bigFace",
                    "nickname": "飞翔的巨鹰",
                    "money": 66.8,
                    "desc": "周江和导游坐飞机去海外旅游,去了新马泰和欧洲",
                    "sex": 1,
                    "birthday": "1996-01-14",
                    "face": "https://www.zhouhong.com/static/img/index/logo.png"
                }
            }
        ]
    }
}

二、DSL搜索

{
	"query": {
		"match_all": {}
	}
}

{
	"query": {
		"match_all": {}
	},
	"_source": ["id","username","age"]
}

{
	"query": {
		"match_all": {}
	},
	"_source": ["id","username","age"],
	"from": 0,
	"size": 5
}

{
	"query": {
		"match_phrase": {
			"desc": {
				"query": "今天 车流量",
				"slop": 100
			}
		}
	}
}

{
	"query": {
		"term": {
			"desc": "学习"
		}
	}
}
  •  terms 对多个关键字查询

{
	"query": {
		"terms": {
			"desc": ["学习","周红","周江"]
		}
	}
}

{
	"query": {
		"match": {
			"desc": "周红"
		}
	},
	"_source": ["id","username","age"]
}

{
	"query": {
		"match": {
			"desc": {
				"query": "周红",
				"operator": "and"
			}
		}
	},
	"_source": ["id","username","age"]
}
    • POST http://192.168.2.223:9200/shop/_doc/_search
    • minimum_should_match: 最低匹配精度,按[分词后的词语个数]x百分比计算,结果向下取整。举个例子:当前属性设置为70%,若一个用户查询检索内容分词后有10个词语,那么匹配度按照 10x70%=7,则desc中至少需要有7个词语匹配,就展示;若分词后有8个,则 8x70%=5.6,向下取整为5,则desc中至少需要有5个词语匹配,就展示。
    • minimum_should_match 也能设置具体的数字,表示拆分出来的词在一个字段中个数

{
    "query": {
        "match": {
            "desc": {
                "query": "女友生日送我好玩的xbox游戏机",
                "minimum_should_match": "60%"
            }
        }
    }
} 

{
	"query": {
		"ids": {
			"type": "_doc",
			"values": ["1001","1005","1006"]
		}
	},
	"_source": ["id","username","desc"]
}
  • multi_match 对多个字段进行检索
    • POST http://192.168.2.223:9200/shop/_doc/_search ^10 表示权重,为某个字段设置权重,权重越高,文档相关性得分就越高。通常来说搜索商品名称要比商品简介的权重更高。

{
    "query": {
        "multi_match": {
            "query": "游戏",
            "fields": [
            	"desc^10","nickname"
            ]
        }
    }
}

{
    "query": {
        "bool": {
            "must": [
            	{
            		"multi_match": {
            			"query": "游戏",
            			"fields": ["desc","nickname"]
            		}
            	},
            	{
            		"term": {
            			"age": "19"
            		}
            	}
            ]
        }
    }
}
  • post_filter 过滤器
    • POST http://192.168.2.223:9200/shop/_doc/_search
    • 对搜索出来的结果进行数据过滤。不会到es库里去搜,不会去计算文档的相关度分数,所以过滤的性能会比较高,过滤器可以和全文搜索结合在一起使用。
  • post_filter元素是一个顶层元素,只会对搜索结果进行过滤。不会计算数据的匹配度相关性分数,不会根据分数去排序,query则相反,会计算分数,也会按照分数去排序。
  • 使用场景:
        • query:根据用户搜索条件检索匹配记录
        • post_filter:用于查询后,对结果数据的筛选
实操:查询账户金额大于等于60元,小于等于155.8元的用户
gte:大于等于
lte:小于等于
gt:大于
lt:小于

{
	"query": {
		"match": {
			"sex": "1"
		}
	},
	"post_filter": {
		"range": {
			"money": {
				"gte": 60,
				"lte": 155.8
			}
		}
	}
}

{
	"query": {
		"match": {
			"sex": "1"
		}
	},
	"sort": [
		{
			"money": "asc"
		},
		{
			"age": "asc"
		}	
	]
}
    • 对文本排序
    • 需要对排序字段加一个附加属性,类型选择为keyword

1.创建索引
POST        /shop2/_mapping
{
    "properties": {
        "id": {
            "type": "long"
        },
        "nickname": {
            "type": "text",
            "analyzer": "ik_max_word",
            "fields": {
                "keyword": {
                    "type": "keyword"
                }
            }
        }
    }
}
2.插入数据
POST         /shop2/_doc
{
    "id": 1001,
    "nickname": "美丽的风景"
}
{
    "id": 1002,
    "nickname": "漂亮的小哥哥"
}
{
    "id": 1003,
    "nickname": "飞翔的巨鹰"
}
{
    "id": 1004,
    "nickname": "完美的天空"
}
{
    "id": 1005,
    "nickname": "广阔的海域"
}
3.排序     POST    http://192.168.2.223:9200/shop2/_doc/_search
{
    "sort": [
        {
            "nickname.keyword": "desc"
        }
    ]
}

{
	"query": {
		"exists": {
			"field": "desc"
		}
	}
}

{
    "query": {
        "match": {
            "desc": "周红"
        }
    },
    "highlight": {
    	"pre_tags": ["<span>"],
    	"post_tags": ["</span>"],
    	"fields": {
    		"desc": {}
    	}
    }
}
结果:默认为em标签(下面的示例结果即为未设置 pre_tags/post_tags 时的默认输出);按上面的设置则会变为自定义的<span>标签。对页面 em/span 标签做一个颜色设置就可以实现高亮显示了。
{
    "took": 110,
    "timed_out": false,
    "_shards": {
        "total": 3,
        "successful": 3,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2,
            "relation": "eq"
        },
        "max_score": 1.1329247,
        "hits": [
            {
                "_index": "shop",
                "_type": "_doc",
                "_id": "1004",
                "_score": 1.1329247,
                "_source": {
                    "id": 1004,
                    "age": 22,
                    "username": "flyfish",
                    "nickname": "水中鱼",
                    "money": 55.8,
                    "desc": "昨天周红在学校的池塘里,看到有很多鱼在游泳",
                    "sex": 0,
                    "birthday": "1988-02-14",
                    "face": "https://www.zhouhong.com/static/img/index/logo.png"
                },
                "highlight": {
                    "desc": [
                        "昨天<em>周红</em>在学校的池塘里,看到有很多鱼在游泳"
                    ]
                }
            },
            {
                "_index": "shop",
                "_type": "_doc",
                "_id": "1006",
                "_score": 0.9585575,
                "_source": {
                    "id": 1006,
                    "age": 19,
                    "username": "zhouhong",
                    "nickname": "我叫周红",
                    "money": 156.8,
                    "desc": "我叫周红,今年20岁,是一名毕业生,我在琦䯲星球做演讲",
                    "sex": 1,
                    "birthday": "1993-04-14",
                    "face": "https://www.zhouhong.com/static/img/index/logo.png"
                },
                "highlight": {
                    "desc": [
                        "我叫<em>周红</em>,今年20岁,是一名毕业生,我在琦䯲星球做演讲"
                    ]
                }
            }
        ]
    }
}

三、深度分页与批量操作

  • 1、深度分页
    • 之前的一个分页操作可以实现,但是会存在一个问题,如下面的一个查询就会出现问题

{
    "query": {
        "match_all": {}
    },
    "from": 9999,
    "size": 10
}

 查询从9999开始的10条数据(9999---10009),Elasticsearch会做一个判断,当查询深度到9999的时候会限制查询。Elasticsearch从shard上拿取数据的过程:如下架构模式,它会从每个shard上面拿取10009条数据,然后对这30027条数据进行排序,最后拿到最优的十条数据

 

我们在获取第9999条到10009条数据的时候,其实每个分片都会拿到10009条数据,然后集合在一起,总共是10009*3=30027条数据,针对30027数据再次做排序处理,最终会获取最后10条数据。而不是从每个shard上拿取十条数据。
如此一来,搜索得太深,就会造成性能问题,会耗费内存和占用cpu。而且es为了性能,它默认不支持超过一万条数据的分页查询。那么如何解决深度分页带来的性能问题呢?其实我们应该避免深度分页操作(限制分页页数),比如最多只能提供100页的展示,从第101页开始就没了,毕竟用户也不会搜得那么深,我们平时搜索淘宝或者百度,一般也就顶多看个10来页。否则深度分页对于内存的消耗和CPU的占用是非常大的。如果确实需要更深的分页,可以调大索引的 index.max_result_window 设置:

{
	"index.max_result_window": 100000
}
通过 PUT http://192.168.2.223:9200/shop/_settings 提交上面的设置后,from + size 的上限就从默认的10000提高到了100000。
  • 2、Scroll 滚动搜索(可以做导入操作)
scroll滚动查询可以用来对Elasticsearch 有效的进行大批量的文档查询,而且又不用付出深度分页那种代价。
第一次查询指定过期时间、每次查询数据条数,查询出来后会分配一个scroll_id,拿着这个scroll_id 再进行第二次查询。

一次性查询1万+数据,往往会造成性能影响,因为数据量太多了。这个时候可以使用滚动搜索,也就是 scroll。
滚动搜索可以先查询出一些数据,然后再紧接着依次往下查询。在第一次查询的时候会有一个滚动id,相当于一个锚标记,随后再次滚动搜索会需要上一次搜索的锚标记,根据这个进行下一次的搜索请求。每次搜索都是基于一个历史的数据快照,查询数据的期间,如果有数据变更,那么和搜索是没有关系的,搜索的内容还是快照中的数据。
scroll=1m,相当于是一个session会话时间,搜索保持的上下文时间为1分钟。
  1. 首次查询 POST http://192.168.2.223:9200/shop/_search?scroll=1m
     • scroll=1m 表示生成的scroll_id有效期为1分钟

{
    "query": {
        "match_all": {}
    },
    "sort": ["_doc"],
    "size": 5
}
2.后续查询 POST http://192.168.2.223:9200/_search/scroll scroll_id 表示第一次查询结果返回的id,后续查询一样。scroll 表示这次生成的scroll_id的有效时间为1分钟

{
	"scroll_id": "FGluY2x1ZGVfY29udGV4dF91dWlkDnF1ZXJ5VGhlbkZldGNoAxYzZ3UtNU1IblNCYVVPcEtUTGtvX3J3AAAAAAAAAA4WME1qbFN3SFZRX3laaXpfejBsUTBMURYzZ3UtNU1IblNCYVVPcEtUTGtvX3J3AAAAAAAAAA0WME1qbFN3SFZRX3laaXpfejBsUTBMURYzZ3UtNU1IblNCYVVPcEtUTGtvX3J3AAAAAAAAAA8WME1qbFN3SFZRX3laaXpfejBsUTBMUQ==",
	"scroll": "1m"
}

  • 3. mget 批量查询:POST http://192.168.2.223:9200/shop/_doc/_mget
{
    "ids": ["1001","1003","1008"]
}
  • 4. bulk 批量操作
批量操作的类型
action 必须是以下选项之一:
create:如果文档不存在,那么就创建它。存在会报错。发生异常报错不会影响其他操作。
index:创建一个新文档或者替换一个现有的文档。
update:部分更新一个文档。
delete:删除一个文档。
metadata 中需要指定要操作的文档的_index 、 _type 和 _id,_index 、 _type也可以在url中指定
实操
create新增文档数据,在metadata中指定index以及type

POST    /_bulk
{"create": {"_index": "shop", "_type": "_doc", "_id": "2001"}}
{"id": "2001", "nickname": "name2001"}
{"create": {"_index": "shop", "_type": "_doc", "_id": "2002"}}
{"id": "2002", "nickname": "name2002"}
{"create": {"_index": "shop", "_type": "_doc", "_id": "2003"}}
{"id": "2003", "nickname": "name2003"}
create创建已有id文档,在url中指定index和type

POST    /shop/_doc/_bulk
{"create": {"_id": "2003"}}
{"id": "2003", "nickname": "name2003"}
{"create": {"_id": "2004"}}
{"id": "2004", "nickname": "name2004"}
{"create": {"_id": "2005"}}
{"id": "2005", "nickname": "name2005"}
index创建,已有文档id会被覆盖,不存在的id则新增

POST    /shop/_doc/_bulk
{"index": {"_id": "2004"}}
{"id": "2004", "nickname": "index2004"}
{"index": {"_id": "2007"}}
{"id": "2007", "nickname": "name2007"}
{"index": {"_id": "2008"}}
{"id": "2008", "nickname": "name2008"}
update更新部分文档数据

POST    /shop/_doc/_bulk
{"update": {"_id": "2004"}}
{"doc":{ "id": "3004"}}
{"update": {"_id": "2007"}}
{"doc":{ "nickname": "nameupdate"}}
delete批量删除

POST    /shop/_doc/_bulk
{"delete": {"_id": "2004"}}
{"delete": {"_id": "2007"}}
综合批量各种操作

POST    /shop/_doc/_bulk
{"create": {"_id": "8001"}}
{"id": "8001", "nickname": "name8001"}
{"update": {"_id": "2001"}}
{"doc":{ "id": "20010"}}
{"delete": {"_id": "2003"}}
{"delete": {"_id": "2005"}}

 

 

 


已有 0 条评论

    欢迎您,新朋友,感谢参与互动!