Go项目优化——使用Elasticsearch搜索引擎

Posted 2022-10-25 小象裤衩

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了Go项目优化——使用Elasticsearch搜索引擎相关的知识，希望对你有一定的参考价值。

本文通过一个实例（图书项目）来学习 Go 中 Elasticsearch 的使用，并说明它给项目带来的性能提升。

案例：

http准备

util/http.go
用于向es服务器发送json格式的Put和Post请求

package util

import (
	"errors"
	"github.com/astaxie/beego/httplib"
	"github.com/bitly/go-simplejson"
	"io"
	"time"
)

// HttpPutJson
// @Title HttpPutJson
// @Description 用于向es服务器发送put请求（新建索引or添加文档）
func HttpPutJson(url, body string) error 
	resp, err := httplib.Put(url).
		Header("Content-Type", "application/json").
		SetTimeout(10*time.Second, 10*time.Second).
		Body(body).
		Response()
	if err == nil 
		defer resp.Body.Close()
		// 不正常的响应状态码
		if resp.StatusCode >= 300 || resp.StatusCode < 200 
			// es会将错误信息写在body里 打印错误信息
			bodyErr, _ := io.ReadAll(resp.Body)
			body = string(bodyErr)
			err = errors.New(resp.Status + ";" + body)
		
	
	return err



// HttpPostJson
// @Title HttpPostJson
// @Description 用于向es服务器请求数据，查询数据
// @Param url string
// @Param body string 条件
// @Return *simplejson.Json es服务器返回的信息
func HttpPostJson(url, body string) (*simplejson.Json, error) 
	resp, err := httplib.Post(url).
		Header("Content-Type", "application/json").
		SetTimeout(10*time.Second, 10*time.Second).
		Body(body).
		Response()
	var sj *simplejson.Json
	if err == nil 
		defer resp.Body.Close()
		// 不正常的响应状态码
		if resp.StatusCode >= 300 || resp.StatusCode < 200 
			bodyErr, _ := io.ReadAll(resp.Body)
			body = string(bodyErr)
			err = errors.New(resp.Status + ";" + body)
		 else 
			bodyBytes, _ := io.ReadAll(resp.Body)
			sj, err = simplejson.NewJson(bodyBytes)
		
	
	return sj, err

案例（新增）：

建立索引+添加文档
发布图书的时候为图书和章节文档内容建立索引。
models/elasticSearch.go

package models

import (
	"encoding/json"
	"fmt"
	"strconv"
	"strings"

	"es.study/util"
	"github.com/PuerkitoBio/goquery"
	"github.com/astaxie/beego/logs"
)

// elasticHost is the base URL of the search engine. It must end with '/'
// because index paths are appended to it directly.
// (This setting should eventually live in the configuration file.)
var elasticHost = "http://localhost:9200/"

// ElasticBuildIndex
// localhost:9200/index/_doc/doc_id
// index: 索引 对应sql里的表
// _doc: 文档类型，ES 7.0 以后的版本 已经废弃文档类型了，一个 index 中只有一个默认的 type，即 _doc。
// @Title ElasticBuildIndex
// @Description  指定id的图书增加索引
// @Author hyy 2022-10-14 21:06:27
// @Param bookId int 图书 id
func ElasticBuildIndex(bookId int) 
	// func(m *Book) Select(filed string, value interface, cols ...string)
	// SELECT [cols...] FROM books WHERE filed=value;
	book, _ := NewBook().Select("book_id", bookId, "book_id", "book_name", "description")
	addBookToIndex(book.BookId, book.BookName, book.Description)

	// index document
	var documents []Document
	fields := []string"document_id", "book_id", "document_name", "release"
	GetOrm("r").QueryTable(TNDoucments()).Filter("book_id", bookId).All(documents, fields...)
	if len(documents) > 0 
		for _, document := range documents 
			// release: 已发布的章节
			addDocumentToIndex(document.DocumentId, document.BookId, flathtml(document.Release))
		
	



// addBookToIndex
// @Title addBookToIndex
// @Description 向图书索引（相当于图书表）中，添加图书
// @Author hyy 2022-10-14 21:07:38
// @Param bookId int 图书id
// @Param bookName string 图书名
// @Param description string 图书描述
func addBookToIndex(bookId int, bookName, description string) 
	queryJson := `
		
			"book_id":%v,
			"book_name":"%v",
			"description":"%v"
		
	`
	// ElasticSearch API
	host := elasticHost
	api := host + "mbooks/_doc/" + strconv.Itoa(bookId)
	// 发起请求：
	queryJson = fmt.Sprintf(queryJson, bookId, bookName, description)
	err := util.HttpPutJson(api, queryJson)
	if err != nil 
		logs.Debug(err)
	


// addDocumentToIndex
// @Title addDocumentToIndex
// @Description  向章节文档索引（相当于章节文档表）中，添加章节文档
// @Author hyy 2022-10-14 21:09:09
// @Param documentId int 文档id
// @Param bookId int 所属图书id
// @Param release string 文档发布内容
func addDocumentToIndex(documentId, bookId int, release string) 
	queryJson := `
		
			"document:_id":%v,
			"book_id":%v,
			"release":"%v"
		
	`
	// ElasticSearch API
	host := elasticHost
	api := host + "mdocument/_doc/" + strconv.Itoa(documentId)

	// 发起请求：
	queryJson = fmt.Sprintf(queryJson, documentId, bookId, release)
	err := util.HttpPutJson(api, queryJson)
	if err != nil 
		logs.Debug(err)
	


// flatHtml
// 剔除章节里的html标签，取出文本
func flatHtml(htmlStr string) string 
	htmlStr = strings.Replace(htmlStr, "\\n", " ", -1)
	htmlStr = strings.Replace(htmlStr, "\\"", "", -1)
	gq, err := goquery.NewDocumentFromReader(strings.NewReader(htmlStr))
	// 如果不为空，说明没有
	if err != nil 
		return htmlStr
	
	return gq.Text()

在发布图书的时候，调用ElasticBuildIndex(bookId)接口，将图书信息以及章节内容添加到es中。

案例（查询）：

搜索图书：

package models

import (
	"es.study/util"
	"fmt"
	"github.com/PuerkitoBio/goquery"
	"github.com/astaxie/beego/logs"
	"strconv"
	"strings"
)

// elasticHost is the base URL of the search engine. It must end with '/'
// because index paths are appended to it directly.
// (This setting should eventually live in the configuration file.)
var elasticHost = "http://localhost:9200/"

// ... 新增索引or添加文档

// ElasticSearchBook
// localhost:9200/index/_doc/_search
// index: 索引 对应sql里的表
// _doc: 文档类型，ES 7.0 以后的版本 已经废弃文档类型了，一个 index 中只有一个默认的 type，即 _doc。
// @Title ElasticSearchBook
// @Description 根据关键字搜索图书，获取图书的id
// @Author hyy 2022-10-14 20:49:37
// @Param kw string 关键字
// @Param pageSize int 页大小
// @Param page int 页码（可选）
// @Return []int bookId的数组
// @Return int 书的总数
// @Return error 错误
func ElasticSearchBook(kw string, pageSize, page int) ([]int, int, error) 
	var bookIds []int
	count := 0
	if page > 0 
		// 第一页对应搜索引擎里的第0页
		page = page - 1
	 else 
		page = 0
	
	queryJson := `
		
			"query":
				"multi_match":
					"query":"%v",
					"fields":["bookName","description"]
				
			,
			"_source":["book_id"],
			"size":%v,
			"from":%v
		
	`

	// elasticSearch api
	host := elasticHost
	api := host + "mbook/_doc/_search"
	queryJson = fmt.Sprintf(queryJson, kw, pageSize, page)
	sj, err := util.HttpPostJson(api, queryJson)
	if err == nil 
		count = sj.GetPath("hits", "total").MustInt()
		resultArray := sj.GetPath("hits", "hits").MustArray()
		for _, result := range resultArray 
			if eachMap, ok := result.(map[string]interface); ok 
				id, _ := strconv.Atoi(eachMap["_id"].(string))
				bookIds = append(bookIds, id)
			
		
	
	return bookIds, count, err


// ElasticSearchDocument
// @Title ElasticSearchDocument
// @Description 根据关键字搜索章节文档，返回章节文档的id，
// 该函数提供两种搜索：
//  1. 搜所有图书的章节文档
//  2. 搜某一本图书的章节文档，所以有个可选参数bookId
//
// @Author hyy 2022-10-14 20:56:54
// @Param kw string 关键字
// @Param pageSize int 页大小
// @Param page int 页码
// @Param bookId ...int 图书id（可选）
// @Return []int 章节文档的id数组
// @Return int 总数
// @Return error 错误
func ElasticSearchDocument(kw string, pageSize, page int, bookId ...int) ([]int, int, error) 
	var documentIds []int
	count := 0
	if page > 0 
		// 第一页对应搜索引擎里的第0页
		page = page - 1
	 else 
		page = 0
	
	queryJson := `
		
			"query":
				"match":
					"release":"%v",
					
			,
			"_source":["document_id"],
			"size":%v,
			"from":%v
		
	`
	queryJson = fmt.Sprintf(queryJson, kw, pageSize, page)

	if len(bookId) > 0 && bookId[0] > 0 
		queryJson = `
			
				"query":
					"bool":
						"filter":[
							"term":
								"book_id":%v
							
						],
						"must":
							"multi_match":
								"query":"%v",
								"fields":["release"]
							
						
						
				,
				"_source":["document_id"],
				"size":%v,
				"from":%v
			
		`
		queryJson = fmt.Sprintf(queryJson, kw, pageSize, page)
	
	// elasticSearch api
	host := elasticHost
	api := host + "mdocument/_doc/_search"
	sj, err := util.HttpPostJson(api, queryJson)
	if err==nil
		count =  sj.GetPath("hits","total").MustInt()
		resultArray := sj.GetPath("hits", "hits").MustArray()
		for _, result := range resultArray 
			if eachMap, ok := result.(map[string]interface); ok 
				id, _ := strconv.Atoi(eachMap["_id"].(string))
				documentIds = append(documentIds, id)
			
		
	
	return documentIds, count, err

之所以通过 sj.GetPath("hits","hits") 取出结果，原因如下：
es查询到多个结果的时候，返回结果如下：


{
    "took": 4,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
———>"hits": {
        "total": {
            "value": 2,
            "relation": "eq"
        },
        "max_score": 1.0,
———————>"hits": [ // 原始数据
            {
                "_index": "shopping",
                "_type": "_doc",
                "_id": "TYu9pn8BfWqG58AR7Mzw",
                "_score": 1.0,
                "_source": {
                    "title": "小米手机",
                    "category": "小米",
                    "images": "http://xxx.com/xm.jpg",
                    "price": 3999.00
                }
            },
            ...
        ]
    }
}

结果：

优化前：

优化后：

性能的具体提升，可使用 ab（Apache Bench）自行进行压力测试。

以上是关于Go项目优化——使用Elasticsearch搜索引擎的主要内容，如果未能解决你的问题，请参考以下文章

Go项目优化——使用Elasticsearch搜索引擎

目录

案例：

http准备

案例（新增）：

案例（查询）：

结果：