# 深入搜索

# 控制精度

在单字段检索、多字段检索中（非and模式），可以使用minimum_should_match这个参数进行精度控制，可设置为数字（例如：3）或百分比（例如75%），均表示至少匹配多少项。

// 单字段检索
'field'=>[
    // ...
  'minimum_should_match' => '75%',//控制精度 最少应该匹配几个，值可以为整型或百分比
]
    
// 多字段检索
'field'=>[
  [
      // 字段1
      'minimum_should_match' => '75%',//控制精度 最少应该匹配几个，值可以为整型或百分比
  ],
  [
      // 字段2
      'minimum_should_match' => '75%',//控制精度 最少应该匹配几个，值可以为整型或百分比
  ],
  
]
    
//bool检索
    
//should
'should' => [
    'match' => [
        [
            'area' => '中国',
        ],
        [
            'area' => '韩国',
        ],
    ],
    'minimum_should_match' => '1', //最少应该同时满足几个

],

//should嵌套
'should' => [
    [
        'bool' => [
            'should' => [
                'match' => [
                    [
                        'area' => '中国',
                    ],
                    [
                        'area' => '韩国',
                    ],
                ],
                'minimum_should_match' => 1, //最少应该同时满足几个
            ],

        ]
    ],
    [
        'bool' => [
            'should' => [
                'match' => [
                    [
                        'area' => '美国',
                    ],
                    [
                        'area' => '新加坡',
                    ],
                ],
                'minimum_should_match' => 1, //最少应该同时满足几个
            ],
            // 'filter'=>[],

        ]
    ],
    'minimum_should_match' => 1, //最少应该同时满足几个

],

# 匹配逻辑

# operator

在单字段检索、多字段检索中，可以手动设置字段内分词的匹配方式，包括and、or，and表示必须所有词都命中，or表示至少命中一个词。

// 单字段检索
$query = [
    'match' => [
        'field' => [
            'name' => 'title',
            'query' => $text,
            'operator' => 'or', // and 或 or，不填默认为or
        ]
    ]
];


// 多字段检索
$query = [
    'multi_match' => [
        'field' => [
            [
                'name' => 'title',
                'query' => $text,
                'operator' => 'and', // and 或 or，不填默认为or
            ], [
                'name' => 'descr',
                'query' => $text,
                'operator' => 'or', // and 或 or，不填默认为or
            ]
        ]
    ]
];

# field_operator

在多字段检索中，可配置field_operator，来控制搜索结果的匹配，field_operator代表字段间的匹配方式，and代表所有字段必须都有命中，or代表只要有一个字段出现命中就行

// 多字段检索
$query = [
    'multi_match' => [
        'field' => [
            [
                'name' => 'title',
                'query' => $text,
                'operator' => 'and', // and 或 or，不填默认为or
            ], [
                'name' => 'descr',
                'query' => $text,
                'operator' => 'or', // and 或 or，不填默认为or
            ]
        ],
        'field_operator' => 'and',// and 或 or，不填默认为or
    ]
];

# 同义词

可以配置同义词的检索模式：单字段检索、多字段检索。

使用synonym参数来灵活设置是否使用同义词，例如某个字段需要开启同义词，这个字段这样配置即可：'synonym' => true，不想使用同义词，设置为false或不配置即可。

示例：

// 在具体的搜索中，打开具体字段的同义词开关，只有text类型才可以配置同义词，否则无效

// 单字段检索
$query = [
    'match' => [
        'field' => [
            'name' => 'title',
            'query' => $text,
            'synonym' => true,// 开启title字段的同义词匹配
        ]
    ]
];


// 多字段检索
$query = [
    'multi_match' => [
        'field' => [
            [
                'name' => 'title',
                'query' => $text,
                'synonym' => true,// 开启title字段的同义词匹配
            ], [
                'name' => 'descr',
                'query' => $text,
               	// descr字段未设置同义词
            ]
        ],
    ]
];

# 字段权重

在进行多字段检索时，如果需要让某个字段的权重更高，可以这样配置：

    
// 多字段检索
$query = [
    'multi_match' => [
        'field' => [
            [
                'name' => 'title',
                'query' => $text,
                'weight' => 2, // title字段权重为2（相对其它字段）
            ], [
                'name' => 'descr',
                'query' => $text,
                'weight' => 1, // descr字段权重为1（相对其它字段）
            ]
        ],
    ]
];

这样，title字段的权重就会更高，命中title字段的结果排序时会更靠前。

注意，weight的值是相对的。

# 自定义返回字段

// 单字段检索
$query = [
    'match' => [
        'field' => [
            'name' => 'title',
            'query' => $text,
            'synonym' => true,
        ],
        // 结果排序
        'sort' => [
            'time' => 'desc'
        ],

        // 添加此项，来自定义返回的字段；为空或不设置，代表全部返回；
        '_source' => ['title', 'descr'],

        'list_rows' => $listRows, //每页多少条数据
        'page' => $page, //第几页
    ]
];

# 数据脱敏

配置脱敏的字段

// 搜索单个字段
$query = [
    'match' => [
        'field' => [
           'name' => 'title',
           'query' => $text,
        ],
		// 自定义返回字段 不设置则返回全部字段
        '_source' => ['title',  'phone_number'],
        // 数据脱敏
        'desensitization'=>[
            // 对phone_number字段进行脱敏
            // 3表示开头要保留的字符长度
            // 4表示结尾要保留的字符长度
            // 第三个参数表示中间拼接的字符串，可选，默认会以****拼接
            'phone_number'=>[3,4,'****']
        ],
        
        'list_rows' => $listRows, //每页多少条数据
        'page' => $page, //第几页


    ]

];

# 日期格式化

WindSearch中，date数据类型会强制将日期数据转为时间戳存储，所以WindSearch也提供了格式化日期的功能。

任何会返回原始数据的检索模式，都可以配置formatter

// 搜索单个字段
$query = [
    'match' => [
        'field' => [
           'name' => 'title',
           'query' => $text,
        ],
		// 自定义返回字段 不设置则返回全部字段
        '_source' => ['title',  'phone_number'],
        
        // 返回结果格式化
        'formatter' => [
             // 日期格式化，time为自设的字段名称
            'time' => [
                // 格式化类型：日期
                'type' => 'date',
                // 格式
                'format' => 'Y-m-d'
            ],

        ],
        // 分页
        'list_rows' => $listRows, //每页多少条数据
        'page' => $page, //第几页


    ]

];

如果原始数据不存在正确的时间戳字段，则格式化结果会出现错误；如果formatter中配置的字段名错误，则不会进行任何处理。

# 结果高亮

// 单字段检索
$query = [
    'match' => [
        'field' => [
            'name' => 'title',
            'query' => $text,
            'synonym' => true,
        ],
        // 配置高亮
        'highlight' => [
            'is_cut' => true, // 是否将未匹配的地方截取掉
            'fixed_length' => '45', //保留前45个字符，若为空，则保持原长度
        ],
        // 结果排序
        // ...
        // 自定义返回的字段
        // ...
		// 分页
        'list_rows' => $listRows, //每页多少条数据
        'page' => $page, //第几页
    ]
];

// 搜索结果
$res = $Wind->search($query, $page, $listRows);
// $res包含result、info
// result 包括两个内容：
// 1，_source 搜索结果
// 2，_highlight 所有高亮字段（如果配置了高亮）
// 通过_source里面数据的主键，在_highlight里面获取对应的高亮数据
// 命中的词被<em>标签包裹，可设置em标签的css样式达到高亮效果

# 结果分页

// 单字段检索
$query = [
    'match' => [
        'field' => [
            'name' => 'title',
            'query' => $text,
            'synonym' => true,
        ],
        // 配置高亮
      	// ...
        // 结果排序
        // ...
        // 自定义返回的字段
        // ...
        
		// 分页
        'list_rows' => $listRows, //每页多少条数据
        'page' => $page, //第几页
    ]
];

# 结果排序

# 指定字段排序

// 单字段检索
$query = [
    'match' => [
        'field' => [
            'name' => 'title',
            'query' => $text,
            'synonym' => true,
        ],
        // 配置高亮
        // ...
        
        // 结果排序（以下三种sort写法任选其一；PHP数组中同名的'sort'键只有最后一个生效，实际使用时只保留一个）
        // 指定日期字段排序
        'sort' => [
            'time' => 'desc' //asc 按字段值正序 desc 按字段值倒序
        ],

        // 或 指定数值字段排序
        'sort' => [
            'num' => 'desc' //asc 按字段值正序 desc 按字段值倒序
        ],

        // 或 指定经纬度字段排序（根据距离排序）
        // 注意，需要存在geo_point字段，并且配置了索引，且过滤条件中，存在geo_point字段过滤
        'sort' => [
            '_distance' => 'desc' //asc 按距离正序 desc 按距离倒序
        ],
        // 自定义返回的字段
        // ...
        // 分页
        // ...
    ]
];

# 打分排序

打分排序只支持单字段检索。在sort中配置 _bm25_score 关键字，则会进行bm25打分排序，排序方式支持asc、desc。

不设置，则默认进行综合排序。

// 单字段检索
$query = [
    'match' => [
        'field' => [
            'name' => 'title',
            'query' => $text,
            'synonym' => true,
        ],
        // 配置高亮
        // ...

        // 结果打分排序
        'sort' => [
            '_bm25_score' => 'desc', // 配置_bm25_score关键字，则会进行bm25打分排序，排序方式支持asc、desc
        ],
        // 自定义返回的字段
        // ...
        // 分页
        // ...
    ]
];

# 默认排序

// 单字段检索
$query = [
    'match' => [
        'field' => [
            'name' => 'title',
            'query' => $text,
            'synonym' => true,
        ],
        // 配置高亮
        // ...

        // 如果不设置任何排序条件，则默认就是_score（命中数）排序
        'sort' => [
            '_score' => 'desc' //asc 相关度正序 desc 按相关度倒序
        ],
        // 自定义返回的字段
        // ...
        // 分页
        // ...
    ]
];

# 综合排序

如果没有指定排序字段，那么引擎会按命中数倒序排序，而且term越紧密的，会越靠前，比如:

1，xxxx明天xxxx会xxxxxxx更好

2，xxx明天会更好xxxxxxx

排序后，2会在1前面

← 中文分词增量索引 →