solr php里中文分词怎么用
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了solr php里中文分词怎么用相关的知识,希望对你有一定的参考价值。
参考技术A 可以看手册。希望我的回答可以帮到你,有什么不懂可以追问。
Solr配置中文分词器IKAnalyzer及增删改查调用
一、配置IKAnalyzer中文分词器
Solr 版本5.2.1
IKAnalyzer2012_u6.jar报错 ,用IKAnalyzer2012_FF_hf1.jar 替换可解决
解决 Lucene 4.0 与 IKAnalyzer 的冲突:如果出现 Exception in thread "main" java.lang.VerifyError: class org.wltea.analyzer.lucene.IKAnalyzer overrides final method tokenStream.(Ljava/lang/String;Ljava/io/Reader;)Lorg/apache/lucene/analysis/TokenStream; 原因是 IKAnalyzer 参考手册中的例子使用的是 Lucene 3.4,与 Lucene 4.0 已经不兼容。使用 IKAnalyzer2012_FF_hf1.jar 可以解决该问题。
java.lang.AbstractMethodError
Caused by: java.lang.AbstractMethodError at org.apache.lucene.analysis.Analyzer.tokenStream(Analyzer.java:179)
lucene版本跟IK分词版本不一致导致,这块版本一定要对应上
由于solr版本较高暂时无法解决
<!-- Document identifier; declared as the unique key below. -->
<field name="messageId" type="string" indexed="true" stored="true" required="true" multiValued="false"/>
<!-- Message title, analyzed with the text_ik field type defined below. -->
<field name="msg_title" type="text_ik" stored="true" indexed="true"/>
<!-- Message content; multi-valued, analyzed with the text_ik field type defined below. -->
<field name="msg_content" type="text_ik" stored="true" indexed="true" multiValued="true"/>
<uniqueKey>messageId</uniqueKey>
<!-- IKAnalyzer Chinese word segmentation: the same analyzer class is used for both indexing and querying. -->
<fieldType name="text_ik" class="solr.TextField">
  <analyzer type="index" class="org.wltea.analyzer.lucene.IKAnalyzer" />
  <analyzer type="query" class="org.wltea.analyzer.lucene.IKAnalyzer" />
</fieldType>
二、添加、删除、查询索引
除了 Solr 相关的包之外,还需要下面的 jar 包
Message.java
package org.itat.lucene.solr.test;

import org.apache.solr.client.solrj.beans.Field;

/**
 * Plain bean describing one message, bound to Solr fields through
 * {@link Field} annotations placed on its setters.
 *
 * <p>The title is bound to {@code msg_title} and the content to
 * {@code msg_content}; the identifier uses the annotation's default name.
 */
public class Message {

    private String messageId;
    private String title;
    private String[] content;

    /** Creates a message with every property left unset. */
    public Message() {
    }

    /**
     * Creates a fully populated message.
     *
     * @param messageId identifier of the message
     * @param title     message title
     * @param content   message content values; the array is stored as passed, not copied
     */
    public Message(String messageId, String title, String[] content) {
        this.messageId = messageId;
        this.title = title;
        this.content = content;
    }

    // ---- read accessors ----

    /** @return the message identifier */
    public String getMessageId() {
        return messageId;
    }

    /** @return the message title */
    public String getTitle() {
        return title;
    }

    /** @return the content array held by this bean (not a copy) */
    public String[] getContent() {
        return content;
    }

    // ---- annotated setters ----

    /** @param messageId identifier of the message */
    @Field
    public void setMessageId(String messageId) {
        this.messageId = messageId;
    }

    /** @param title message title, bound to the {@code msg_title} field */
    @Field("msg_title")
    public void setTitle(String title) {
        this.title = title;
    }

    /** @param content content values, bound to the {@code msg_content} field */
    @Field("msg_content")
    public void setContent(String[] content) {
        this.content = content;
    }
}
增删改查demo
package org.itat.lucene.solr.test; import java.io.IOException; import java.net.MalformedURLException; import java.util.ArrayList; import java.util.List; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CloudSolrServer; import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.UpdateResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrInputDocument; import org.junit.Before; import org.junit.Test; public class SolrTest { private final static String URL = "http://localhost:8080/solr/mail"; private HttpSolrServer server = null; @Before public void init() { server = new HttpSolrServer(URL); } @Test public void addOneOrDelete() { /*HttpSolrServer server=new HttpSolrServer(URL);*/ /*ConcurrentUpdateSolrServer cs=new ConcurrentUpdateSolrServer(URL,1,1); *新版已被ConcurrentUpdateSolrClient取代 * */ /*CloudSolrServer css=new CloudSolrServer("192.168.0.1");//zookeeper地址 *已被CloudSolrClient */ HttpSolrClient server= new HttpSolrClient(URL);//新版已经取代了HttpSolrServer SolrInputDocument doc = new SolrInputDocument(); //id是唯一的主键,当多次添加的时候,最后添加的相同id的域会覆盖前面的域 doc.addField("messageId","3"); doc.addField("msg_title", "这是我的第一个solrj的程序"); doc.addField("msg_content","我的solrj的程序究竟能不能跑得起来呢?"); try { UpdateResponse ur=server.add(doc); System.out.println(ur); /*server.deleteByQuery("*:*");*/ server.commit(null,true,true,false); server.close(); } catch (SolrServerException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } //一下添加多个 @Test public void addList() { try { List<SolrInputDocument> docs = new 
ArrayList<SolrInputDocument>(); SolrInputDocument doc = new SolrInputDocument(); doc.addField("messageId", "2"); doc.addField("msg_title", "很好!solr可以工作了"); doc.addField("msg_content","slor总算可以正式工作了"); docs.add(doc); doc = new SolrInputDocument(); doc.addField("messageId", "3"); doc.addField("msg_title", "测试一下solr的添加"); doc.addField("msg_content","看看能不能添加一个列表信息"); docs.add(doc); server.add(docs); server.commit(); } catch (SolrServerException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } //基于java bean的添加 @Test public void addBean() { try { List<Message> msgs = new ArrayList<Message>(); msgs.add(new Message("4","基于java bean的添加", new String[]{"通过java bean完成添加","java bean的添加附件"})); msgs.add(new Message("5","基于java bean的列表数据的添加", new String[]{"测试如何通过一个对象完成添加","通过对象完成添加的附件"})); server.addBeans(msgs); server.commit(); } catch (SolrServerException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } //查询 @Test public void test04() { try { //定义查询字符串 SolrQuery query = new SolrQuery("*"); query.setStart(0); query.setRows(5); QueryResponse resp=null; try { resp = server.query(query); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } //查询出来的结果都保存在SolrDocumentList中 SolrDocumentList sdl = resp.getResults(); System.out.println(sdl.getNumFound()); for(SolrDocument sd:sdl) { // System.out.println(sd); System.out.println(sd.getFieldValue("msg_title")+","+sd.getFieldValue("msg_content")); } } catch (SolrServerException e) { e.printStackTrace(); } } //基于javabean查询 @Test public void test05() { try { SolrQuery query = new SolrQuery("*"); query.setStart(0); query.setRows(5); QueryResponse resp=null; try { resp = server.query(query); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } //可以直接查询相应的bean对象,但是不是很常用,无法得到查询总条数 List<Message> list = resp.getBeans(Message.class); System.out.println(list.size()); for(Message msg:list) { System.out.println(msg.getTitle()); } } catch 
(SolrServerException e) { e.printStackTrace(); } } //高亮 @Test public void test06() { try { SolrQuery query = new SolrQuery("msg_content:测试 OR msg_title:测试"); query.setHighlight(true).setHighlightSimplePre("<span class=‘highligter‘>") .setHighlightSimplePost("</span>") .setStart(0).setRows(5); query.setParam("hl.fl", "msg_title,msg_content"); QueryResponse resp=null; try { resp = server.query(query); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } //查询出来的结果都保存在SolrDocumentList中 SolrDocumentList sdl = resp.getResults(); System.out.println(sdl.getNumFound()); for(SolrDocument sd:sdl) { String id = (String)sd.getFieldValue("messageId"); System.out.println(resp.getHighlighting().get(id).get("msg_content")); //高亮结果以唯一索引为key } } catch (SolrServerException e) { e.printStackTrace(); } } }
本文出自 “点滴积累” 博客,请务必保留此出处http://tianxingzhe.blog.51cto.com/3390077/1753167
以上是关于solr php里中文分词怎么用的主要内容,如果未能解决你的问题,请参考以下文章