(1.1)——获取词语首字字母
Posted 两只橙
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了(1.1)——获取词语首字字母相关的知识,希望对你有一定的参考价值。
一、maven依赖
<dependency>
<groupId>net.sourceforge.pinyin4j</groupId>
<artifactId>pinyin4j</artifactId>
<version>2.5.0</version>
</dependency>
二、示例代码
import com.pingan.lcloud.ark.log.LoggerUtil;
import net.sourceforge.pinyin4j.PinyinHelper;
import org.apache.commons.lang3.CharUtils;
import org.apache.commons.lang3.StringUtils;
import java.lang.annotation.Native;
import java.util.Objects;
/**
* <code>Details determine success.</code>
* by Liang ZC., Phd@Stanford
* 中文工具类
*
* @author LIANGZHICHENG035
* @date 2019-11-6 15:57
* @see <a href="http://www.stanford.edu">http://www.stanford.edu</a>
*/
public class ChineseUtils
/*
* N777777777NO
* N7777777777777N
* M777777777777777N
* *N877777777D77777M
* N M77777777ONND777M
* MN777777777NN D777
* N7ZN777777777NN ~M7778
* N777777777777MMNN88777N
* N777777777777MNZZZ7777O
* DZN7777O77777777777777
* N7OONND7777777D77777N
* 8*M++++?N???$77777$
* M7++++N+M77777777N
* N77O777777777777$ M
* DNNM$$$$777777N D
* N*N:=N$777N7777M NZ
* 77Z::::N777777777 ODZZZ
* 77N::::::N77777777M NNZZZ$
* $777:::::::77777777MN ZM8ZZZZZ
* 777M::::::Z7777777Z77 N++ZZZZNN
* 7777M:::::M7777777$777M $++IZZZZM
* M777$:::::N777777*M7777M +++++ZZZDN
* NN$::::::7777$*M777777N N+++ZZZZNZ
* N::::::N:7*O:77777777 N++++ZZZZN
* M::::::::::::N77777777+ +?+++++ZZZM
* 8::::::::::::D77777777M O+++++ZZ
* ::::::::::::M777777777N O+?D
* M:::::::::::M77777777778 77=
* D=::::::::::N7777777777N 777
* INN===::::::=77777777777N I777N
* ?777N========N7777777777787M N7777
* 77777*D======N77777777777N777N? N777777
* I77777$$*N7===M$$77777777$77777777*MMZ77777777N
* $$$$$$$$$$*NIZN$$$$$$$$*M$$7777777777777777ON
* M$$$$$$$*M M$$$$$$$*N=N$$$$7777777$$*ND
* O77Z$$$$$$$ M$$$$$$$*MNI==*DNNNNM=~N
* 7 :N MNN$$$*M$ $$$777$8 8D8I
* NMM.:7O 777777778
* 7777777MN
* M NO .7:
* M : M
* 8
*/
/**
 * Creates a {@code ChineseUtils} instance.
 *
 * <p>The helpers visible in this class are all static, so no instance is required to use them.
 * The constructor stays public so that any existing {@code new ChineseUtils()} call keeps
 * compiling.
 */
public ChineseUtils() {
    // Intentionally empty: nothing to initialise.
}
/**
 * Regular expression matching a single character of any Unicode punctuation category
 * ({@code \p{P}}); used with {@code String.replaceAll} to strip punctuation.
 *
 * <p>The Java literal needs exactly one escaped backslash. A doubled escape
 * ({@code "\\\\pP"}) produces a pattern that matches the literal text {@code \pP} and therefore
 * never removes any punctuation.
 */
private static final String PUNCTUATION = "\\p{P}";
/***
* <p>get chinese initail, if the first char is number return the number, if it is a polysyllabic character,
* take only the first one, if the chinese initail is empty return @param defaultValue.<p/>
*
* <pre>
* ChineseUtils.getChineseInitial("我爱中国")) = W
* ChineseUtils.getChineseInitial("爱中国")) = A
* ChineseUtils.getChineseInitial("1爱中国") = 1
* ChineseUtils.getChineseInitial("中国")) = Z
* ChineseUtils.getChineseInitial("@#国")) = G
* ChineseUtils.getChineseInitial("国%$")) = G
* ChineseUtils.getChineseInitial("国")) = G
* ChineseUtils.getChineseInitial("W我爱中国")) = W
* ChineseUtils.getChineseInitial("I我爱中国")) = I
* ChineseUtils.getChineseInitial("null")) = N
* ChineseUtils.getChineseInitial(null)) = ""
* ChineseUtils.getChineseInitial("")) = ""
* ChineseUtils.getChineseInitial(",")) = ""
* <pre/>
*
* @param chinese
* @param defaultValue
* @return the pinyin of first chinese char,if @param chinese is't chinese,return @link StringUtils.EMPTY.
*/
public static String getChineseInitialDefaultIfEmpty(String chinese, String defaultValue)
String result = getChineseInitial(chinese, true);
return StringUtils.isEmpty(result) ? defaultValue : result;
/***
* <p>get chinese initail, if the first char is number return the number, if it is a polysyllabic character,
* take only the first one.<p/>
*
* <pre>
* ChineseUtils.getChineseInitial("我爱中国")) = W
* ChineseUtils.getChineseInitial("爱中国")) = A
* ChineseUtils.getChineseInitial("1爱中国") = 1
* ChineseUtils.getChineseInitial("中国")) = Z
* ChineseUtils.getChineseInitial("@#国")) = G
* ChineseUtils.getChineseInitial("国%$")) = G
* ChineseUtils.getChineseInitial("国")) = G
* ChineseUtils.getChineseInitial("W我爱中国")) = W
* ChineseUtils.getChineseInitial("I我爱中国")) = I
* ChineseUtils.getChineseInitial("null")) = N
* ChineseUtils.getChineseInitial(null)) = ""
* ChineseUtils.getChineseInitial("")) = ""
* ChineseUtils.getChineseInitial(",")) = ""
* <pre/>
*
* @param chinese
* @return the pinyin of first chinese char,if @param chinese is't chinese,return @link StringUtils.EMPTY.
*/
public static String getChineseInitial(String chinese)
return getChineseInitial(chinese, true);
/***
* <p>get chinese initail, if the first char is number return the number, if it is a polysyllabic character,
* take only the first one.<p/>
*
* <pre>
* ChineseUtils.getChineseInitial("我爱中国") = W
* ChineseUtils.getChineseInitial("爱中国") = A
* ChineseUtils.getChineseInitial("1爱中国") = 1
* ChineseUtils.getChineseInitial("中国") = Z
* ChineseUtils.getChineseInitial("@#国") = G
* ChineseUtils.getChineseInitial("国%$" = G
* ChineseUtils.getChineseInitial("国") = G
* ChineseUtils.getChineseInitial("W我爱中国") = W
* ChineseUtils.getChineseInitial("I我爱中国") = I
* ChineseUtils.getChineseInitial("null") = N
* ChineseUtils.getChineseInitial(null) = ""
* ChineseUtils.getChineseInitial("") = ""
* ChineseUtils.getChineseInitial(",") = ""
* <pre/>
*
* @param chinese
* @param removePunctuation is remove the punctuation in @param chinese.
* @return the pinyin of first chinese char,if @param chinese is't chinese,return @link StringUtils.EMPTY.
*/
public static String getChineseInitial(String chinese, boolean removePunctuation)
// if need remove punctuation.
if (removePunctuation)
chinese = removePunctuation(chinese);
// if chinese is blank
if (StringUtils.isBlank(chinese))
return StringUtils.EMPTY;
// first char.
char firstChar = chinese.charAt(0);
// if first char in [a-z,A-Z,0-9]
if (CharUtils.isAsciiAlphanumeric(firstChar))
return CharUtils.toString(firstChar).toUpperCase();
// if is chinese.
boolean isChinese = CharUtils.toString(firstChar).matches("[\\u4E00-\\u9FA5]+");
if (!isChinese)
return StringUtils.EMPTY;
// chinese to pinyin and get first char.
try
String[] res = PinyinHelper.toHanyuPinyinStringArray(firstChar);
return res[0].substring(0, 1).toUpperCase();
catch (Exception e)
LoggerUtil.warn("get " + chinese + " chinese initial fail.", e);
return StringUtils.EMPTY;
/**
* <p>replace @param str punctuation to "", if @param str is empty , return @link StringUtils.EMPTY.</p>
*
* <pre>
* ChineseUtils.removePunctuation(null) = ""
* ChineseUtils.removePunctuation("") = ""
* ChineseUtils.removePunctuation(" ") = ""
* ChineseUtils.removePunctuation("我爱中国") = 我爱中国
* ChineseUtils.removePunctuation("我爱中国!") = 我爱中国
* ChineseUtils.removePunctuation("我爱中国。") = 我爱中国
* ChineseUtils.removePunctuation("我爱中国.") = 我爱中国
* ChineseUtils.removePunctuation(" 我爱中国. ") = 我爱中国
* </pre>
*
* @param str
* @return string
*/
public static String removePunctuation(String str)
if (StringUtils.isEmpty(str))
return StringUtils.EMPTY;
return str.trim().replaceAll(PUNCTUATION, StringUtils.EMPTY);
三、运行结果
public static void main(String[] args)
System.out.println(ChineseUtils.getChineseInitial("我爱中国"));
System.out.println(ChineseUtils.getChineseInitial("爱中国"));
System.out.println(ChineseUtils.getChineseInitial("1爱中国"));
System.out.println(ChineseUtils.getChineseInitial("中国"));
System.out.println(ChineseUtils.getChineseInitial("@#国"));
System.out.println(ChineseUtils.getChineseInitial("国%$"));
System.out.println(ChineseUtils.getChineseInitial("国"));
System.out.println(ChineseUtils.getChineseInitial("W我爱中国"));
System.out.println(ChineseUtils.getChineseInitial("I我爱中国"));
System.out.println(ChineseUtils.getChineseInitial("null"));
System.out.println(ChineseUtils.getChineseInitial(null));
System.out.println(ChineseUtils.getChineseInitial(""));
System.out.println(ChineseUtils.getChineseInitial(","));
W
A
1
Z
G
G
G
W
I
N
https://pan.baidu.com/s/18R8DGiGAkHdtafYLBlTVvg 提取码: h4jm
以上是关于(1.1)——获取词语首字字母的主要内容,如果未能解决你的问题,请参考以下文章