(1.1)——获取词语首字字母

Posted 两只橙

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了(1.1)——获取词语首字字母相关的知识,希望对你有一定的参考价值。

一、maven依赖

        <dependency>
            <groupId>net.sourceforge.pinyin4j</groupId>
            <artifactId>pinyin4j</artifactId>
            <version>2.5.0</version>
        </dependency>

二、示例代码

import com.pingan.lcloud.ark.log.LoggerUtil;
import net.sourceforge.pinyin4j.PinyinHelper;
import org.apache.commons.lang3.CharUtils;
import org.apache.commons.lang3.StringUtils;

import java.lang.annotation.Native;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Pattern;

/**
 * <code>Details determine success.</code>
 * by Liang ZC., Phd@Stanford
 * 中文工具类
 *
 * @author LIANGZHICHENG035
 * @date 2019-11-6 15:57
 * @see http://www.stanford.edu
 */
public class ChineseUtils 
    /*
     *           N777777777NO
     *         N7777777777777N
     *        M777777777777777N
     *        *N877777777D77777M
     *       N M77777777ONND777M
     *       MN777777777NN  D777
     *     N7ZN777777777NN ~M7778
     *    N777777777777MMNN88777N
     *    N777777777777MNZZZ7777O
     *    DZN7777O77777777777777
     *     N7OONND7777777D77777N
     *      8*M++++?N???$77777$
     *       M7++++N+M77777777N
     *        N77O777777777777$                              M
     *          DNNM$$$$777777N                              D
     *         N*N:=N$777N7777M                             NZ
     *        77Z::::N777777777                          ODZZZ
     *       77N::::::N77777777M                         NNZZZ$
     *     $777:::::::77777777MN                        ZM8ZZZZZ
     *     777M::::::Z7777777Z77                        N++ZZZZNN
     *    7777M:::::M7777777$777M                       $++IZZZZM
     *   M777$:::::N777777*M7777M                       +++++ZZZDN
     *     NN$::::::7777$*M777777N                      N+++ZZZZNZ
     *       N::::::N:7*O:77777777                      N++++ZZZZN
     *       M::::::::::::N77777777+                   +?+++++ZZZM
     *       8::::::::::::D77777777M                    O+++++ZZ
     *        ::::::::::::M777777777N                      O+?D
     *        M:::::::::::M77777777778                     77=
     *        D=::::::::::N7777777777N                    777
     *       INN===::::::=77777777777N                  I777N
     *      ?777N========N7777777777787M               N7777
     *      77777*D======N77777777777N777N?         N777777
     *     I77777$$*N7===M$$77777777$77777777*MMZ77777777N
     *      $$$$$$$$$$*NIZN$$$$$$$$*M$$7777777777777777ON
     *       M$$$$$$$*M    M$$$$$$$*N=N$$$$7777777$$*ND
     *      O77Z$$$$$$$     M$$$$$$$*MNI==*DNNNNM=~N
     *   7 :N MNN$$$*M$      $$$777$8      8D8I
     *     NMM.:7O           777777778
     *                       7777777MN
     *                       M NO .7:
     *                       M   :   M
     *                            8
     */

    // Constant matcher factory methods

    public ChineseUtils() 
    

    private static final String PUNCTUATION = "\\\\pP";

    /***
     * <p>get chinese initail, if the first char is number return the number, if it is a polysyllabic character,
     * take only the first one, if the chinese initail is empty return @param defaultValue.<p/>
     *
     * <pre>
     *  ChineseUtils.getChineseInitial("我爱中国"))	= 	W
     *  ChineseUtils.getChineseInitial("爱中国"))	= 	A
     *  ChineseUtils.getChineseInitial("1爱中国")	= 	1
     *  ChineseUtils.getChineseInitial("中国"))		= 	Z
     *  ChineseUtils.getChineseInitial("@#国"))		= 	G
     *  ChineseUtils.getChineseInitial("国%$"))		= 	G
     *  ChineseUtils.getChineseInitial("国"))		= 	G
     *  ChineseUtils.getChineseInitial("W我爱中国"))	= 	W
     *  ChineseUtils.getChineseInitial("I我爱中国"))	= 	I
     *  ChineseUtils.getChineseInitial("null"))     = 	N
     *  ChineseUtils.getChineseInitial(null))       = 	""
     *  ChineseUtils.getChineseInitial(""))         = 	""
     *  ChineseUtils.getChineseInitial(","))        =	""
     * <pre/>
     *
     * @param chinese
     * @param defaultValue
     * @return the pinyin of first chinese char,if @param chinese is't chinese,return @link StringUtils.EMPTY.
     */
    public static String getChineseInitialDefaultIfEmpty(String chinese, String defaultValue) 
        String result = getChineseInitial(chinese, true);
        return StringUtils.isEmpty(result) ? defaultValue : result;
    

    /***
     * <p>get chinese initail, if the first char is number return the number, if it is a polysyllabic character,
     * take only the first one.<p/>
     *
     * <pre>
     *  ChineseUtils.getChineseInitial("我爱中国"))	= 	W
     *  ChineseUtils.getChineseInitial("爱中国"))	= 	A
     *  ChineseUtils.getChineseInitial("1爱中国")	= 	1
     *  ChineseUtils.getChineseInitial("中国"))		= 	Z
     *  ChineseUtils.getChineseInitial("@#国"))		= 	G
     *  ChineseUtils.getChineseInitial("国%$"))		= 	G
     *  ChineseUtils.getChineseInitial("国"))		= 	G
     *  ChineseUtils.getChineseInitial("W我爱中国"))	= 	W
     *  ChineseUtils.getChineseInitial("I我爱中国"))	= 	I
     *  ChineseUtils.getChineseInitial("null"))     = 	N
     *  ChineseUtils.getChineseInitial(null))       = 	""
     *  ChineseUtils.getChineseInitial(""))         = 	""
     *  ChineseUtils.getChineseInitial(","))        =	""
     * <pre/>
     *
     * @param chinese
     * @return the pinyin of first chinese char,if @param chinese is't chinese,return @link StringUtils.EMPTY.
     */
    public static String getChineseInitial(String chinese) 
        return getChineseInitial(chinese, true);
    

    /***
     * <p>get chinese initail, if the first char is number return the number, if it is a polysyllabic character,
     * take only the first one.<p/>
     *
     * <pre>
     *  ChineseUtils.getChineseInitial("我爱中国")	= 	W
     *  ChineseUtils.getChineseInitial("爱中国")	    = 	A
     *  ChineseUtils.getChineseInitial("1爱中国")	= 	1
     *  ChineseUtils.getChineseInitial("中国")		= 	Z
     *  ChineseUtils.getChineseInitial("@#国")		= 	G
     *  ChineseUtils.getChineseInitial("国%$"		= 	G
     *  ChineseUtils.getChineseInitial("国")		    = 	G
     *  ChineseUtils.getChineseInitial("W我爱中国")	= 	W
     *  ChineseUtils.getChineseInitial("I我爱中国")	= 	I
     *  ChineseUtils.getChineseInitial("null")      = 	N
     *  ChineseUtils.getChineseInitial(null)        = 	""
     *  ChineseUtils.getChineseInitial("")          = 	""
     *  ChineseUtils.getChineseInitial(",")         =	""
     * <pre/>
     *
     * @param chinese
     * @param removePunctuation is remove the punctuation in @param chinese.
     * @return the pinyin of first chinese char,if @param chinese is't chinese,return @link StringUtils.EMPTY.
     */
    public static String getChineseInitial(String chinese, boolean removePunctuation) 
        // if need remove punctuation.
        if (removePunctuation) 
            chinese = removePunctuation(chinese);
        
        // if chinese is blank
        if (StringUtils.isBlank(chinese)) 
            return StringUtils.EMPTY;
        
        // first char.
        char firstChar = chinese.charAt(0);
        // if first char in [a-z,A-Z,0-9]
        if (CharUtils.isAsciiAlphanumeric(firstChar)) 
            return CharUtils.toString(firstChar).toUpperCase();
        
        // if is chinese.
        boolean isChinese = CharUtils.toString(firstChar).matches("[\\u4E00-\\u9FA5]+");
        if (!isChinese) 
            return StringUtils.EMPTY;
        
        // chinese to pinyin and get first char.
        try 
            String[] res = PinyinHelper.toHanyuPinyinStringArray(firstChar);
            return res[0].substring(0, 1).toUpperCase();
         catch (Exception e) 
            LoggerUtil.warn("get " + chinese + " chinese initial fail.", e);
        

        return StringUtils.EMPTY;
    

    /**
     * <p>replace @param str punctuation to "", if @param str is empty , return @link StringUtils.EMPTY.</p>
     *
     * <pre>
     *  ChineseUtils.removePunctuation(null)			=	""
     *  ChineseUtils.removePunctuation("")				=	""
     *  ChineseUtils.removePunctuation(" ")				=	""
     *  ChineseUtils.removePunctuation("我爱中国")		=	我爱中国
     *  ChineseUtils.removePunctuation("我爱中国!")		=	我爱中国
     *  ChineseUtils.removePunctuation("我爱中国。")		=	我爱中国
     *  ChineseUtils.removePunctuation("我爱中国.")		=	我爱中国
     *  ChineseUtils.removePunctuation("  我爱中国.  ")	=	我爱中国
     * </pre>
     *
     * @param str
     * @return string
     */
    public static String removePunctuation(String str) 
        if (StringUtils.isEmpty(str)) 
            return StringUtils.EMPTY;
        

        return str.trim().replaceAll(PUNCTUATION, StringUtils.EMPTY);
    



三、测试代码与运行结果

    public static void main(String[] args) 
        System.out.println(ChineseUtils.getChineseInitial("我爱中国"));
        System.out.println(ChineseUtils.getChineseInitial("爱中国"));
        System.out.println(ChineseUtils.getChineseInitial("1爱中国"));
        System.out.println(ChineseUtils.getChineseInitial("中国"));
        System.out.println(ChineseUtils.getChineseInitial("@#国"));
        System.out.println(ChineseUtils.getChineseInitial("国%$"));
        System.out.println(ChineseUtils.getChineseInitial("国"));
        System.out.println(ChineseUtils.getChineseInitial("W我爱中国"));
        System.out.println(ChineseUtils.getChineseInitial("I我爱中国"));
        System.out.println(ChineseUtils.getChineseInitial("null"));
        System.out.println(ChineseUtils.getChineseInitial(null));
        System.out.println(ChineseUtils.getChineseInitial(""));
        System.out.println(ChineseUtils.getChineseInitial(","));
    
W
A
1
Z
G
G
G
W
I
N
 https://pan.baidu.com/s/18R8DGiGAkHdtafYLBlTVvg   提取码: h4jm

以上是关于(1.1)——获取词语首字字母的主要内容,如果未能解决你的问题,请参考以下文章

C#获得字符串首字符字母(大写)

java 如何判断字符串开头首字为字母且若大写自动更改为小写

MySQL数据库中实现对中文字段按照首字字母排序

自动翻译程序员英语

第一章(变量,数据类型,运算符)

mysql 汉字按拼音字母排序获取拼音首字母拼音全拼