String源码学习笔记
Posted Don1911
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了String源码学习笔记相关的知识,希望对你有一定的参考价值。
前言:他山之石,可以攻玉
(1) String最重要的属性:字符数组value,用来存放字符串的内容,String的很多方法都是通过操作此字符数组实现的
/** 用来存放字符(s)的值 */ private final char value[];
(2) String类的构造器比较多 列举以下两个
// 1. Build a String from a sub-range of a char[].
/**
 * Allocates a new String containing {@code count} chars of {@code value},
 * starting at index {@code offset}. The chars are copied.
 *
 * @param value  source array
 * @param offset index of the first char to copy
 * @param count  number of chars to copy
 * @throws StringIndexOutOfBoundsException if {@code offset} or {@code count} is negative,
 *         or {@code offset + count} exceeds {@code value.length}
 */
public String(char value[], int offset, int count) {
    if (offset < 0) {
        throw new StringIndexOutOfBoundsException(offset);
    }
    if (count <= 0) {
        if (count < 0) {
            throw new StringIndexOutOfBoundsException(count);
        }
        if (offset <= value.length) {
            // count == 0 and 0 <= offset <= value.length: the result is ""
            this.value = "".value;
            return;
        }
    }
    // Note: offset or count might be near -1>>>1.
    // (-1>>>1 is Integer.MAX_VALUE; -1<<31 is Integer.MIN_VALUE.) The bound is therefore
    // checked as "offset > length - count" rather than by computing offset + count first.
    if (offset > value.length - count) {
        throw new StringIndexOutOfBoundsException(offset + count);
    }
    this.value = Arrays.copyOfRange(value, offset, offset + count);
}

// 2. Build a String from a sub-range of an int[] of Unicode code points.
/**
 * Allocates a new String from {@code count} code points of {@code codePoints},
 * starting at index {@code offset}. Supplementary code points are stored as
 * surrogate pairs, so the resulting length may exceed {@code count}.
 *
 * @param codePoints source array of Unicode code points
 * @param offset     index of the first code point to convert
 * @param count      number of code points to convert
 * @throws IllegalArgumentException if an invalid code point is found in the range
 * @throws StringIndexOutOfBoundsException if {@code offset} or {@code count} is negative,
 *         or {@code offset + count} exceeds {@code codePoints.length}
 */
public String(int[] codePoints, int offset, int count) {
    if (offset < 0) {
        throw new StringIndexOutOfBoundsException(offset);
    }
    if (count <= 0) {
        if (count < 0) {
            throw new StringIndexOutOfBoundsException(count);
        }
        if (offset <= codePoints.length) {
            this.value = "".value;
            return;
        }
    }
    if (offset > codePoints.length - count) {
        throw new StringIndexOutOfBoundsException(offset + count);
    }

    final int end = offset + count; // exclusive end index

    // Pass 1: compute the precise size of the char[]
    int n = count;
    for (int i = offset; i < end; i++) {
        int c = codePoints[i];
        if (Character.isBmpCodePoint(c)) {
            // c >>> 16 == 0, i.e. c < 2^16: fits in a single char
            continue;
        } else if (Character.isValidCodePoint(c)) {
            // valid supplementary code point (c < 1114112): needs two chars
            n++;
        } else {
            // reject invalid code points here so pass 2 never writes past the sized array
            throw new IllegalArgumentException(Integer.toString(c));
        }
    }

    // Pass 2: allocate and fill the char[]
    final char[] v = new char[n];
    for (int i = offset, j = 0; i < end; i++, j++) {
        int c = codePoints[i];
        if (Character.isBmpCodePoint(c)) {
            v[j] = (char)c;
        } else {
            // writes the surrogate pair at indices j and j + 1
            Character.toSurrogates(c, v, j++);
        }
    }
    this.value = v;
}
第二个构造器示例:
// Demonstrates String(int[] codePoints, int offset, int count).
int[] a = {65, 97, 65536, 1114110};
String str = new String(a, 0, a.length);
System.out.println("str的length值:" + str.length());
System.out.println("str字符串:" + str);
System.out.println("str在索引2处的字符:" + str.charAt(2));
System.out.println("str在索引3处的字符:" + str.charAt(3));
// Output (65536 is the threshold: the first code point that needs two chars):
//   str的length值:6
//   str字符串:Aa????
//   str在索引2处的字符:?
//   str在索引3处的字符:?
// NOTE(review): the '?' characters are presumably how the author's console rendered
// the surrogate chars; confirm against your own terminal encoding.
(4) offsetByCodePoints(int index, int codePointOffset),返回从给定的index处偏移codePointOffset个代码点(字符)后所得到的索引(结果最大值为length)
public int offsetByCodePoints(int index, int codePointOffset) { if (index < 0 || index > value.length) { //index可为value.length 若codePointOffset>0,则抛出异常 throw new IndexOutOfBoundsException(); } return Character.offsetByCodePointsImpl(value, 0, value.length, index, codePointOffset); }
//Character的类方法: static int offsetByCodePointsImpl(char[]a, int start, int count, int index, int codePointOffset) { int x = index; if (codePointOffset >= 0) { int limit = start + count; int i; for (i = 0; x < limit && i < codePointOffset; i++) { if (isHighSurrogate(a[x++]) && x < limit && isLowSurrogate(a[x])) { //若是代理对的高位,则x+2 x++; } } if (i < codePointOffset) { //若为true,则说明当index<limit,index后字符个数不够(offsetByCodePoints-1)个 throw new IndexOutOfBoundsException(); } } else { int i; for (i = codePointOffset; x > start && i < 0; i++) { //不区别index是否为代理对低位 if (isLowSurrogate(a[--x]) && x > start && isHighSurrogate(a[x-1])) { x--; } } if (i < 0) { //若为true,则说明当start-index中间的字符个数不够offsetByCodePoints个; 若index-1处为代理对高位,则计为一个“字符” throw new IndexOutOfBoundsException(); } } return x; }
(5) getChars(char dst[], int dstBegin) 包级私有方法(无访问修饰符,即default访问权限),将this.value全部复制到dst[]中从dstBegin开始的位置
/**
 * Copies every char of this string into {@code dst}, starting at {@code dstBegin}.
 * Performs no range checking of its own; System.arraycopy does the bounds checks.
 */
void getChars(char dst[], int dstBegin) {
    final char[] src = value;
    System.arraycopy(src, 0, dst, dstBegin, src.length);
}
(6) getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) 将string的srcBegin~srcEnd(不含)的字符复制到dst[]以dstBegin索引开始的位置
public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) { if (srcBegin < 0) { //参数检查 throw new StringIndexOutOfBoundsException(srcBegin); } if (srcEnd > value.length) { throw new StringIndexOutOfBoundsException(srcEnd); } if (srcBegin > srcEnd) { throw new StringIndexOutOfBoundsException(srcEnd - srcBegin); } System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin); //String类中大量使用了此方法 }
(7) equals(Object obj) 与指定的对象比较。当且仅当该参数不为 null、是String对象,且value(字符序列)相同时为true
public boolean equals(Object anObject) { if (this == anObject) { //若地址相同,返回true return true; } if (anObject instanceof String) { //String类的实例 String anotherString = (String)anObject; int n = value.length; if (n == anotherString.value.length) { char v1[] = value; char v2[] = anotherString.value; int i = 0; while (n-- != 0) { if (v1[i] != v2[i]) //比较value值 return false; i++; } return true; } } return false; }
(8) contentEquals(CharSequence cs) 与CharSequence接口实现类进行内容比较
public boolean contentEquals(CharSequence cs) { // 参数是 StringBuffer 或 StringBuilder if (cs instanceof AbstractStringBuilder) { if (cs instanceof StringBuffer) { synchronized(cs) { //同步比较 return nonSyncContentEquals((AbstractStringBuilder)cs); } } else { return nonSyncContentEquals((AbstractStringBuilder)cs); } } // 参数类型是String if (cs instanceof String) { return equals(cs); } // 参数是通常的CharSequence char v1[] = value; int n = v1.length; if (n != cs.length()) { return false; } for (int i = 0; i < n; i++) { if (v1[i] != cs.charAt(i)) { return false; } } return true; }
(9) compareTo(String str) 比较两个字符串
public int compareTo(String anotherString) { int len1 = value.length; int len2 = anotherString.value.length; int lim = Math.min(len1, len2); char v1[] = value; //定义局部变量减少getField操作 char v2[] = anotherString.value; int k = 0; while (k < lim) { char c1 = v1[k]; char c2 = v2[k]; if (c1 != c2) { return c1 - c2; //依次比较,若不同,则返回Unicode值之差 } k++; } return len1 - len2; //若可比较的字符相同,则返回长度差 }
(10) compareToIgnoreCase(String str) 忽略大小写进行比较,通过调用String私有静态final类完成
public int compareToIgnoreCase(String str) { return CASE_INSENSITIVE_ORDER.compare(this, str); } public static final Comparator<String> CASE_INSENSITIVE_ORDER = new CaseInsensitiveComparator(); private static class CaseInsensitiveComparator implements Comparator<String>, java.io.Serializable { // use serialVersionUID from JDK 1.2.2 for interoperability private static final long serialVersionUID = 8575799808933029326L; public int compare(String s1, String s2) { int n1 = s1.length(); int n2 = s2.length(); int min = Math.min(n1, n2); for (int i = 0; i < min; i++) { char c1 = s1.charAt(i); char c2 = s2.charAt(i); if (c1 != c2) { c1 = Character.toUpperCase(c1); c2 = Character.toUpperCase(c2); if (c1 != c2) { // conversion to uppercase does not work properly
// for the Georgian alphabet(格鲁吉亚字母), which has strange rules about case
// conversion. So we need to make one last check before exiting.
c1 = Character.toLowerCase(c1); c2 = Character.toLowerCase(c2); if (c1 != c2) { // No overflow because of numeric promotion return c1 - c2; } } } } return n1 - n2; } /** Replaces the de-serialized object. */ private Object readResolve() { return CASE_INSENSITIVE_ORDER; } }
(11) indexOf(String str, int f) 从f处开始查找str,通过调用内部静态默认方法实现
/**
 * Returns the index of the first occurrence of {@code str} in this string,
 * searching forward from {@code fromIndex}; delegates to the static
 * array-based {@code indexOf}.
 */
public int indexOf(String str, int fromIndex) {
    final char[] haystack = value;
    final char[] needle = str.value;
    return indexOf(haystack, 0, haystack.length,
            needle, 0, needle.length, fromIndex);
}
static int indexOf(char[] source, int sourceOffset, int sourceCount, //sourceOffset:source(源字符串)开端(索引) char[] target, int targetOffset, int targetCount, int fromIndex) { //fromIndex是相对sourceOffset而言的 if (fromIndex >= sourceCount) { return (targetCount == 0 ? sourceCount : -1); } if (fromIndex < 0) { fromIndex = 0; } if (targetCount == 0) { //若目标字符串为"",则直接返回fromIndex (>=0) return fromIndex; } char first = target[targetOffset]; //target(目标字符串)的开端字符 int max = sourceOffset + (sourceCount - targetCount); //target在source中最大的索引 for (int i = sourceOffset + fromIndex; i <= max; i++) { //从sourceOffset+fromIndex开始遍历,最大至max处 /* 寻找第一个匹配字符的索引 */ if (source[i] != first) { while (++i <= max && source[i] != first); } /* 找到了第一个字符,现在匹配剩下的字符(s) */ if (i <= max) { int j = i + 1; int end = j + targetCount - 1; for (int k = targetOffset + 1; j < end && source[j] == target[k]; j++, k++); if (j == end) { //上面的循环正常结束 /* 找到整个字符串 */ return i - sourceOffset; //返回相对于sourceOffset的索引值 } } } return -1; }
(12) lastIndexOf(String str, int f) 从f处开始向前(反向)查找,返回0~f范围内str最后一次出现的索引(即不大于f的最大索引值) 通过调用内部的包级私有静态方法实现
/**
 * Returns the index of the last occurrence of {@code str} in this string,
 * searching backward from {@code fromIndex}; delegates to the static
 * array-based {@code lastIndexOf}.
 */
public int lastIndexOf(String str, int fromIndex) {
    final char[] haystack = value;
    final char[] needle = str.value;
    return lastIndexOf(haystack, 0, haystack.length,
            needle, 0, needle.length, fromIndex);
}
static int lastIndexOf(char[] source, int sourceOffset, int sourceCount, char[] target, int targetOffset, int targetCount, int fromIndex) { /* Check arguments; return immediately where possible. For consistency, don‘t check for null str. */ int rightIndex = sourceCount - targetCount; //最大索引的可能值 若r < 0,则在①处return -1 if (fromIndex < 0) { return -1; } if (fromIndex > rightIndex) { //若f >= rightIndex, 则f = rightIndex fromIndex = rightIndex; } /* 空字符串总是匹配的 */ if (targetCount == 0) { //若targetCount=0,则返回 Math.min(fromIndex,sourceCount) return fromIndex; } int strLastIndex = targetOffset + targetCount - 1; // target(目标字符串)的最后一个字符的索引(记为lastChar) char strLastChar = target[strLastIndex]; // lastChar int min = sourceOffset + targetCount - 1; // lastChar在source(源字符串)中的最小索引 int i = min + fromIndex; // lastChar在source中的最大索引 startSearchForLastChar: //循环标签 while (true) { while (i >= min && source[i] != strLastChar) { //找寻lastChar i--; } if (i < min) { //① return -1; } int j = i - 1; // i为找到的lastChar的索引 int start = j - (targetCount - 1); // 即 start = i - targetCount int k = strLastIndex - 1; while (j > start) { if (source[j--] != target[k--]) { i--; //注意此处是i-- 用j与target遍历比较是比较明智的,不改变i continue startSearchForLastChar; } } return start - sourceOffset + 1; //上面循环正常结束,则找到target 结果表示为start + 1 - sourceOffset更容易理解 } }
(13) substring(int beginIndex) 截取从beginIndex到末尾的字符串;beginIndex为0时直接返回this,否则返回新的String对象
public String substring(int beginIndex) { if (beginIndex < 0) { throw new StringIndexOutOfBoundsException(beginIndex); } int subLen = value.length - beginIndex; //截取长度 if (subLen < 0) { throw new StringIndexOutOfBoundsException(subLen); } return (beginIndex == 0) ? this : new String(value, beginIndex, subLen); //返回新字符串 }
(14) concat(String str) 连接字符串;若str为空串则直接返回this,否则返回新的String对象
public String concat(String str) { int otherLen = str.length(); if (otherLen == 0) { return this; } int len = value.length; char buf[] = Arrays.copyOf(value, len + otherLen); //将value复制到buf[]中,从0索引处开始 str.getChars(buf, len); //将str复制到buf[]中,从len索引处开始 return new String(buf, true); //返回新字符串 }
(15) join(CharSequence delimiter, Iterable<? extends CharSequence> elements) 类方法,通过遍历拼接CharSequence
public static String join(CharSequence delimiter, // delimiter: 分隔符 Iterable<? extends CharSequence> elements) { Objects.requireNonNull(delimiter); // 要求不为空 Objects.requireNonNull(elements); StringJoiner joiner = new StringJoiner(delimiter); // elements是Iterable的实例,所以可以用forEach()方法
for (CharSequence cs: elements) { // 实际是调用其Iterator()方法的返回值(iterator实例)的hasNext()和next()方法 joiner.add(cs); } return joiner.toString(); }
示例:
/**
 * Minimal Iterable used to demonstrate {@code String.join(CharSequence, Iterable)}.
 * Iterating it yields "today", "is", "Tuesday".
 */
class IterTest implements Iterable<CharSequence> {
    @Override
    public Iterator<CharSequence> iterator() {
        class It implements Iterator<CharSequence> {
            CharSequence[] s = {"today", "is", "Tuesday"};
            int next = 0;

            @Override
            public boolean hasNext() {
                return next != s.length;
            }

            @Override
            public CharSequence next() {
                // honour the Iterator contract once the elements are exhausted
                if (!hasNext()) {
                    throw new java.util.NoSuchElementException();
                }
                return s[next++];
            }
        }
        return new It();
    }

    /** Runs the example; "," is the delimiter. */
    public static void main(String[] args) {
        System.out.println(String.join(",", new IterTest()));
    }
}
// Output:
//   today,is,Tuesday
(16) trim() 去掉字符串两端的空白字符(所有不大于空格' '即\u0020的字符);若两端没有可去掉的字符则返回this,否则返回新String对象
public String trim() { int len = value.length; int st = 0; char[] val = value; /* avoid getfield opcode */ while ((st < len) && (val[st] <= ‘ ‘)) { // 去掉左边的空格 st++; } while ((st < len) && (val[len - 1] <= ‘ ‘)) { // 若满足st < len表示至少有一个非空字符 len--; } return ((st > 0) || (len < value.length)) ? substring(st, len) : this; //只要有一端有空字符,则返回截取的字符串 }
以上是常用方法...
以上是关于String源码学习笔记的主要内容,如果未能解决你的问题,请参考以下文章
Android内核源码bionic目录下的子目录arch-arm源码分析笔记
[原创]java WEB学习笔记61:Struts2学习之路--通用标签 property,uri,param,set,push,if-else,itertor,sort,date,a标签等(代码片段