Matcher类(java JDK源码记录)
Posted zhangyishu
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Matcher类(java JDK源码记录)相关的知识,希望对你有一定的参考价值。
1 /* 2 * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. 3 * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. 4 * 5 * 6 * 7 * 8 * 9 * 10 * 11 * 12 * 13 * 14 * 15 * 16 * 17 * 18 * 19 * 20 * 21 * 22 * 23 * 24 */ 25 26 package java.util.regex; 27 28 import java.util.ConcurrentModificationException; 29 import java.util.Iterator; 30 import java.util.NoSuchElementException; 31 import java.util.Objects; 32 import java.util.Spliterator; 33 import java.util.Spliterators; 34 import java.util.function.Consumer; 35 import java.util.function.Function; 36 import java.util.stream.Stream; 37 import java.util.stream.StreamSupport; 38 39 /** 40 * An engine that performs match operations on a @linkplain 41 * java.lang.CharSequence character sequence by interpreting a @link Pattern. 42 * 43 * <p> A matcher is created from a pattern by invoking the pattern‘s @link 44 * Pattern#matcher matcher method. Once created, a matcher can be used to 45 * perform three different kinds of match operations: 46 * 47 * <ul> 48 * 49 * <li><p> The @link #matches matches method attempts to match the entire 50 * input sequence against the pattern. </p></li> 51 * 52 * <li><p> The @link #lookingAt lookingAt method attempts to match the 53 * input sequence, starting at the beginning, against the pattern. </p></li> 54 * 55 * <li><p> The @link #find find method scans the input sequence looking 56 * for the next subsequence that matches the pattern. </p></li> 57 * 58 * </ul> 59 * 60 * <p> Each of these methods returns a boolean indicating success or failure. 61 * More information about a successful match can be obtained by querying the 62 * state of the matcher. 63 * 64 * <p> A matcher finds matches in a subset of its input called the 65 * <i>region</i>. By default, the region contains all of the matcher‘s input. 
 * The region can be modified via the {@link #region(int, int) region} method
 * and queried via the {@link #regionStart() regionStart} and {@link
 * #regionEnd() regionEnd} methods. The way that the region boundaries interact
 * with some pattern constructs can be changed. See {@link
 * #useAnchoringBounds(boolean) useAnchoringBounds} and {@link
 * #useTransparentBounds(boolean) useTransparentBounds} for more details.
 *
 * <p> This class also defines methods for replacing matched subsequences with
 * new strings whose contents can, if desired, be computed from the match
 * result.  The {@link #appendReplacement appendReplacement} and {@link
 * #appendTail appendTail} methods can be used in tandem in order to collect
 * the result into an existing string buffer or string builder. Alternatively,
 * the more convenient {@link #replaceAll replaceAll} method can be used to
 * create a string in which every matching subsequence in the input sequence
 * is replaced.
 *
 * <p> The explicit state of a matcher includes the start and end indices of
 * the most recent successful match.  It also includes the start and end
 * indices of the input subsequence captured by each <a
 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
 * count of such subsequences.  As a convenience, methods are also provided for
 * returning these captured subsequences in string form.
 *
 * <p> The explicit state of a matcher is initially undefined; attempting to
 * query any part of it before a successful match will cause an {@link
 * IllegalStateException} to be thrown.  The explicit state of a matcher is
 * recomputed by every match operation.
 *
 * <p> The implicit state of a matcher includes the input character sequence as
 * well as the <i>append position</i>, which is initially zero and is updated
 * by the {@link #appendReplacement appendReplacement} method.
97 * 98 * <p> A matcher may be reset explicitly by invoking its @link #reset() 99 * method or, if a new input sequence is desired, its @link 100 * #reset(java.lang.CharSequence) reset(CharSequence) method. Resetting a 101 * matcher discards its explicit state information and sets the append position 102 * to zero. 103 * 104 * <p> Instances of this class are not safe for use by multiple concurrent 105 * threads. </p> 106 * 107 * 108 * @author Mike McCloskey 109 * @author Mark Reinhold 110 * @author JSR-51 Expert Group 111 * @since 1.4 112 * @spec JSR-51 113 */ 114 115 public final class Matcher implements MatchResult 116 117 /** 118 * The Pattern object that created this Matcher. 119 */ 120 Pattern parentPattern; 121 122 /** 123 * The storage used by groups. They may contain invalid values if 124 * a group was skipped during the matching. 125 */ 126 int[] groups; 127 128 /** 129 * The range within the sequence that is to be matched. Anchors 130 * will match at these "hard" boundaries. Changing the region 131 * changes these values. 132 */ 133 int from, to; 134 135 /** 136 * Lookbehind uses this value to ensure that the subexpression 137 * match ends at the point where the lookbehind was encountered. 138 */ 139 int lookbehindTo; 140 141 /** 142 * The original string being matched. 143 */ 144 CharSequence text; 145 146 /** 147 * Matcher state used by the last node. NOANCHOR is used when a 148 * match does not have to consume all of the input. ENDANCHOR is 149 * the mode used for matching all the input. 150 */ 151 static final int ENDANCHOR = 1; 152 static final int NOANCHOR = 0; 153 int acceptMode = NOANCHOR; 154 155 /** 156 * The range of string that last matched the pattern. If the last 157 * match failed then first is -1; last initially holds 0 then it 158 * holds the index of the end of the last match (which is where the 159 * next search starts). 160 */ 161 int first = -1, last = 0; 162 163 /** 164 * The end index of what matched in the last match operation. 
165 */ 166 int oldLast = -1; 167 168 /** 169 * The index of the last position appended in a substitution. 170 */ 171 int lastAppendPosition = 0; 172 173 /** 174 * Storage used by nodes to tell what repetition they are on in 175 * a pattern, and where groups begin. The nodes themselves are stateless, 176 * so they rely on this field to hold state during a match. 177 */ 178 int[] locals; 179 180 /** 181 * Storage used by top greedy Loop node to store a specific hash set to 182 * keep the beginning index of the failed repetition match. The nodes 183 * themselves are stateless, so they rely on this field to hold state 184 * during a match. 185 */ 186 IntHashSet[] localsPos; 187 188 /** 189 * Boolean indicating whether or not more input could change 190 * the results of the last match. 191 * 192 * If hitEnd is true, and a match was found, then more input 193 * might cause a different match to be found. 194 * If hitEnd is true and a match was not found, then more 195 * input could cause a match to be found. 196 * If hitEnd is false and a match was found, then more input 197 * will not change the match. 198 * If hitEnd is false and a match was not found, then more 199 * input will not cause a match to be found. 200 */ 201 boolean hitEnd; 202 203 /** 204 * Boolean indicating whether or not more input could change 205 * a positive match into a negative one. 206 * 207 * If requireEnd is true, and a match was found, then more 208 * input could cause the match to be lost. 209 * If requireEnd is false and a match was found, then more 210 * input might change the match but the match won‘t be lost. 211 * If a match was not found, then requireEnd has no meaning. 212 */ 213 boolean requireEnd; 214 215 /** 216 * If transparentBounds is true then the boundaries of this 217 * matcher‘s region are transparent to lookahead, lookbehind, 218 * and boundary matching constructs that try to see beyond them. 
219 */ 220 boolean transparentBounds = false; 221 222 /** 223 * If anchoringBounds is true then the boundaries of this 224 * matcher‘s region match anchors such as ^ and $. 225 */ 226 boolean anchoringBounds = true; 227 228 /** 229 * Number of times this matcher‘s state has been modified 230 */ 231 int modCount; 232 233 /** 234 * No default constructor. 235 */ 236 Matcher() 237 238 239 /** 240 * All matchers have the state used by Pattern during a match. 241 */ 242 Matcher(Pattern parent, CharSequence text) 243 this.parentPattern = parent; 244 this.text = text; 245 246 // Allocate state storage 247 int parentGroupCount = Math.max(parent.capturingGroupCount, 10); 248 groups = new int[parentGroupCount * 2]; 249 locals = new int[parent.localCount]; 250 localsPos = new IntHashSet[parent.localTCNCount]; 251 252 // Put fields into initial states 253 reset(); 254 255 256 /** 257 * Returns the pattern that is interpreted by this matcher. 258 * 259 * @return The pattern for which this matcher was created 260 */ 261 public Pattern pattern() 262 return parentPattern; 263 264 265 /** 266 * Returns the match state of this matcher as a @link MatchResult. 267 * The result is unaffected by subsequent operations performed upon this 268 * matcher. 
269 * 270 * @return a @code MatchResult with the state of this matcher 271 * @since 1.5 272 */ 273 public MatchResult toMatchResult() 274 return toMatchResult(text.toString()); 275 276 277 private MatchResult toMatchResult(String text) 278 return new ImmutableMatchResult(this.first, 279 this.last, 280 groupCount(), 281 this.groups.clone(), 282 text); 283 284 285 private static class ImmutableMatchResult implements MatchResult 286 private final int first; 287 private final int last; 288 private final int[] groups; 289 private final int groupCount; 290 private final String text; 291 292 ImmutableMatchResult(int first, int last, int groupCount, 293 int groups[], String text) 294 295 this.first = first; 296 this.last = last; 297 this.groupCount = groupCount; 298 this.groups = groups; 299 this.text = text; 300 301 302 @Override 303 public int start() 304 checkMatch(); 305 return first; 306 307 308 @Override 309 public int start(int group) 310 checkMatch(); 311 if (group < 0 || group > groupCount) 312 throw new IndexOutOfBoundsException("No group " + group); 313 return groups[group * 2]; 314 315 316 @Override 317 public int end() 318 checkMatch(); 319 return last; 320 321 322 @Override 323 public int end(int group) 324 checkMatch(); 325 if (group < 0 || group > groupCount) 326 throw new IndexOutOfBoundsException("No group " + group); 327 return groups[group * 2 + 1]; 328 329 330 @Override 331 public int groupCount() 332 return groupCount; 333 334 335 @Override 336 public String group() 337 checkMatch(); 338 return group(0); 339 340 341 @Override 342 public String group(int group) 343 checkMatch(); 344 if (group < 0 || group > groupCount) 345 throw new IndexOutOfBoundsException("No group " + group); 346 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 347 return null; 348 return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 349 350 351 private void checkMatch() 352 if (first < 0) 353 throw new IllegalStateException("No match found"); 354 
355 356 357 358 /** 359 * Changes the @code Pattern that this @code Matcher uses to 360 * find matches with. 361 * 362 * <p> This method causes this matcher to lose information 363 * about the groups of the last match that occurred. The 364 * matcher‘s position in the input is maintained and its 365 * last append position is unaffected.</p> 366 * 367 * @param newPattern 368 * The new pattern used by this matcher 369 * @return This matcher 370 * @throws IllegalArgumentException 371 * If newPattern is @code null 372 * @since 1.5 373 */ 374 public Matcher usePattern(Pattern newPattern) 375 if (newPattern == null) 376 throw new IllegalArgumentException("Pattern cannot be null"); 377 parentPattern = newPattern; 378 379 // Reallocate state storage 380 int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10); 381 groups = new int[parentGroupCount * 2]; 382 locals = new int[newPattern.localCount]; 383 for (int i = 0; i < groups.length; i++) 384 groups[i] = -1; 385 for (int i = 0; i < locals.length; i++) 386 locals[i] = -1; 387 localsPos = new IntHashSet[parentPattern.localTCNCount]; 388 modCount++; 389 return this; 390 391 392 /** 393 * Resets this matcher. 394 * 395 * <p> Resetting a matcher discards all of its explicit state information 396 * and sets its append position to zero. The matcher‘s region is set to the 397 * default region, which is its entire character sequence. The anchoring 398 * and transparency of this matcher‘s region boundaries are unaffected. 
399 * 400 * @return This matcher 401 */ 402 public Matcher reset() 403 first = -1; 404 last = 0; 405 oldLast = -1; 406 for(int i=0; i<groups.length; i++) 407 groups[i] = -1; 408 for(int i=0; i<locals.length; i++) 409 locals[i] = -1; 410 for (int i = 0; i < localsPos.length; i++) 411 if (localsPos[i] != null) 412 localsPos[i].clear(); 413 414 lastAppendPosition = 0; 415 from = 0; 416 to = getTextLength(); 417 modCount++; 418 return this; 419 420 421 /** 422 * Resets this matcher with a new input sequence. 423 * 424 * <p> Resetting a matcher discards all of its explicit state information 425 * and sets its append position to zero. The matcher‘s region is set to 426 * the default region, which is its entire character sequence. The 427 * anchoring and transparency of this matcher‘s region boundaries are 428 * unaffected. 429 * 430 * @param input 431 * The new input character sequence 432 * 433 * @return This matcher 434 */ 435 public Matcher reset(CharSequence input) 436 text = input; 437 return reset(); 438 439 440 /** 441 * Returns the start index of the previous match. 442 * 443 * @return The index of the first character matched 444 * 445 * @throws IllegalStateException 446 * If no match has yet been attempted, 447 * or if the previous match operation failed 448 */ 449 public int start() 450 if (first < 0) 451 throw new IllegalStateException("No match available"); 452 return first; 453 454 455 /** 456 * Returns the start index of the subsequence captured by the given group 457 * during the previous match operation. 458 * 459 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 460 * to right, starting at one. Group zero denotes the entire pattern, so 461 * the expression <i>m.</i>@code start(0) is equivalent to 462 * <i>m.</i>@code start(). 
</p> 463 * 464 * @param group 465 * The index of a capturing group in this matcher‘s pattern 466 * 467 * @return The index of the first character captured by the group, 468 * or @code -1 if the match was successful but the group 469 * itself did not match anything 470 * 471 * @throws IllegalStateException 472 * If no match has yet been attempted, 473 * or if the previous match operation failed 474 * 475 * @throws IndexOutOfBoundsException 476 * If there is no capturing group in the pattern 477 * with the given index 478 */ 479 public int start(int group) 480 if (first < 0) 481 throw new IllegalStateException("No match available"); 482 if (group < 0 || group > groupCount()) 483 throw new IndexOutOfBoundsException("No group " + group); 484 return groups[group * 2]; 485 486 487 /** 488 * Returns the start index of the subsequence captured by the given 489 * <a href="Pattern.html#groupname">named-capturing group</a> during the 490 * previous match operation. 491 * 492 * @param name 493 * The name of a named-capturing group in this matcher‘s pattern 494 * 495 * @return The index of the first character captured by the group, 496 * or @code -1 if the match was successful but the group 497 * itself did not match anything 498 * 499 * @throws IllegalStateException 500 * If no match has yet been attempted, 501 * or if the previous match operation failed 502 * 503 * @throws IllegalArgumentException 504 * If there is no capturing group in the pattern 505 * with the given name 506 * @since 1.8 507 */ 508 public int start(String name) 509 return groups[getMatchedGroupIndex(name) * 2]; 510 511 512 /** 513 * Returns the offset after the last character matched. 
514 * 515 * @return The offset after the last character matched 516 * 517 * @throws IllegalStateException 518 * If no match has yet been attempted, 519 * or if the previous match operation failed 520 */ 521 public int end() 522 if (first < 0) 523 throw new IllegalStateException("No match available"); 524 return last; 525 526 527 /** 528 * Returns the offset after the last character of the subsequence 529 * captured by the given group during the previous match operation. 530 * 531 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 532 * to right, starting at one. Group zero denotes the entire pattern, so 533 * the expression <i>m.</i>@code end(0) is equivalent to 534 * <i>m.</i>@code end(). </p> 535 * 536 * @param group 537 * The index of a capturing group in this matcher‘s pattern 538 * 539 * @return The offset after the last character captured by the group, 540 * or @code -1 if the match was successful 541 * but the group itself did not match anything 542 * 543 * @throws IllegalStateException 544 * If no match has yet been attempted, 545 * or if the previous match operation failed 546 * 547 * @throws IndexOutOfBoundsException 548 * If there is no capturing group in the pattern 549 * with the given index 550 */ 551 public int end(int group) 552 if (first < 0) 553 throw new IllegalStateException("No match available"); 554 if (group < 0 || group > groupCount()) 555 throw new IndexOutOfBoundsException("No group " + group); 556 return groups[group * 2 + 1]; 557 558 559 /** 560 * Returns the offset after the last character of the subsequence 561 * captured by the given <a href="Pattern.html#groupname">named-capturing 562 * group</a> during the previous match operation. 
563 * 564 * @param name 565 * The name of a named-capturing group in this matcher‘s pattern 566 * 567 * @return The offset after the last character captured by the group, 568 * or @code -1 if the match was successful 569 * but the group itself did not match anything 570 * 571 * @throws IllegalStateException 572 * If no match has yet been attempted, 573 * or if the previous match operation failed 574 * 575 * @throws IllegalArgumentException 576 * If there is no capturing group in the pattern 577 * with the given name 578 * @since 1.8 579 */ 580 public int end(String name) 581 return groups[getMatchedGroupIndex(name) * 2 + 1]; 582 583 584 /** 585 * Returns the input subsequence matched by the previous match. 586 * 587 * <p> For a matcher <i>m</i> with input sequence <i>s</i>, 588 * the expressions <i>m.</i>@code group() and 589 * <i>s.</i>@code substring(<i>m.</i>@code start(), <i>m.</i> 590 * @code end()) are equivalent. </p> 591 * 592 * <p> Note that some patterns, for example @code a*, match the empty 593 * string. This method will return the empty string when the pattern 594 * successfully matches the empty string in the input. </p> 595 * 596 * @return The (possibly empty) subsequence matched by the previous match, 597 * in string form 598 * 599 * @throws IllegalStateException 600 * If no match has yet been attempted, 601 * or if the previous match operation failed 602 */ 603 public String group() 604 return group(0); 605 606 607 /** 608 * Returns the input subsequence captured by the given group during the 609 * previous match operation. 610 * 611 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index 612 * <i>g</i>, the expressions <i>m.</i>@code group(<i>g</i>@code ) and 613 * <i>s.</i>@code substring(<i>m.</i>@code start(<i>g</i>@code 614 * ), <i>m.</i>@code end(<i>g</i>@code )) 615 * are equivalent. </p> 616 * 617 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 618 * to right, starting at one. 
Group zero denotes the entire pattern, so 619 * the expression @code m.group(0) is equivalent to @code m.group(). 620 * </p> 621 * 622 * <p> If the match was successful but the group specified failed to match 623 * any part of the input sequence, then @code null is returned. Note 624 * that some groups, for example @code (a*), match the empty string. 625 * This method will return the empty string when such a group successfully 626 * matches the empty string in the input. </p> 627 * 628 * @param group 629 * The index of a capturing group in this matcher‘s pattern 630 * 631 * @return The (possibly empty) subsequence captured by the group 632 * during the previous match, or @code null if the group 633 * failed to match part of the input 634 * 635 * @throws IllegalStateException 636 * If no match has yet been attempted, 637 * or if the previous match operation failed 638 * 639 * @throws IndexOutOfBoundsException 640 * If there is no capturing group in the pattern 641 * with the given index 642 */ 643 public String group(int group) 644 if (first < 0) 645 throw new IllegalStateException("No match found"); 646 if (group < 0 || group > groupCount()) 647 throw new IndexOutOfBoundsException("No group " + group); 648 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 649 return null; 650 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 651 652 653 /** 654 * Returns the input subsequence captured by the given 655 * <a href="Pattern.html#groupname">named-capturing group</a> during the 656 * previous match operation. 657 * 658 * <p> If the match was successful but the group specified failed to match 659 * any part of the input sequence, then @code null is returned. Note 660 * that some groups, for example @code (a*), match the empty string. 661 * This method will return the empty string when such a group successfully 662 * matches the empty string in the input. 
</p> 663 * 664 * @param name 665 * The name of a named-capturing group in this matcher‘s pattern 666 * 667 * @return The (possibly empty) subsequence captured by the named group 668 * during the previous match, or @code null if the group 669 * failed to match part of the input 670 * 671 * @throws IllegalStateException 672 * If no match has yet been attempted, 673 * or if the previous match operation failed 674 * 675 * @throws IllegalArgumentException 676 * If there is no capturing group in the pattern 677 * with the given name 678 * @since 1.7 679 */ 680 public String group(String name) 681 int group = getMatchedGroupIndex(name); 682 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 683 return null; 684 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 685 686 687 /** 688 * Returns the number of capturing groups in this matcher‘s pattern. 689 * 690 * <p> Group zero denotes the entire pattern by convention. It is not 691 * included in this count. 692 * 693 * <p> Any non-negative integer smaller than or equal to the value 694 * returned by this method is guaranteed to be a valid group index for 695 * this matcher. </p> 696 * 697 * @return The number of capturing groups in this matcher‘s pattern 698 */ 699 public int groupCount() 700 return parentPattern.capturingGroupCount - 1; 701 702 703 /** 704 * Attempts to match the entire region against the pattern. 705 * 706 * <p> If the match succeeds then more information can be obtained via the 707 * @code start, @code end, and @code group methods. </p> 708 * 709 * @return @code true if, and only if, the entire region sequence 710 * matches this matcher‘s pattern 711 */ 712 public boolean matches() 713 return match(from, ENDANCHOR); 714 715 716 /** 717 * Attempts to find the next subsequence of the input sequence that matches 718 * the pattern. 
719 * 720 * <p> This method starts at the beginning of this matcher‘s region, or, if 721 * a previous invocation of the method was successful and the matcher has 722 * not since been reset, at the first character not matched by the previous 723 * match. 724 * 725 * <p> If the match succeeds then more information can be obtained via the 726 * @code start, @code end, and @code group methods. </p> 727 * 728 * @return @code true if, and only if, a subsequence of the input 729 * sequence matches this matcher‘s pattern 730 */ 731 public boolean find() 732 int nextSearchIndex = last; 733 if (nextSearchIndex == first) 734 nextSearchIndex++; 735 736 // If next search starts before region, start it at region 737 if (nextSearchIndex < from) 738 nextSearchIndex = from; 739 740 // If next search starts beyond region then it fails 741 if (nextSearchIndex > to) 742 for (int i = 0; i < groups.length; i++) 743 groups[i] = -1; 744 return false; 745 746 return search(nextSearchIndex); 747 748 749 /** 750 * Resets this matcher and then attempts to find the next subsequence of 751 * the input sequence that matches the pattern, starting at the specified 752 * index. 753 * 754 * <p> If the match succeeds then more information can be obtained via the 755 * @code start, @code end, and @code group methods, and subsequent 756 * invocations of the @link #find() method will start at the first 757 * character not matched by this match. </p> 758 * 759 * @param start the index to start searching for a match 760 * @throws IndexOutOfBoundsException 761 * If start is less than zero or if start is greater than the 762 * length of the input sequence. 
763 * 764 * @return @code true if, and only if, a subsequence of the input 765 * sequence starting at the given index matches this matcher‘s 766 * pattern 767 */ 768 public boolean find(int start) 769 int limit = getTextLength(); 770 if ((start < 0) || (start > limit)) 771 throw new IndexOutOfBoundsException("Illegal start index"); 772 reset(); 773 return search(start); 774 775 776 /** 777 * Attempts to match the input sequence, starting at the beginning of the 778 * region, against the pattern. 779 * 780 * <p> Like the @link #matches matches method, this method always starts 781 * at the beginning of the region; unlike that method, it does not 782 * require that the entire region be matched. 783 * 784 * <p> If the match succeeds then more information can be obtained via the 785 * @code start, @code end, and @code group methods. </p> 786 * 787 * @return @code true if, and only if, a prefix of the input 788 * sequence matches this matcher‘s pattern 789 */ 790 public boolean lookingAt() 791 return match(from, NOANCHOR); 792 793 794 /** 795 * Returns a literal replacement @code String for the specified 796 * @code String. 797 * 798 * This method produces a @code String that will work 799 * as a literal replacement @code s in the 800 * @code appendReplacement method of the @link Matcher class. 801 * The @code String produced will match the sequence of characters 802 * in @code s treated as a literal sequence. Slashes (‘\‘) and 803 * dollar signs (‘$‘) will be given no special meaning. 
804 * 805 * @param s The string to be literalized 806 * @return A literal string replacement 807 * @since 1.5 808 */ 809 public static String quoteReplacement(String s) 810 if ((s.indexOf(‘\\‘) == -1) && (s.indexOf(‘$‘) == -1)) 811 return s; 812 StringBuilder sb = new StringBuilder(); 813 for (int i=0; i<s.length(); i++) 814 char c = s.charAt(i); 815 if (c == ‘\\‘ || c == ‘$‘) 816 sb.append(‘\\‘); 817 818 sb.append(c); 819 820 return sb.toString(); 821 822 823 /** 824 * Implements a non-terminal append-and-replace step. 825 * 826 * <p> This method performs the following actions: </p> 827 * 828 * <ol> 829 * 830 * <li><p> It reads characters from the input sequence, starting at the 831 * append position, and appends them to the given string buffer. It 832 * stops after reading the last character preceding the previous match, 833 * that is, the character at index @link 834 * #start() @code - @code 1. </p></li> 835 * 836 * <li><p> It appends the given replacement string to the string buffer. 837 * </p></li> 838 * 839 * <li><p> It sets the append position of this matcher to the index of 840 * the last character matched, plus one, that is, to @link #end(). 841 * </p></li> 842 * 843 * </ol> 844 * 845 * <p> The replacement string may contain references to subsequences 846 * captured during the previous match: Each occurrence of 847 * <code>$</code><i>name</i><code></code> or @code $<i>g</i> 848 * will be replaced by the result of evaluating the corresponding 849 * @link #group(String) group(name) or @link #group(int) group(g) 850 * respectively. For @code $<i>g</i>, 851 * the first number after the @code $ is always treated as part of 852 * the group reference. Subsequent numbers are incorporated into g if 853 * they would form a legal group reference. Only the numerals ‘0‘ 854 * through ‘9‘ are considered as potential components of the group 855 * reference. 
If the second group matched the string @code "foo", for 856 * example, then passing the replacement string @code "$2bar" would 857 * cause @code "foobar" to be appended to the string buffer. A dollar 858 * sign (@code $) may be included as a literal in the replacement 859 * string by preceding it with a backslash (@code \$). 860 * 861 * <p> Note that backslashes (@code \) and dollar signs (@code $) in 862 * the replacement string may cause the results to be different than if it 863 * were being treated as a literal replacement string. Dollar signs may be 864 * treated as references to captured subsequences as described above, and 865 * backslashes are used to escape literal characters in the replacement 866 * string. 867 * 868 * <p> This method is intended to be used in a loop together with the 869 * @link #appendTail(StringBuffer) appendTail and @link #find() find 870 * methods. The following code, for example, writes @code one dog two dogs 871 * in the yard to the standard-output stream: </p> 872 * 873 * <blockquote><pre> 874 * Pattern p = Pattern.compile("cat"); 875 * Matcher m = p.matcher("one cat two cats in the yard"); 876 * StringBuffer sb = new StringBuffer(); 877 * while (m.find()) 878 * m.appendReplacement(sb, "dog"); 879 * 880 * m.appendTail(sb); 881 * System.out.println(sb.toString());</pre></blockquote> 882 * 883 * @param sb 884 * The target string buffer 885 * 886 * @param replacement 887 * The replacement string 888 * 889 * @return This matcher 890 * 891 * @throws IllegalStateException 892 * If no match has yet been attempted, 893 * or if the previous match operation failed 894 * 895 * @throws IllegalArgumentException 896 * If the replacement string refers to a named-capturing 897 * group that does not exist in the pattern 898 * 899 * @throws IndexOutOfBoundsException 900 * If the replacement string refers to a capturing group 901 * that does not exist in the pattern 902 */ 903 public Matcher appendReplacement(StringBuffer sb, String replacement) 904 
// If no match, return error 905 if (first < 0) 906 throw new IllegalStateException("No match available"); 907 StringBuilder result = new StringBuilder(); 908 appendExpandedReplacement(replacement, result); 909 // Append the intervening text 910 sb.append(text, lastAppendPosition, first); 911 // Append the match substitution 912 sb.append(result); 913 lastAppendPosition = last; 914 modCount++; 915 return this; 916 917 918 /** 919 * Implements a non-terminal append-and-replace step. 920 * 921 * <p> This method performs the following actions: </p> 922 * 923 * <ol> 924 * 925 * <li><p> It reads characters from the input sequence, starting at the 926 * append position, and appends them to the given string builder. It 927 * stops after reading the last character preceding the previous match, 928 * that is, the character at index @link 929 * #start() @code - @code 1. </p></li> 930 * 931 * <li><p> It appends the given replacement string to the string builder. 932 * </p></li> 933 * 934 * <li><p> It sets the append position of this matcher to the index of 935 * the last character matched, plus one, that is, to @link #end(). 936 * </p></li> 937 * 938 * </ol> 939 * 940 * <p> The replacement string may contain references to subsequences 941 * captured during the previous match: Each occurrence of 942 * @code $<i>g</i> will be replaced by the result of 943 * evaluating @link #group(int) group@code (<i>g</i>@code ). 944 * The first number after the @code $ is always treated as part of 945 * the group reference. Subsequent numbers are incorporated into g if 946 * they would form a legal group reference. Only the numerals ‘0‘ 947 * through ‘9‘ are considered as potential components of the group 948 * reference. If the second group matched the string @code "foo", for 949 * example, then passing the replacement string @code "$2bar" would 950 * cause @code "foobar" to be appended to the string builder. 
A dollar 951 * sign (@code $) may be included as a literal in the replacement 952 * string by preceding it with a backslash (@code \$). 953 * 954 * <p> Note that backslashes (@code \) and dollar signs (@code $) in 955 * the replacement string may cause the results to be different than if it 956 * were being treated as a literal replacement string. Dollar signs may be 957 * treated as references to captured subsequences as described above, and 958 * backslashes are used to escape literal characters in the replacement 959 * string. 960 * 961 * <p> This method is intended to be used in a loop together with the 962 * @link #appendTail(StringBuilder) appendTail and 963 * @link #find() find methods. The following code, for example, writes 964 * @code one dog two dogs in the yard to the standard-output stream: </p> 965 * 966 * <blockquote><pre> 967 * Pattern p = Pattern.compile("cat"); 968 * Matcher m = p.matcher("one cat two cats in the yard"); 969 * StringBuilder sb = new StringBuilder(); 970 * while (m.find()) 971 * m.appendReplacement(sb, "dog"); 972 * 973 * m.appendTail(sb); 974 * System.out.println(sb.toString());</pre></blockquote> 975 * 976 * @param sb 977 * The target string builder 978 * @param replacement 979 * The replacement string 980 * @return This matcher 981 * 982 * @throws IllegalStateException 983 * If no match has yet been attempted, 984 * or if the previous match operation failed 985 * @throws IllegalArgumentException 986 * If the replacement string refers to a named-capturing 987 * group that does not exist in the pattern 988 * @throws IndexOutOfBoundsException 989 * If the replacement string refers to a capturing group 990 * that does not exist in the pattern 991 * @since 9 992 */ 993 public Matcher appendReplacement(StringBuilder sb, String replacement) 994 // If no match, return error 995 if (first < 0) 996 throw new IllegalStateException("No match available"); 997 StringBuilder result = new StringBuilder(); 998 
appendExpandedReplacement(replacement, result); 999 // Append the intervening text 1000 sb.append(text, lastAppendPosition, first); 1001 // Append the match substitution 1002 sb.append(result); 1003 lastAppendPosition = last; 1004 modCount++; 1005 return this; 1006 1007 1008 /** 1009 * Processes replacement string to replace group references with 1010 * groups. 1011 */ 1012 private StringBuilder appendExpandedReplacement( 1013 String replacement, StringBuilder result) 1014 int cursor = 0; 1015 while (cursor < replacement.length()) 1016 char nextChar = replacement.charAt(cursor); 1017 if (nextChar == ‘\\‘) 1018 cursor++; 1019 if (cursor == replacement.length()) 1020 throw new IllegalArgumentException( 1021 "character to be escaped is missing"); 1022 nextChar = replacement.charAt(cursor); 1023 result.append(nextChar); 1024 cursor++; 1025 else if (nextChar == ‘$‘) 1026 // Skip past $ 1027 cursor++; 1028 // Throw IAE if this "$" is the last character in replacement 1029 if (cursor == replacement.length()) 1030 throw new IllegalArgumentException( 1031 "Illegal group reference: group index is missing"); 1032 nextChar = replacement.charAt(cursor); 1033 int refNum = -1; 1034 if (nextChar == ‘‘) 1035 cursor++; 1036 StringBuilder gsb = new StringBuilder(); 1037 while (cursor < replacement.length()) 1038 nextChar = replacement.charAt(cursor); 1039 if (ASCII.isLower(nextChar) || 1040 ASCII.isUpper(nextChar) || 1041 ASCII.isDigit(nextChar)) 1042 gsb.append(nextChar); 1043 cursor++; 1044 else 1045 break; 1046 1047 1048 if (gsb.length() == 0) 1049 throw new IllegalArgumentException( 1050 "named capturing group has 0 length name"); 1051 if (nextChar != ‘‘) 1052 throw new IllegalArgumentException( 1053 "named capturing group is missing trailing ‘‘"); 1054 String gname = gsb.toString(); 1055 if (ASCII.isDigit(gname.charAt(0))) 1056 throw new IllegalArgumentException( 1057 "capturing group name " + gname + 1058 " starts with digit character"); 1059 if 
(!parentPattern.namedGroups().containsKey(gname)) 1060 throw new IllegalArgumentException( 1061 "No group with name " + gname + ""); 1062 refNum = parentPattern.namedGroups().get(gname); 1063 cursor++; 1064 else 1065 // The first number is always a group 1066 refNum = nextChar - ‘0‘; 1067 if ((refNum < 0) || (refNum > 9)) 1068 throw new IllegalArgumentException( 1069 "Illegal group reference"); 1070 cursor++; 1071 // Capture the largest legal group string 1072 boolean done = false; 1073 while (!done) 1074 if (cursor >= replacement.length()) 1075 break; 1076 1077 int nextDigit = replacement.charAt(cursor) - ‘0‘; 1078 if ((nextDigit < 0) || (nextDigit > 9)) // not a number 1079 break; 1080 1081 int newRefNum = (refNum * 10) + nextDigit; 1082 if (groupCount() < newRefNum) 1083 done = true; 1084 else 1085 refNum = newRefNum; 1086 cursor++; 1087 1088 1089 1090 // Append group 1091 if (start(refNum) != -1 && end(refNum) != -1) 1092 result.append(text, start(refNum), end(refNum)); 1093 else 1094 result.append(nextChar); 1095 cursor++; 1096 1097 1098 return result; 1099 1100 1101 /** 1102 * Implements a terminal append-and-replace step. 1103 * 1104 * <p> This method reads characters from the input sequence, starting at 1105 * the append position, and appends them to the given string buffer. It is 1106 * intended to be invoked after one or more invocations of the @link 1107 * #appendReplacement(StringBuffer, String) appendReplacement method in 1108 * order to copy the remainder of the input sequence. </p> 1109 * 1110 * @param sb 1111 * The target string buffer 1112 * 1113 * @return The target string buffer 1114 */ 1115 public StringBuffer appendTail(StringBuffer sb) 1116 sb.append(text, lastAppendPosition, getTextLength()); 1117 return sb; 1118 1119 1120 /** 1121 * Implements a terminal append-and-replace step. 1122 * 1123 * <p> This method reads characters from the input sequence, starting at 1124 * the append position, and appends them to the given string builder. 
It is 1125 * intended to be invoked after one or more invocations of the @link 1126 * #appendReplacement(StringBuilder, String) 1127 * appendReplacement method in order to copy the remainder of the input 1128 * sequence. </p> 1129 * 1130 * @param sb 1131 * The target string builder 1132 * 1133 * @return The target string builder 1134 * 1135 * @since 9 1136 */ 1137 public StringBuilder appendTail(StringBuilder sb) 1138 sb.append(text, lastAppendPosition, getTextLength()); 1139 return sb; 1140 1141 1142 /** 1143 * Replaces every subsequence of the input sequence that matches the 1144 * pattern with the given replacement string. 1145 * 1146 * <p> This method first resets this matcher. It then scans the input 1147 * sequence looking for matches of the pattern. Characters that are not 1148 * part of any match are appended directly to the result string; each match 1149 * is replaced in the result by the replacement string. The replacement 1150 * string may contain references to captured subsequences as in the @link 1151 * #appendReplacement appendReplacement method. 1152 * 1153 * <p> Note that backslashes (@code \) and dollar signs (@code $) in 1154 * the replacement string may cause the results to be different than if it 1155 * were being treated as a literal replacement string. Dollar signs may be 1156 * treated as references to captured subsequences as described above, and 1157 * backslashes are used to escape literal characters in the replacement 1158 * string. 1159 * 1160 * <p> Given the regular expression @code a*b, the input 1161 * @code "aabfooaabfooabfoob", and the replacement string 1162 * @code "-", an invocation of this method on a matcher for that 1163 * expression would yield the string @code "-foo-foo-foo-". 1164 * 1165 * <p> Invoking this method changes this matcher‘s state. If the matcher 1166 * is to be used in further matching operations then it should first be 1167 * reset. 
</p> 1168 * 1169 * @param replacement 1170 * The replacement string 1171 * 1172 * @return The string constructed by replacing each matching subsequence 1173 * by the replacement string, substituting captured subsequences 1174 * as needed 1175 */ 1176 public String replaceAll(String replacement) 1177 reset(); 1178 boolean result = find(); 1179 if (result) 1180 StringBuilder sb = new StringBuilder(); 1181 do 1182 appendReplacement(sb, replacement); 1183 result = find(); 1184 while (result); 1185 appendTail(sb); 1186 return sb.toString(); 1187 1188 return text.toString(); 1189 1190 1191 /** 1192 * Replaces every subsequence of the input sequence that matches the 1193 * pattern with the result of applying the given replacer function to the 1194 * match result of this matcher corresponding to that subsequence. 1195 * Exceptions thrown by the function are relayed to the caller. 1196 * 1197 * <p> This method first resets this matcher. It then scans the input 1198 * sequence looking for matches of the pattern. Characters that are not 1199 * part of any match are appended directly to the result string; each match 1200 * is replaced in the result by the applying the replacer function that 1201 * returns a replacement string. Each replacement string may contain 1202 * references to captured subsequences as in the @link #appendReplacement 1203 * appendReplacement method. 1204 * 1205 * <p> Note that backslashes (@code \) and dollar signs (@code $) in 1206 * a replacement string may cause the results to be different than if it 1207 * were being treated as a literal replacement string. Dollar signs may be 1208 * treated as references to captured subsequences as described above, and 1209 * backslashes are used to escape literal characters in the replacement 1210 * string. 
1211 * 1212 * <p> Given the regular expression @code dog, the input 1213 * @code "zzzdogzzzdogzzz", and the function 1214 * @code mr -> mr.group().toUpperCase(), an invocation of this method on 1215 * a matcher for that expression would yield the string 1216 * @code "zzzDOGzzzDOGzzz". 1217 * 1218 * <p> Invoking this method changes this matcher‘s state. If the matcher 1219 * is to be used in further matching operations then it should first be 1220 * reset. </p> 1221 * 1222 * <p> The replacer function should not modify this matcher‘s state during 1223 * replacement. This method will, on a best-effort basis, throw a 1224 * @link java.util.ConcurrentModificationException if such modification is 1225 * detected. 1226 * 1227 * <p> The state of each match result passed to the replacer function is 1228 * guaranteed to be constant only for the duration of the replacer function 1229 * call and only if the replacer function does not modify this matcher‘s 1230 * state. 1231 * 1232 * @implNote 1233 * This implementation applies the replacer function to this matcher, which 1234 * is an instance of @code MatchResult. 1235 * 1236 * @param replacer 1237 * The function to be applied to the match result of this matcher 1238 * that returns a replacement string. 1239 * @return The string constructed by replacing each matching subsequence 1240 * with the result of applying the replacer function to that 1241 * matched subsequence, substituting captured subsequences as 1242 * needed. 
1243 * @throws NullPointerException if the replacer function is null 1244 * @throws ConcurrentModificationException if it is detected, on a 1245 * best-effort basis, that the replacer function modified this 1246 * matcher‘s state 1247 * @since 9 1248 */ 1249 public String replaceAll(Function<MatchResult, String> replacer) 1250 Objects.requireNonNull(replacer); 1251 reset(); 1252 boolean result = find(); 1253 if (result) 1254 StringBuilder sb = new StringBuilder(); 1255 do 1256 int ec = modCount; 1257 String replacement = replacer.apply(this); 1258 if (ec != modCount) 1259 throw new ConcurrentModificationException(); 1260 appendReplacement(sb, replacement); 1261 result = find(); 1262 while (result); 1263 appendTail(sb); 1264 return sb.toString(); 1265 1266 return text.toString(); 1267 1268 1269 /** 1270 * Returns a stream of match results for each subsequence of the input 1271 * sequence that matches the pattern. The match results occur in the 1272 * same order as the matching subsequences in the input sequence. 1273 * 1274 * <p> Each match result is produced as if by @link #toMatchResult(). 1275 * 1276 * <p> This method does not reset this matcher. Matching starts on 1277 * initiation of the terminal stream operation either at the beginning of 1278 * this matcher‘s region, or, if the matcher has not since been reset, at 1279 * the first character not matched by a previous match. 1280 * 1281 * <p> If the matcher is to be used for further matching operations after 1282 * the terminal stream operation completes then it should be first reset. 1283 * 1284 * <p> This matcher‘s state should not be modified during execution of the 1285 * returned stream‘s pipeline. The returned stream‘s source 1286 * @code Spliterator is <em>fail-fast</em> and will, on a best-effort 1287 * basis, throw a @link java.util.ConcurrentModificationException if such 1288 * modification is detected. 1289 * 1290 * @return a sequential stream of match results. 
1291 * @since 9 1292 */ 1293 public Stream<MatchResult> results() 1294 class MatchResultIterator implements Iterator<MatchResult> 1295 // -ve for call to find, 0 for not found, 1 for found 1296 int state = -1; 1297 // State for concurrent modification checking 1298 // -1 for uninitialized 1299 int expectedCount = -1; 1300 // The input sequence as a string, set once only after first find 1301 // Avoids repeated conversion from CharSequence for each match 1302 String textAsString; 1303 1304 @Override 1305 public MatchResult next() 1306 if (expectedCount >= 0 && expectedCount != modCount) 1307 throw new ConcurrentModificationException(); 1308 1309 if (!hasNext()) 1310 throw new NoSuchElementException(); 1311 1312 state = -1; 1313 return toMatchResult(textAsString); 1314 1315 1316 @Override 1317 public boolean hasNext() 1318 if (state >= 0) 1319 return state == 1; 1320 1321 // Defer throwing ConcurrentModificationException to when next 1322 // or forEachRemaining is called. The is consistent with other 1323 // fail-fast implementations. 1324 if (expectedCount >= 0 && expectedCount != modCount) 1325 return true; 1326 1327 boolean found = find(); 1328 // Capture the input sequence as a string on first find 1329 if (found && state < 0) 1330 textAsString = text.toString(); 1331 state = found ? 1 : 0; 1332 expectedCount = modCount; 1333 return found; 1334 1335 1336 @Override 1337 public void forEachRemaining(Consumer<? 
super MatchResult> action) 1338 if (expectedCount >= 0 && expectedCount != modCount) 1339 throw new ConcurrentModificationException(); 1340 1341 int s = state; 1342 if (s == 0) 1343 return; 1344 1345 // Set state to report no more elements on further operations 1346 state = 0; 1347 expectedCount = -1; 1348 1349 // Perform a first find if required 1350 if (s < 0 && !find()) 1351 return; 1352 1353 // Capture the input sequence as a string on first find 1354 textAsString = text.toString(); 1355 1356 do 1357 int ec = modCount; 1358 action.accept(toMatchResult(textAsString)); 1359 if (ec != modCount) 1360 throw new ConcurrentModificationException(); 1361 while (find()); 1362 1363 1364 return StreamSupport.stream(Spliterators.spliteratorUnknownSize( 1365 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false); 1366 1367 1368 /** 1369 * Replaces the first subsequence of the input sequence that matches the 1370 * pattern with the given replacement string. 1371 * 1372 * <p> This method first resets this matcher. It then scans the input 1373 * sequence looking for a match of the pattern. Characters that are not 1374 * part of the match are appended directly to the result string; the match 1375 * is replaced in the result by the replacement string. The replacement 1376 * string may contain references to captured subsequences as in the @link 1377 * #appendReplacement appendReplacement method. 1378 * 1379 * <p>Note that backslashes (@code \) and dollar signs (@code $) in 1380 * the replacement string may cause the results to be different than if it 1381 * were being treated as a literal replacement string. Dollar signs may be 1382 * treated as references to captured subsequences as described above, and 1383 * backslashes are used to escape literal characters in the replacement 1384 * string. 
1385 * 1386 * <p> Given the regular expression @code dog, the input 1387 * @code "zzzdogzzzdogzzz", and the replacement string 1388 * @code "cat", an invocation of this method on a matcher for that 1389 * expression would yield the string @code "zzzcatzzzdogzzz". </p> 1390 * 1391 * <p> Invoking this method changes this matcher‘s state. If the matcher 1392 * is to be used in further matching operations then it should first be 1393 * reset. </p> 1394 * 1395 * @param replacement 1396 * The replacement string 1397 * @return The string constructed by replacing the first matching 1398 * subsequence by the replacement string, substituting captured 1399 * subsequences as needed 1400 */ 1401 public String replaceFirst(String replacement) 1402 if (replacement == null) 1403 throw new NullPointerException("replacement"); 1404 reset(); 1405 if (!find()) 1406 return text.toString(); 1407 StringBuilder sb = new StringBuilder(); 1408 appendReplacement(sb, replacement); 1409 appendTail(sb); 1410 return sb.toString(); 1411 1412 1413 /** 1414 * Replaces the first subsequence of the input sequence that matches the 1415 * pattern with the result of applying the given replacer function to the 1416 * match result of this matcher corresponding to that subsequence. 1417 * Exceptions thrown by the replace function are relayed to the caller. 1418 * 1419 * <p> This method first resets this matcher. It then scans the input 1420 * sequence looking for a match of the pattern. Characters that are not 1421 * part of the match are appended directly to the result string; the match 1422 * is replaced in the result by the applying the replacer function that 1423 * returns a replacement string. The replacement string may contain 1424 * references to captured subsequences as in the @link #appendReplacement 1425 * appendReplacement method. 
1426 * 1427 * <p>Note that backslashes (@code \) and dollar signs (@code $) in 1428 * the replacement string may cause the results to be different than if it 1429 * were being treated as a literal replacement string. Dollar signs may be 1430 * treated as references to captured subsequences as described above, and 1431 * backslashes are used to escape literal characters in the replacement 1432 * string. 1433 * 1434 * <p> Given the regular expression @code dog, the input 1435 * @code "zzzdogzzzdogzzz", and the function 1436 * @code mr -> mr.group().toUpperCase(), an invocation of this method on 1437 * a matcher for that expression would yield the string 1438 * @code "zzzDOGzzzdogzzz". 1439 * 1440 * <p> Invoking this method changes this matcher‘s state. If the matcher 1441 * is to be used in further matching operations then it should first be 1442 * reset. 1443 * 1444 * <p> The replacer function should not modify this matcher‘s state during 1445 * replacement. This method will, on a best-effort basis, throw a 1446 * @link java.util.ConcurrentModificationException if such modification is 1447 * detected. 1448 * 1449 * <p> The state of the match result passed to the replacer function is 1450 * guaranteed to be constant only for the duration of the replacer function 1451 * call and only if the replacer function does not modify this matcher‘s 1452 * state. 1453 * 1454 * @implNote 1455 * This implementation applies the replacer function to this matcher, which 1456 * is an instance of @code MatchResult. 1457 * 1458 * @param replacer 1459 * The function to be applied to the match result of this matcher 1460 * that returns a replacement string. 1461 * @return The string constructed by replacing the first matching 1462 * subsequence with the result of applying the replacer function to 1463 * the matched subsequence, substituting captured subsequences as 1464 * needed. 
1465 * @throws NullPointerException if the replacer function is null 1466 * @throws ConcurrentModificationException if it is detected, on a 1467 * best-effort basis, that the replacer function modified this 1468 * matcher‘s state 1469 * @since 9 1470 */ 1471 public String replaceFirst(Function<MatchResult, String> replacer) 1472 Objects.requireNonNull(replacer); 1473 reset(); 1474 if (!find()) 1475 return text.toString(); 1476 StringBuilder sb = new StringBuilder(); 1477 int ec = modCount; 1478 String replacement = replacer.apply(this); 1479 if (ec != modCount) 1480 throw new ConcurrentModificationException(); 1481 appendReplacement(sb, replacement); 1482 appendTail(sb); 1483 return sb.toString(); 1484 1485 1486 /** 1487 * Sets the limits of this matcher‘s region. The region is the part of the 1488 * input sequence that will be searched to find a match. Invoking this 1489 * method resets the matcher, and then sets the region to start at the 1490 * index specified by the @code start parameter and end at the 1491 * index specified by the @code end parameter. 1492 * 1493 * <p>Depending on the transparency and anchoring being used (see 1494 * @link #useTransparentBounds(boolean) useTransparentBounds and 1495 * @link #useAnchoringBounds(boolean) useAnchoringBounds), certain 1496 * constructs such as anchors may behave differently at or around the 1497 * boundaries of the region. 1498 * 1499 * @param start 1500 * The index to start searching at (inclusive) 1501 * @param end 1502 * The index to end searching at (exclusive) 1503 * @throws IndexOutOfBoundsException 1504 * If start or end is less than zero, if 1505 * start is greater than the length of the input sequence, if 1506 * end is greater than the length of the input sequence, or if 1507 * start is greater than end. 
1508 * @return this matcher 1509 * @since 1.5 1510 */ 1511 public Matcher region(int start, int end) 1512 if ((start < 0) || (start > getTextLength())) 1513 throw new IndexOutOfBoundsException("start"); 1514 if ((end < 0) || (end > getTextLength())) 1515 throw new IndexOutOfBoundsException("end"); 1516 if (start > end) 1517 throw new IndexOutOfBoundsException("start > end"); 1518 reset(); 1519 from = start; 1520 to = end; 1521 return this; 1522 1523 1524 /** 1525 * Reports the start index of this matcher‘s region. The 1526 * searches this matcher conducts are limited to finding matches 1527 * within @link #regionStart() regionStart (inclusive) and 1528 * @link #regionEnd() regionEnd (exclusive). 1529 * 1530 * @return The starting point of this matcher‘s region 1531 * @since 1.5 1532 */ 1533 public int regionStart() 1534 return from; 1535 1536 1537 /** 1538 * Reports the end index (exclusive) of this matcher‘s region. 1539 * The searches this matcher conducts are limited to finding matches 1540 * within @link #regionStart() regionStart (inclusive) and 1541 * @link #regionEnd() regionEnd (exclusive). 1542 * 1543 * @return the ending point of this matcher‘s region 1544 * @since 1.5 1545 */ 1546 public int regionEnd() 1547 return to; 1548 1549 1550 /** 1551 * Queries the transparency of region bounds for this matcher. 1552 * 1553 * <p> This method returns @code true if this matcher uses 1554 * <i>transparent</i> bounds, @code false if it uses <i>opaque</i> 1555 * bounds. 1556 * 1557 * <p> See @link #useTransparentBounds(boolean) useTransparentBounds for a 1558 * description of transparent and opaque bounds. 1559 * 1560 * <p> By default, a matcher uses opaque region boundaries. 1561 * 1562 * @return @code true iff this matcher is using transparent bounds, 1563 * @code false otherwise. 
1564 * @see java.util.regex.Matcher#useTransparentBounds(boolean) 1565 * @since 1.5 1566 */ 1567 public boolean hasTransparentBounds() 1568 return transparentBounds; 1569 1570 1571 /** 1572 * Sets the transparency of region bounds for this matcher. 1573 * 1574 * <p> Invoking this method with an argument of @code true will set this 1575 * matcher to use <i>transparent</i> bounds. If the boolean 1576 * argument is @code false, then <i>opaque</i> bounds will be used. 1577 * 1578 * <p> Using transparent bounds, the boundaries of this 1579 * matcher‘s region are transparent to lookahead, lookbehind, 1580 * and boundary matching constructs. Those constructs can see beyond the 1581 * boundaries of the region to see if a match is appropriate. 1582 * 1583 * <p> Using opaque bounds, the boundaries of this matcher‘s 1584 * region are opaque to lookahead, lookbehind, and boundary matching 1585 * constructs that may try to see beyond them. Those constructs cannot 1586 * look past the boundaries so they will fail to match anything outside 1587 * of the region. 1588 * 1589 * <p> By default, a matcher uses opaque bounds. 1590 * 1591 * @param b a boolean indicating whether to use opaque or transparent 1592 * regions 1593 * @return this matcher 1594 * @see java.util.regex.Matcher#hasTransparentBounds 1595 * @since 1.5 1596 */ 1597 public Matcher useTransparentBounds(boolean b) 1598 transparentBounds = b; 1599 return this; 1600 1601 1602 /** 1603 * Queries the anchoring of region bounds for this matcher. 1604 * 1605 * <p> This method returns @code true if this matcher uses 1606 * <i>anchoring</i> bounds, @code false otherwise. 1607 * 1608 * <p> See @link #useAnchoringBounds(boolean) useAnchoringBounds for a 1609 * description of anchoring bounds. 1610 * 1611 * <p> By default, a matcher uses anchoring region boundaries. 1612 * 1613 * @return @code true iff this matcher is using anchoring bounds, 1614 * @code false otherwise. 
* @see java.util.regex.Matcher#useAnchoringBounds(boolean)
     * @since 1.5
     */
    public boolean hasAnchoringBounds() {
        return anchoringBounds;
    }

    /**
     * Sets the anchoring of region bounds for this matcher.
     *
     * <p> Invoking this method with an argument of {@code true} will set this
     * matcher to use <i>anchoring</i> bounds. If the boolean
     * argument is {@code false}, then <i>non-anchoring</i> bounds will be
     * used.
     *
     * <p> Using anchoring bounds, the boundaries of this
     * matcher's region match anchors such as ^ and $.
     *
     * <p> Without anchoring bounds, the boundaries of this
     * matcher's region will not match anchors such as ^ and $.
     *
     * <p> By default, a matcher uses anchoring region boundaries.
     *
     * @param  b a boolean indicating whether or not to use anchoring bounds.
     * @return this matcher
     * @see java.util.regex.Matcher#hasAnchoringBounds
     * @since 1.5
     */
    public Matcher useAnchoringBounds(boolean b) {
        anchoringBounds = b;
        return this;
    }

    /**
     * <p>Returns the string representation of this matcher. The
     * string representation of a {@code Matcher} contains information
     * that may be useful for debugging. The exact format is unspecified.
     *
     * @return  The string representation of this matcher
     * @since 1.5
     */
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("java.util.regex.Matcher")
                .append("[pattern=").append(pattern())
                .append(" region=")
                .append(regionStart()).append(',').append(regionEnd())
                .append(" lastmatch=");
        // The last match is only appended when a match is recorded
        // (first >= 0) and group() yields a non-null value.
        if ((first >= 0) && (group() != null)) {
            sb.append(group());
        }
        sb.append(']');
        return sb.toString();
    }

    /**
     * <p>Returns true if the end of input was hit by the search engine in
     * the last match operation performed by this matcher.
     *
     * <p>When this method returns true, then it is possible that more input
     * would have changed the result of the last search.
     *
     * @return  true iff the end of input was hit in the last match; false
     *          otherwise
     * @since 1.5
     */
    public boolean hitEnd() {
        return hitEnd;
    }

    /**
     * <p>Returns true if more input could change a positive match into a
     * negative one.
     *
     * <p>If this method returns true, and a match was found, then more
     * input could cause the match to be lost. If this method returns false
     * and a match was found, then more input might change the match but the
     * match won't be lost. If a match was not found, then requireEnd has no
     * meaning.
     *
     * @return  true iff more input could change a positive match into a
     *          negative one.
     * @since 1.5
     */
    public boolean requireEnd() {
        return requireEnd;
    }

    /**
     * Initiates a search to find a Pattern within the given bounds.
     * The groups are filled with default values and the match of the root
     * of the state machine is called. The state machine will hold the state
     * of the match as it proceeds in this matcher.
     *
     * Matcher.from is not set here, because it is the "hard" boundary
     * of the start of the search which anchors will set to. The from param
     * is the "soft" boundary of the start of the search, meaning that the
     * regex tries to match at that index but ^ won't match there. Subsequent
     * calls to the search methods start at a new "soft" boundary which is
     * the end of the previous match.
     *
     * @param  from the "soft" start index of the search; negative values
     *         are treated as 0
     * @return the result of matching the pattern's root node from that index
     */
    boolean search(int from) {
        // Reset the end-of-input flags before this attempt.
        this.hitEnd = false;
        this.requireEnd = false;
        // A negative start index is clamped to 0.
        from = from < 0 ? 0 : from;
        this.first = from;
        // Seed oldLast from 'from' only while it is still negative.
        this.oldLast = oldLast < 0 ? from : oldLast;
        // Fill every group slot with its default value, -1.
        for (int i = 0; i < groups.length; i++)
            groups[i] = -1;
        // Clear each allocated localsPos entry; null slots are skipped.
        for (int i = 0; i < localsPos.length; i++) {
            if (localsPos[i] != null)
                localsPos[i].clear();
        }
        acceptMode = NOANCHOR;
        boolean result = parentPattern.root.match(this, from, text);
        // A negative 'first' is what getMatchedGroupIndex reports as
        // "No match found".
        if (!result)
            this.first = -1;
        // The two updates below run whether or not the match succeeded.
        this.oldLast = this.last;
        this.modCount++;
        return result;
    }

    /**
     * Initiates a search for an anchored match to a Pattern within the given
     * bounds. The groups are filled with default values and the match of the
     * root of the state machine is called. The state machine will hold the
     * state of the match as it proceeds in this matcher.
     *
     * @param  from the start index of the match attempt; negative values
     *         are treated as 0
     * @param  anchor the value stored in acceptMode for this attempt
     * @return the result of matching the pattern's matchRoot node from that
     *         index
     */
    boolean match(int from, int anchor) {
        // Reset the end-of-input flags before this attempt.
        this.hitEnd = false;
        this.requireEnd = false;
        // A negative start index is clamped to 0.
        from = from < 0 ? 0 : from;
        this.first = from;
        // Seed oldLast from 'from' only while it is still negative.
        this.oldLast = oldLast < 0 ? from : oldLast;
        // Fill every group slot with its default value, -1.
        for (int i = 0; i < groups.length; i++)
            groups[i] = -1;
        // Clear each allocated localsPos entry; null slots are skipped.
        for (int i = 0; i < localsPos.length; i++) {
            if (localsPos[i] != null)
                localsPos[i].clear();
        }
        acceptMode = anchor;
        boolean result = parentPattern.matchRoot.match(this, from, text);
        // A negative 'first' is what getMatchedGroupIndex reports as
        // "No match found".
        if (!result)
            this.first = -1;
        // The two updates below run whether or not the match succeeded.
        this.oldLast = this.last;
        this.modCount++;
        return result;
    }

    /**
     * Returns the end index of the text.
     *
     * @return the index after the last character in the text
     */
    int getTextLength() {
        return text.length();
    }

    /**
     * Returns the subsequence of this matcher's input in the specified range.
     *
     * @param  beginIndex   the beginning index, inclusive
     * @param  endIndex     the ending index, exclusive
     * @return A character sequence taken from this matcher's input
     */
    CharSequence getSubSequence(int beginIndex, int endIndex) {
        return text.subSequence(beginIndex, endIndex);
    }

    /**
     * Returns this matcher's input character at index i.
     *
     * @param  i the index into this matcher's input
     * @return A char from the specified index
     */
    char charAt(int i) {
        return text.charAt(i);
    }

    /**
     * Returns the group index of the matched capturing group.
     *
     * @param  name the name of the named-capturing group
     * @return the index of the named-capturing group
     * @throws NullPointerException if {@code name} is null
     * @throws IllegalStateException if no match is currently recorded
     *         ({@code first < 0})
     * @throws IllegalArgumentException if the pattern has no group with the
     *         given name
     */
    int getMatchedGroupIndex(String name) {
        Objects.requireNonNull(name, "Group name");
        if (first < 0)
            throw new IllegalStateException("No match found");
        if (!parentPattern.namedGroups().containsKey(name))
            throw new IllegalArgumentException("No group with name <" + name + ">");
        return parentPattern.namedGroups().get(name);
    }
以上是关于Matcher类(java JDK源码记录)的主要内容,如果未能解决你的问题,请参考以下文章