javascript JavaScript regexp正则表达式匹配

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了javascript JavaScript regexp正则表达式匹配相关的知识,希望对你有一定的参考价值。

//Full example: Creating our Own Location Object
//location object provides you with information about the current page: the href, host, port, protocol, etc. 
//this is purely for practice - in real world just use the preexisting location object!

var l = loc('http://www.somesite.com?somekey=somevalue&anotherkey=anothervalue#theHashGoesHere');

/**
 * Practice-only stand-in for the browser's built-in `location` object.
 * Every accessor is a plain regular-expression lookup against `url`; no real
 * URL parsing is performed.
 *
 * Each accessor returns a string, and returns '' when the url does not
 * contain the requested part (instead of throwing on a null match).
 *
 * @param {string} url - The address to inspect.
 * @returns {{search: function(): string, hash: function(): string,
 *            protocol: function(): string, href: function(): string}}
 */
function loc(url) {
    // Runs `pattern` against the url and returns the requested group
    // (0 = whole match, 1 = first capture group), or '' when nothing matched.
    function pick(pattern, group) {
        var found = url.match(pattern);
        return found ? found[group] : '';
    }

    return {
        search : function() {
            // Everything after the first "?" is trapped by (.+) and returned via group 1.
            // Note: the capture runs to the end of the string, so a trailing "#hash"
            // is included, e.g. "somekey=somevalue&anotherkey=anothervalue#theHashGoesHere".
            return pick(/\?(.+)/i, 1);
        },

        hash : function() {
            // Everything after the first "#", e.g. "theHashGoesHere".
            return pick(/#(.+)/i, 1);
        },

        protocol : function() {
            // (ht|f)tps? covers http, https, ftp and ftps: first "ht" or "f",
            // then "tp", then an optional "s" (the ? means zero or one of the
            // preceding character), then the colon. Returns e.g. 'http:'.
            return pick(/(ht|f)tps?:/i, 0);
        },

        href : function() {
            // Everything up to a period followed by two to four letters
            // (com, au, edu, name, etc.), e.g. "http://www.somesite.com".
            // No "g" flag: with it, match() returns an array of matches
            // rather than the single string this accessor is documented to return.
            return pick(/(.+\.[a-z]{2,4})/i, 0);
        }
    };
}

//With that function created, we can easily alert each subsection by doing:

// Point `l` at a second sample URL and alert two of its parts.
var l = loc('http://www.net.tutsplus.edu?key=value#hash');
alert(l.href()); // alerts http://www.net.tutsplus.edu
alert(l.protocol()); // alerts http:

// Basics
. 		// Matches any character, except for line breaks if dotall is false.
* 		// Matches 0 or more of the preceding character.
+ 		// Matches 1 or more of the preceding character.
? 		// Preceding character is optional. Matches 0 or 1 occurrence.
\d 		// Matches any single digit
\w 		// Matches any word character (alphanumeric & underscore).
[XYZ] 	// Matches any single character from the character class.
[XYZ]+ 	// Matches one or more of any of the characters in the set.
$ 		// Matches the end of the string.
^ 		// Matches the beginning of a string.
[^a-z] 	// When inside of a character class, the ^ means NOT; in this case, match anything that is NOT a lowercase letter.


// Example RegExp for matching the HTML5 email format (what the input[type=email] looks for in its native validation)
/^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$/
// Use cases for testing RegExps in a few ways, including the most popular JavaScript APIs: .test(), .match() and .exec().



// --------------------
// Test()
//

// The fastest and the simplest to use - runs a search for a match between a RegExp and a String.
// Returns a boolean (true/false) indicating whether the pattern you’re matching is present. *Doesn't return any data*
// If you don't necessarily need to perform an operation with a specific matched result
// - for instance, when validating a username - "test" will do the job just fine.
var username = 'JohnSmith';
// Anchored with ^ and $ so the WHOLE username must consist of allowed characters;
// without the anchors, any string containing a single allowed character would pass.
var usernamePattern = /^[A-Za-z_-]+$/;
alert(usernamePattern.test(username)); // alerts true
// regular expression:
// - only allows upper and lower case letters, an underscore, and a dash.
// - the accepted characters are wrapped within [], which designates a character class.
// - the "+" that follows the class signifies that we're looking for one or more of those characters.

// Guard a block so that it only runs when the current page's host starts with "localhost".
if (/^localhost/.test(window.location.host)) {
  // reached when test() returns true, i.e. when working locally
}


// --------------------
//
// Match()
// 
// best when you require or are expecting data back in a test result
// returns an array with the matches, or null if there are none.
// With .match(), you won’t just be testing for the presence of data,
// you’ll want to see if a data pattern exists, and return that data.
// The pattern generally describes the shape of the data rather than its exact value, so it relies on capture groups
// Capture groups match your data, and return the data inside the group to you
// Think of it as a kind of validator

// An example might be matching a credit card number (here: 13 or 16 digits starting with 4) and returning those digits.
// Reads the current value of the first <input> element on the page.
var creditCardNumber = document.querySelector('input').value;
// match() returns the matched values
// inside an ARRAY *if* matched, or null when there is no match
creditCardNumber.match(/\b(4[0-9]{12}(?:[0-9]{3})?)\b/);
// You can access items in the array instantly by specifying the index:
// notice the array index [1], which is the first capture group.
// NOTE: indexing directly throws a TypeError when match() returns null (no match).
creditCardNumber.match(/\b(4[0-9]{12}(?:[0-9]{3})?)\b/)[1];


// match() returns an array containing each match found (or null when there is none).
var name = 'JeffreyWay';
alert(name.match(/e/)); // alerts "e" - without the "g" flag only the first match is reported
alert(name.match(/e/g)); // alerts "e,e"
alert(name.match(/e/g)[1]); // alerts "e"
// another example
var string = 'This is just a string with some 12345 and some !@#$ mixed in.';
// Matches runs of one or more letters; upper and lowercase alike thanks to the "i" modifier.
// The result is stored so that individual matches can be read back by index below.
var matches = string.match(/[a-z]+/gi);
alert(matches); // alerts "This,is,just,a,string,with,some,and,some,mixed,in"
alert(matches[2]); // alerts "just"
// split an email address into its respective username and domain name: "nettuts," and "tutsplus."
var email = 'nettuts@tutsplus.com';
alert(email.replace(/([a-z\d_-]+)@([a-z\d_-]+)\.[a-z]{2,4}/ig, '$1, $2')); // alerts "nettuts, tutsplus"
//let's take it piece by piece
/([a-z\d_-]+)
//Starting from the left, search for any letter, number, underscore, or dash, and match one or more of them (+).
//We'd like to access the value of whatever is matched here, so we wrap it within parentheses. 
//That way, we can reference this matched set later!
@([a-z\d_-]+)
//find the @ symbol, and then another set of one or more letters, numbers, underscore, and dashes. 
//Once again, we wrap that set within parentheses in order to access it later.
\.[a-z]{2,4}/ig,
//find a single period (we must escape it with "\" due to the fact that, in regular expressions,
//it matches any character, sometimes excluding a line break).
//The last part is to find the ".com." We know that the majority, if not all, domains will have a suffix range 
//of two - four characters (com, edu, net, name, etc.). 
//If we're aware of that specific range, we can forego using a more generic symbol like * or +, 
//and instead wrap the two numbers within curly braces, representing the minimum and maximum, respectively.
'$1, $2'
//second parameter of the replace method: what we'd like to replace the matched sets with. 
//we're using $1 and $2 to refer to what was stored within the first and second sets of parentheses


// --------------------
// .exec()
//
// similar to .match(): it is called on the RegExp and returns an array whose first element is the part of the string that matched (or null if nothing matched). For instance, if I wanted to search a string for the word ‘Todd’, I could get it returned if it matches.
// Returns the matching text, almost like ‘pulling it out of the string’ (the original string is not modified)

// Example: returns an array whose first element is 'Todd' (exec() returns null when nothing matches)
/todd/i.exec('Hello, my name is Todd Motto');
// You’ll notice I added the /i flag at the end; this means the match is not case sensitive.



// --------------------
// .search()
//
// similar to the .exec() method, but instead of the matched text it tells you the index at which the match was found.
// Returns the matching pattern’s index value (how far into the string it occurred), or -1 when there is no match

// Example: returns 18 (a number - the zero-based index at which "Todd" starts)
var str = 'Hello, my name is Todd Motto';
str.search(/todd/i);



// --------------------
// Split()
//
// Split is absolutely perfect for neat little tricks / splitting chunks of data when dealing with returned data, 
// using .split() will cut your string into two (or more) pieces -- Returns a new array
//Accepts a single regular expression which represents where the "split" should occur. 
//We can also use a string if we'd prefer.
var str = 'this is my string';
// \s matches a single whitespace character, so splitting on it turns the
// sentence into an array of words. The array is handed to an immediately
// invoked function so both alerts can read from it.
(function (words) {
    alert(words);    // alerts "this,is,my,string"
    alert(words[3]); // alerts "string"
}(str.split(/\s/)));

// The same technique applied to a string literal:
// evaluates to ["Hello,", "my", "name", "is", "Todd", "Motto"]
'Hello, my name is Todd Motto'.split(/\s/g);


// --------------------
// Replace()
//
// replace() allows you to replace a certain block of text, represented by a string or regular expression, with a different string.
// Example: change the string "Hello, World" to "Hello, Universe".
var someString = 'Hello, World';
someString = someString.replace(/World/, 'Universe');
// Note that the replace method does not automatically overwrite the value of the variable;
// we must reassign the returned value back to the variable.

// Another example: remove any symbols, quotation marks, semi-colons, etc. from a username.
var username = 'J;ohnSmith;@%';
// Without the "g" flag only the first run of disallowed characters (the first ";") is removed.
username = username.replace(/[^A-Za-z\d_-]+/, '');
alert(username); // alerts JohnSmith;@%
// To tell the engine to continue searching the string for more matches, we add a "g" after our closing slash.
// This modifier, or flag, stands for "global." Our revised code should now look like so:
username = username.replace(/[^A-Za-z\d_-]+/g, '');
alert(username); // alerts JohnSmith
// The caret ^ symbol inside a character class [brackets] means "find anything that IS NOT..."
// so we say: find anything that is NOT a letter, number (represented by \d), an underscore, or a dash;
// if you find a match, replace it with nothing (delete the character entirely).


// --------------------
// *It’s also interesting to note that each of the following passes
// the same if-statement check when the host starts with "localhost":

// .test()
if (/^localhost/.test(window.location.host)) {
  // test() returned true, lets me through
}

// .match()
if (window.location.host.match(/^localhost/)) {
  // match() returned an array (truthy in the if statement; it is null otherwise)
  // lets me through
}

// .search()
// search() returns the index of the match, or -1 when there is none. A match at
// the start of the string is index 0 (falsy) while "no match" is -1 (truthy), so
// the result must be compared against -1 instead of being used as a boolean.
if (window.location.host.search(/^localhost/) !== -1) {
  // search() found the pattern (at index 0)
  // lets me through
}

以上是关于javascript JavaScript regexp正则表达式匹配的主要内容,如果未能解决你的问题,请参考以下文章

javascript reg_add_middle_start

javascript reg 不加入分组

javascript REG EX信用卡到期MM / YY

javascript RegExp_reg_exec_VS_str_match

JavaScript连载26-window和document

正则表达式备忘(基于JavaScript)