JavaScript 解析URI

Posted 2021-05-20
tags:
篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了JavaScript 解析URI相关的知识，希望对你有一定的参考价值。
//****************************************************************
//**************************** URI *******************************
//****************************************************************

//Splits a URI into its parts.
//Returns null if str is empty or is not a valid URI. Otherwise returns an object with:
//	uri       - the input with its scheme lower-cased
//	scheme    - lower-cased scheme
//	authority - userinfo@host:port (rebuilt from the three parts below)
//	userinfo, host, port, path, query, fragment - "" when absent
//For http/https URIs the host must be accepted by normalizeURLDomain(); for every other
// scheme the host may not contain '[', ':' or ']' (i.e., no IP literals).
//does not support IPvFuture domains
//see RFC 3986 http://www.faqs.org/rfcs/rfc3986.html
function parseURI(str)
{
	if(!str) return null;
	
	//The expression is composed as follows (capture group numbers in brackets):
	//	scheme [1] ":"
	//	either   "//" (userinfo [2] "@")? host [3] (":" port [4])? path [5]?
	//	or       path [6]?                      (no authority)
	//	("?" query [7])?
	//	("#" fragment [8])?
	//The host check is deliberately loose so that bracketed IPv6 literals get through;
	// the host is validated further below.
	var regexUri = /^([a-z0-9+.-]+):(?:\/\/(?:((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*|\[(?:[0-9A-F:.]{2,})\])(?::(\d*))?(\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?|(\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})+(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?)(?:\?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?(?:#((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?$/i;
	
	var m = regexUri.exec(str);
	if(!m) return null;	//invalid URI
	
	var scheme = m[1].toLowerCase();
	var userinfo = m[2] || "";	//groups that did not participate are undefined
	var host = m[3] || "";
	var port = m[4] || "";
	
	//these extra steps are required to check for validity of the host depending on if it's a URL or not,
	// since URLs allow IPv6 addresses (i.e., they allow '[', ':', and ']')
	if(host && (scheme == "http" || scheme == "https"))	//if it's a URL
	{
		if(!normalizeURLDomain(host)) return null;	//invalid host
	}
	else if(host)	//host may not include '[', ':', or ']'
	{
		if((/[:\[\]]/).test(host)) return null;	//invalid host
	}
	
	return {
		uri: scheme+str.slice(scheme.length),	//make sure scheme is lower case
		scheme: scheme,
		authority: (userinfo ? userinfo+"@" : "") + host + (port ? ":"+port : ""),
		userinfo: userinfo,
		host: host,
		port: port,
		path: (m[5] || "") + (m[6] || ""),	//only one of the two path groups can match
		query: m[7] || "",
		fragment: m[8] || ""
	};
}

//Splits a query string into its name/value pairs.
//Returns an array of {name, value} objects in the order the pairs appear.
//Pairs without a name are skipped. A name without "=" gets the value "".
//If a pair contains more than one "=", the extra ones are kept in the value as "%3D".
function parseQueryNumeric(str)
{
	var results = [];	//array of objects {name, value}
	if(!str) return results;	//nothing to parse
	
	var pairs = str.split("&");
	var pair;
	for(var i=0; i<pairs.length; i++)
	{
		pair = pairs[i].split("=");
		if(!pair[0]) continue;	//if there is no name, skip it
		results.push({
			name: pair[0],
			//everything after the first "=" is the value; an empty slice joins to ""
			value: pair.slice(1).join("%3D")
		});
	}
	
	return results;
}
//Splits a query string into its name/value pairs.
//Returns an object that maps each name to its value.
//If there are multiple pairs with the same name, the last pair is used.
//Pairs without a name are skipped. A name without "=" gets the value "".
//If a pair contains more than one "=", the extra ones are kept in the value as "%3D".
function parseQueryAssociative(str)
{
	var results = {};	//associative array
	if(!str) return results;	//nothing to parse
	
	var pairs = str.split("&");
	var pair;
	for(var i=0; i<pairs.length; i++)
	{
		pair = pairs[i].split("=");
		if(!pair[0]) continue;	//if there is no name, skip it
		//everything after the first "=" is the value; an empty slice joins to ""
		results[pair[0]] = pair.slice(1).join("%3D");
	}
	
	return results;
}

//****************************************************************
//**************************** URL *******************************
//****************************************************************

//Splits a URL (i.e., http(s) scheme URI) into its parts.
//Returns null if str is not a valid URI, is not http/https, or has no valid domain.
//Otherwise returns an object with:
//	url       - the URL rebuilt from the normalized parts below
//	protocol  - "http" or "https"
//	authority - domain:port (any userinfo in the input is not included)
//	domain    - result of normalizeURLDomain()
//	port      - as given ("" when absent; defaults would be http 80, https 443)
//	path      - result of normalizeURLPath(), or "/" when the path is empty
//	query, anchor - query string and fragment ("" when absent)
//does not support IPvFuture domains
//see RFC 2616 http://tools.ietf.org/html/rfc2616
//note: according to the RFC, fragments aren't part of a URL (they're only used by the browser, never sent to the server)
// but this function allows them anyway, of course
function parseURL(str)
{
	var uri = parseURI(str);
	if(!uri) return null;	//invalid URI
	if(uri.scheme != "http" && uri.scheme != "https") return null;	//it's not a URL
	if(!uri.authority || !uri.host) return null;	//no domain
	
	var domain = normalizeURLDomain(uri.host);
	if(!domain) return null;	//invalid domain
	
	var authority = domain + (uri.port ? ":"+uri.port : "");
	var path = normalizeURLPath(uri.path) || "/";
	
	return {
		url: uri.scheme + "://" + authority + path + (uri.query ? "?"+uri.query : "") +
		 (uri.fragment ? "#"+uri.fragment : ""),
		protocol: uri.scheme,
		authority: authority,	//domain:port
		domain: domain,
		port: uri.port,	//defaults: http 80, https 443
		path: path,
		query: uri.query,
		anchor: uri.fragment
	};
}

//Converts an obscured URL domain to a more readable one.
//Percent-encoded letters, digits, '-', '.', ':', '[' and ']' are decoded (letters to lower case);
// any other percent-encoding makes the domain invalid.
//Returns "localhost", a normalized IPv4 address, a normalized bracketed IPv6 address,
// or the lower-cased host name. A host name must contain at least one dot.
//Returns "" if it's not a valid domain.
//does not support IPvFuture domains
//see http://www.pc-help.org/obscure.htm
// and RFC 1123 http://tools.ietf.org/html/rfc1123#section-2   (Section 2.1)
// and RFC 952 http://tools.ietf.org/html/rfc952   (ASSUMPTIONS 1, GRAMMATICAL HOST TABLE SPECIFICATION)
// and RFC 2181 http://tools.ietf.org/html/rfc2181#section-11   (Section 11)
function normalizeURLDomain(domain)
{
	if(!domain) return "";
	
	//decode in a single pass so that decoded text is never decoded a second time
	domain = domain.replace(/%([0-9A-F]{2})/ig, function(seq, hex)
	{
		var ch = String.fromCharCode(parseInt(hex, 16));
		//only characters that may appear in a domain are decoded (':', '[', ']' for IPv6 addresses)
		return (/[a-z0-9.:\[\]\-]/i).test(ch) ? ch.toLowerCase() : seq;
	});
	if(domain.toLowerCase() == "localhost") return "localhost";
	if((/[^a-z0-9:\[\].-]/i).test(domain)) return "";	//contains invalid characters (including any leftover '%')
	
	var ip;
	if((ip = normalizeIPv4(domain))) return ip;	//it's a valid IPv4 address
	if((ip = normalizeIPv6(domain))) return ip;	//it's a valid IPv6 address
	
	//it's not an IP address
	if((/[:\[\]]/).test(domain)) return "";	//contains invalid characters
	if(domain.length > 255) return "";	//too long
	//note: the spec doesn't allow a name to start with a digit, but this is not enforced
	if((/^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?(\.[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?)+$/i).test(domain))
		return domain.toLowerCase();	//valid domain; host names are case-insensitive
	return "";	//invalid domain
}

//Converts an obscured dotted IPv4 address to plain dotted-decimal form.
//ip must have exactly four parts separated by dots. Each part may be decimal, hexadecimal
// (0x prefix) or octal (leading 0 followed only by the digits 0-7).
//A part with a leading 0 that contains 8 or 9 is read as decimal.
//Parts larger than 255 wrap around (modulo 256).
//Returns '' if ip does not have that form.
//note: shortened forms with fewer than four parts (e.g., 1192362298 or 71.1179962) are rejected
// by the format check, so they are not handled here
function normalizeIPv4(ip)
{
	if(!(/^(\d+|0x[0-9A-F]+)(\.(\d+|0x[0-9A-F]+)){3}$/i).test(ip))	return '';	//invalid
	var parts = ip.split(".");
	var vals = [];
	var part, radix;
	for(var i=0; i<parts.length; i++)	//for each part
	{
		part = parts[i];
		//choose the radix explicitly; parseInt() without one no longer treats a leading 0 as octal
		if((/^0x/i).test(part)) radix = 16;
		else if((/^0[0-7]+$/).test(part)) radix = 8;
		else radix = 10;
		vals.push(parseInt(part, radix) % 256);
	}
	return vals.join(".");	//valid IP address
}

//Converts an IPv6 address to its full form: eight lower-case groups of four hex digits.
//ip may be wrapped in '[' and ']'. A single '::' shortcut is expanded to the zero groups it
// stands for, and a trailing dotted IPv4 address is converted to two hex groups.
//Returns '' if ip is not a valid IPv6 address.
//note: this includes the '[' and ']' characters on the ends of the IP (for use in a URL)
function normalizeIPv6(ip)
{
	if(ip.charAt(0) == '[' && ip.charAt(ip.length-1) == ']') ip = ip.slice(1,ip.length-1);
	var sides = ip.split('::');	//split the IP at the '::' shortcut (if it's used)
	if(sides.length > 2) return '';	//the shortcut may only be used once
	var hasShortcut = (sides.length == 2);
	
	//an IPv4 tail can only end the whole address, so it's not allowed left of a shortcut
	var left = normalizeIPv6.parseGroups(sides[0], !hasShortcut);
	if(!left) return '';	//invalid IP
	var right = [];
	if(hasShortcut)
	{
		right = normalizeIPv6.parseGroups(sides[1], true);
		if(!right) return '';	//invalid IP
		if(left.length+right.length > 7) return '';	//the shortcut must stand for at least one group
		while(left.length+right.length < 8)	//replace the shortcut with the zeroes it represents
		{
			left.push('0000');
		}
	}
	else if(left.length != 8) return '';	//without the shortcut, all eight groups must be given
	
	return '['+left.concat(right).join(':')+']';
}
//helper for normalizeIPv6
//converts the colon-separated text on one side of the '::' shortcut to an array of padded groups
//an empty side gives an empty array; returns null if any group is invalid
//allowV4Tail: whether the last group may be a dotted IPv4 address (which becomes two groups)
normalizeIPv6.parseGroups = function(side, allowV4Tail)
{
	var groups = [];
	if(side == '') return groups;	//there isn't anything on this side
	var parts = side.split(':');
	var v4;
	for(var i=0; i<parts.length; i++)
	{
		if((/^[0-9A-F]{1,4}$/i).test(parts[i])) groups.push(normalizeIPv6.pad(parts[i]));
		else if(allowV4Tail && i == parts.length-1 && (v4 = normalizeIPv6.v4to6(parts[i])) != '')
		{
			//v4 has the form 'hhhh:hhhh', so the second group starts after the colon at index 4
			groups.push(v4.substr(0,4));
			groups.push(v4.substr(5,4));
		}
		else return null;	//invalid group (this includes empty groups)
	}
	return groups;
};
//pads a group with leading zeroes to four characters and lower-cases it
normalizeIPv6.pad = function(x)
{
	x = ''+x;
	while(x.length < 4){ x = '0'+x; }
	return x.toLowerCase();
};
//converts a dotted IPv4 address to two IPv6 groups in the form 'hhhh:hhhh'
//returns '' if normalizeIPv4() rejects the address
normalizeIPv6.v4to6 = function(ip)
{
	var normalized = normalizeIPv4(ip);
	if(!normalized) return '';	//invalid IP
	//use the normalized decimal parts so that hex, octal and oversized parts convert correctly
	var octets = normalized.split('.');
	var hex = [];
	var digits;
	for(var i=0; i<octets.length; i++)
	{
		digits = Number(octets[i]).toString(16);
		hex.push(digits.length < 2 ? '0'+digits : digits);
	}
	return hex[0] + hex[1] + ':' + hex[2] + hex[3];
};

//Converts an obscured URL path to a more readable one.
//Decodes percent-encoded letters, digits and the characters - . _ ~ ! $ ' ( ) * + , ; @
//Every other percent-encoding (e.g., %2F, %3A, %26, %3D, %20) is left as it is.
//Returns "" if path is empty.
function normalizeURLPath(path)
{
	if(!path) return "";
	
	//decode in a single pass so that decoded text is never decoded a second time
	return path.replace(/%([0-9A-F]{2})/ig, function(seq, hex)
	{
		var ch = String.fromCharCode(parseInt(hex, 16));
		//note: decoding %20 to a space would be more readable, but not valid
		return (/[A-Za-z0-9\-._~!$'()*+,;@]/).test(ch) ? ch : seq;
	});
}

//****************************************************************
//************************** Mailto ******************************
//****************************************************************

//Splits a mailto scheme URI into its parts.
//Returns null if str is not a valid mailto URI or there is no destination (to, cc or bcc).
//Otherwise returns an object with:
//	uri     - the mailto URI rebuilt from the parts below
//	scheme  - "mailto"
//	to      - addresses from the path followed by addresses from any "to" headers
//	cc, bcc - addresses from the "cc" and "bcc" headers
//	subject, body - values of those headers ("" when absent)
//	headers - every other header that has a value, as {name, value} objects
//Header names are matched case-insensitively. Escaped spaces (%20) are shown as real spaces.
//only includes valid email addresses; the rest are removed
//does not support IPv6 or IPvFuture domains
//see RFC 2368 http://tools.ietf.org/html/rfc2368
function parseMailto(str)
{
	var uri = parseURI(str);
	if(!uri || uri.scheme != "mailto" || uri.authority) return null;
	//note: if there is a fragment, it will simply be left out
	
	var path = uri.path.replace(/%20/g, " ");
	var query = uri.query.replace(/%20/g, " ");
	
	var parts = {
		uri: "",
		scheme: "mailto",
		to: [],
		cc: [],
		bcc: [],
		subject: "",
		body: "",
		headers: []	//other headers besides the above
	};
	var to1 = path ? splitEmailAddresses(path) : [];	//addresses given in the path
	var to2 = [];	//addresses given in "to" headers
	
	var headers = parseQueryNumeric(query);
	var name, value;
	for(var i=0; i<headers.length; i++)
	{
		value = headers[i].value;
		if(!value) continue;	//skip headers without a value (also guards against a missing value)
		name = headers[i].name.toLowerCase();
		if(name == "to") to2 = to2.concat(splitEmailAddresses(value));
		else if(name == "cc") parts.cc = parts.cc.concat(splitEmailAddresses(value));
		else if(name == "bcc") parts.bcc = parts.bcc.concat(splitEmailAddresses(value));
		else if(name == "subject") parts.subject = value;
		else if(name == "body") parts.body = value;
		else parts.headers.push(headers[i]);
	}
	
	parts.to = to1.concat(to2);
	if(parts.to.length == 0 && parts.cc.length == 0 && parts.bcc.length == 0) return null;	//no destination
	
	//rebuild the URI from the cleaned-up parts
	var qs = [];
	if(to2.length > 0) qs.push("to="+to2.join(","));
	if(parts.cc.length > 0) qs.push("cc="+parts.cc.join(","));
	if(parts.bcc.length > 0) qs.push("bcc="+parts.bcc.join(","));
	if(parts.subject) qs.push("subject="+parts.subject);
	if(parts.body) qs.push("body="+parts.body);
	for(i=0; i<parts.headers.length; i++)
	{
		qs.push(parts.headers[i].name+"="+parts.headers[i].value);
	}
	parts.uri = "mailto:"+to1.join(",");
	if(qs.length > 0) parts.uri += "?"+qs.join("&");
	
	return parts;
}
//helper function for parseMailto
//Splits the string at every comma, but ignores commas within quoted strings.
//A quote preceded by an odd number of backslashes is escaped and does not end a quoted string.
//Each piece is passed through normalizeEmailAddress(); only valid addresses are returned.
//Returns an empty array if a quoted string is never closed.
function splitEmailAddresses(str)
{
	var candidates = [];	//unverified addresses
	var parts = str.split("\"");	//split the string at the quotes
	var current = "";	//address being collected
	var inQuote = false;
	var segment, comma;
	for(var i=0; i<parts.length; i++)
	{
		if(inQuote)	//currently inside a pair of quotes
		{
			current += "\"" + parts[i];
			//if the part ends with the escape character (\), the next quote is escaped and
			// the quoted string continues; otherwise the next quote (if there is one) closes it
			if(!(/(^|[^\\])(\\\\)*\\$/).test(parts[i]) && i < parts.length-1)
			{
				current += "\"";
				inQuote = false;
			}
		}
		else	//not inside a pair of quotes
		{
			segment = parts[i];
			while((comma = segment.indexOf(",")) > -1)	//every comma here ends an address
			{
				candidates.push(current + segment.slice(0, comma));
				current = "";
				segment = segment.slice(comma+1);
			}
			current += segment;
			if(i < parts.length-1) inQuote = true;	//if there are more parts
			else candidates.push(current);
		}
	}
	if(inQuote) return [];	//no closing quote
	
	//verify the email addresses
	var addresses = [];
	var normalized;
	for(i=0; i<candidates.length; i++)
	{
		normalized = normalizeEmailAddress(candidates[i]);
		if(normalized) addresses.push(normalized);	//if it's not valid, leave it out
	}
	return addresses;
}

//Converts an obscured email address to a more readable one; unfolds and removes comments.
//Accepts either a bare address (local@domain) or a display name followed by <local@domain>.
//The local part, domain and display name are trimmed and the domain is passed through
// normalizeURLDomain(). The result is local@domain, or name<local@domain> when angle brackets
// were used (the trimmed display name is placed directly before the '<').
//Returns "" if it's not a valid address.
//does not support IPv6 or IPvFuture domains
//see RFC 2822 http://tools.ietf.org/html/rfc2822
// and http://www.ilovejackdaniels.com/php/email-address-validation/
//obsolete forms are not supported
function normalizeEmailAddress(str)
{
	if(!str) return "";
	
	//remove comments
	//regular expressions do not support nesting, so this is done manually
	var depth = 0;	//nesting level of comments
	var kept = "";	//text that is not part of a comment
	var inQS = false;	//inside a quoted string
	//next '(', ')' or '"' that is not escaped (i.e., preceded by an even number of backslashes)
	var rxDelim = /(^|[^\\]+?)(\\\\)*[()"]/;
	var m, end, ch;
	while((m = rxDelim.exec(str)))
	{
		end = m.index + m[0].length;	//index just past the delimiter
		ch = str.charAt(end-1);
		if(ch == "\"")
		{
			if(depth == 0)	//beginning or end of a quoted string (not inside of a comment)
			{
				kept += str.slice(0, end);
				inQS = !inQS;
			}
		}
		else if(ch == "(")
		{
			if(inQS) kept += str.slice(0, end);	//inside a quoted string
			else if(depth++ == 0) kept += str.slice(0, end-1);	//beginning of a top-level comment
		}
		else	//ch is ")"
		{
			if(inQS) kept += str.slice(0, end);	//inside a quoted string
			else depth--;	//end of a comment
		}
		if(depth < 0) return "";	//invalid comment nesting
		str = str.slice(end);
	}
	if(depth != 0) return "";	//a comment was never closed
	str = kept + str;
	
	str = str.replace(/\s+/g, " ");	//replace whitespace with a single space
	str = str.replace(/[\x01-\x1F\x7F]+/g, "");	//remove remaining (non-whitespace) control characters
	
	//all groups in these pieces are non-capturing so that the capture groups of the
	// expressions built from them can be counted reliably
	var atext = "[!#$%&'*+`/0-9=?A-Z^_a-z{|}~-]";
	var qtext = "[!#$%&'()*+`./0-9:;<=>?@A-Z\\[\\]^_,a-z{|}~-]";
	var qptext = "(?:"+qtext+"|[\"\\\\])";
	var quotedCore = "\"(?: ?(?:"+qtext+"|\\\\"+qptext+"))* ?\"";	//quoted string without surrounding spaces
	
	var dotAtom = "(?: ?"+atext+"+(?:\\."+atext+"+)* ?)";
	var quotedString = "(?: ?"+quotedCore+" ?)";
	
	var localPart = "(?:"+dotAtom+"|"+quotedString+")";
	var domain = dotAtom;	//we won't support IPv6 or IPvFuture formatted domains
	//one atext character or quoted string per repetition, each optionally preceded by a space;
	// this keeps matching linear (repeating atext+ inside a repeated group backtracks exponentially)
	var displayName = "(?:(?: ?(?:"+atext+"|"+quotedCore+"))+ ?)";
	
	//groups: 1 display name (optional), 2 local part, 3 domain
	var rxNameAddr = new RegExp("^("+displayName+")? ?<("+localPart+")@("+domain+")> ?$");
	//groups: 1 local part, 2 domain
	var rxAddrSpec = new RegExp("^("+localPart+")@("+domain+")$");
	
	var name = "", local, host;
	var parsed = rxNameAddr.exec(str);
	var hasAngle = !!parsed;
	if(parsed)
	{
		name = parsed[1] || "";
		local = parsed[2];
		host = parsed[3];
	}
	else
	{
		parsed = rxAddrSpec.exec(str);
		if(!parsed) return "";	//invalid mailbox
		local = parsed[1];
		host = parsed[2];
	}
	
	//normalize the domain
	var normalizedDomain = normalizeURLDomain(host.replace(/^ +| +$/g, ""));
	if(!normalizedDomain) return "";	//invalid domain
	
	//remove spaces from the ends of the local part and display name
	var addrSpec = local.replace(/^ +| +$/g, "")+"@"+normalizedDomain;
	if(!hasAngle) return addrSpec;	//valid mailbox
	return name.replace(/^ +| +$/g, "")+"<"+addrSpec+">";	//valid mailbox
}

//****************************************************************
//*************************** Fixes ******************************
//****************************************************************

//Attempts to fix a URL if needed.
//str: the URL; spaces, quotes and angle brackets in it are percent-encoded first
//domain: domain to use if the url is relative or has no host of its own
//Tries, in order: str as it is; "http://"+domain+str if str starts with "/"; "http://"+str if
// str is not a valid URI; and finally rebuilding the URL from the parts of the URI.
//Returns the parts object from parseURL(), or null if it can't be fixed.
function fixURL(str, domain)
{
	str = str.replace(/ /g, "%20");	//make sure all spaces are escaped
	var url = parseURL(str);
	if(url) return url;	//valid URL
	
	domain = normalizeURLDomain(domain);
	str = str.replace(/"/g, "%22").replace(/</g, "%3C").replace(/>/g, "%3E");
	var uri = parseURI(str);
	if(!uri && str.charAt(0) == "/")	//relative path
	{
		if(!domain) return null;	//invalid URL; can't fix it since no valid domain was given
		str = "http://"+domain+str;
		url = parseURL(str);
		if(url) return url;	//it's now a valid URL
		uri = parseURI(str);
	}
	if(!uri && str.slice(0,7) != "http://" && str.slice(0,8) != "https://")
	{
		str = "http://"+str;
		url = parseURL(str);
		if(url) return url;	//it's now a valid URL
		uri = parseURI(str);
	}
	if(!uri) return null;	//invalid URI; can't be fixed
	
	//valid URI; try to make it a valid URL
	//note: parseURI() names the host part "host"; fall back to the given domain only if it's empty
	str = uri.scheme+"://";
	str += uri.host || domain;
	str += uri.port ? ":"+uri.port : "";
	str += normalizeURLPath(uri.path)+(uri.query ? "?"+uri.query : "")+(uri.fragment ? "#"+uri.fragment : "");
	
	return parseURL(str);	//the parts if it's now a valid URL; otherwise null
}

//Attempts to fix a hyperlink address (http(s) or mailto) if needed.
//str: the address
//domain: domain to use if the url is relative
//allowMailto: if true, an address with the mailto scheme is handled by parseMailto();
// every other address (including unknown or missing schemes) is assumed to be meant as
// a URL and is handled by fixURL()
//Returns the fixed address, or "" if it can't be fixed.
function fixHyperlink(str, domain, allowMailto)
{
	domain = domain || "";
	
	//get the scheme
	var matches = str.match(/^[a-z0-9+.-]+:/i);
	var scheme = (matches ? matches[0].slice(0, matches[0].length-1).toLowerCase() : "");
	
	var lnk;
	if(allowMailto && scheme == "mailto")	//mailto address
	{
		lnk = parseMailto(str);
		return lnk ? lnk.uri : "";
	}
	
	//URL or unknown scheme (assume unknown is meant to be a URL)
	lnk = fixURL(str, domain);
	return lnk ? lnk.url : "";	//"" if it can't be fixed
}
以上是关于JavaScript 解析URI的主要内容，如果未能解决你的问题，请参考以下文章
JavaScript 解析URI
如何在 nodejs/javascript 中解析 Azure Blob URI？
使用 Javascript 解析 JSON。来自 Laravel 的 JSON
解析变量 URI (RegEx, Uri, String-Functions?) c#
使用 JSP，taglib URI 是不是意味着我的站点依赖于 URI 解析？
在 RESTful API 中解析 URI 参数