Extracting 1 or more hyperlinks from paragraph text in Javascript using Regular Expression
Sorry to bother you guys again, but here's my dilemma.
There must be a "better" regular expression for identifying HTML links in a paragraph of text (there can be more than one link in the text). How do I extract all the links and turn them into anchors in JavaScript?
My attempt (in JavaScript) is like this:
// RegExp source (kept as a string) for an absolute URL:
//   scheme   - http, https or ftp
//   host     - optional "www.", then a dotted name ending in a 2-4 letter label,
//              "localhost", or four dot-separated groups of 1-3 digits
//   optional - ":port", "/path", "?query" and "#fragment" parts
var urlPattern = "(https?|ftp)://(www\\.)?(((([a-zA-Z0-9.-]+\\.){1,}[a-zA-Z]{2,4}|localhost))|((\\d{1,3}\\.){3}(\\d{1,3})))(:(\\d+))?(/([a-zA-Z0-9-._~!$&'()*+,;=:@/]|%[0-9A-F]{2})*)?(\\?([a-zA-Z0-9-._~!$&'()*+,;=:/?@]|%[0-9A-F]{2})*)?(#([a-zA-Z0-9._-]|%[0-9A-F]{2})*)?";

/**
 * Matches `s` against urlPattern.
 *
 * The RegExp is built without flags, so only the first URL is reported:
 * element 0 of the result is the whole match and the remaining elements
 * are the pattern's capture groups.
 *
 * @param {string} s - Text to search.
 * @returns {?Array} The match array for the first URL, or null when nothing matches.
 */
function extractURLs(s) {
  var firstUrlRegex = new RegExp(urlPattern);
  return s.match(firstUrlRegex);
}
// `s`, the argument of extractURLs, is a String.
// Manual test: run the extractor on a sentence holding two links and show
// whatever it returns (the first link contains a deliberate space).
var text = "Check this video out http://ww w.youtube.com/watch?v=y3U3R3b1dOg or http://www.youtube.com/watch?v=sX6Vm0MoPCY";
alert(extractURLs(text));
(A space has been deliberately added inside the hyperlink here to allow posting the question on SO.) Result: I only get the first hyperlink and not the second one. Has anybody done something similar or better that I can use?
Thanks in advance.
Use the "g" modifier:
/**
 * Collects every URL found in the given text.
 *
 * @param {string} s - Text to scan.
 * @returns {?Array<string>} Every substring matching urlPattern, or null if there is none.
 */
function extractURLs(s) {
  // The "g" flag makes match() return all full matches rather than only the first one.
  var globalUrlRegex = new RegExp(urlPattern, "g");
  return s.match(globalUrlRegex);
}
// RegExp source (kept as a string) for an absolute URL: http/https/ftp scheme,
// optional "www.", a host (dotted name with a 2-4 letter last label, "localhost",
// or four dot-separated digit groups), then optional port, path, query and fragment.
var urlPattern = "(https?|ftp)://(www\\.)?(((([a-zA-Z0-9.-]+\\.){1,}[a-zA-Z]{2,4}|localhost))|((\\d{1,3}\\.){3}(\\d{1,3})))(:(\\d+))?(/([a-zA-Z0-9-._~!$&'()*+,;=:@/]|%[0-9A-F]{2})*)?(\\?([a-zA-Z0-9-._~!$&'()*+,;=:/?@]|%[0-9A-F]{2})*)?(#([a-zA-Z0-9._-]|%[0-9A-F]{2})*)?";

/**
 * Finds every URL in a piece of text.
 *
 * @param {string} s - Text to search.
 * @returns {?Array<string>} All matched URLs in order of appearance, or null when there are none.
 */
function extractURLs(s) {
  // Without "g", match() returns the first match followed by its capture groups,
  // so index 1 would be the scheme ("http") instead of the second URL.
  return s.match(new RegExp(urlPattern, "g"));
}

var text = "Check this video out http://www.youtube.com/watch?v=y3U3R3b1dOg or http://www.youtube.com/watch?v=sX6Vm0MoPCY";
var results = extractURLs(text);
// `results` already holds the URLs; display them directly instead of running
// the extractor a second time on the joined string.
alert(results[0] + ", " + results[1]);
It is better to write it as,
// Compiled once, as a regex literal, for an absolute URL: http/https/ftp scheme,
// optional "www.", a host (dotted name with a 2-4 letter last label, "localhost",
// or four dot-separated digit groups), then optional port, path, query and fragment.
// In a literal every "/" must be escaped (an unescaped one ends the literal) and
// escapes take a single backslash, unlike the string form passed to new RegExp().
var urlPattern = /(https?|ftp):\/\/(www\.)?(((([a-zA-Z0-9.-]+\.){1,}[a-zA-Z]{2,4}|localhost))|((\d{1,3}\.){3}(\d{1,3})))(:(\d+))?(\/([a-zA-Z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?(\?([a-zA-Z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)?(#([a-zA-Z0-9._-]|%[0-9A-F]{2})*)?/g;

/**
 * Finds every URL in a piece of text using the shared, pre-compiled pattern.
 *
 * @param {string} s - Text to search.
 * @returns {?Array<string>} All matched URLs in order of appearance, or null when there are none.
 */
function extractURLs(s) {
  // String.prototype.match restarts a global regex from index 0 on each call,
  // so sharing one RegExp object between calls is safe here.
  return s.match(urlPattern);
}
Here urlPattern is pre-compiled, rather than the RegExp being compiled every time the function is called, which results in better performance.
精彩评论