Thursday, May 29, 2008

URI / URL Parsing Using RegExp in JavaScript

Some time ago, when I was writing a web crawler in JavaScript, I had to parse URIs into their constituent parts. For that task, I modified Flog's URI Parser class to suit my needs.

Well, as part of the licensing, and for sharing of information, I thought I'd post the JavaScript code here.

/**
 * Splits an absolute URI into its components with a single regular expression.
 *
 * Modified from FlogUriParser found at
 * http://www.flog.co.nz/index.php/journal/prototype-uri-parser-class/
 * (originally written as a class for use with prototype.js).
 *
 * After a successful parse the instance exposes these string properties
 * (each is "" when the component is absent):
 *   url               - the full text matched by the expression
 *   protocol          - scheme without the "://" (word characters only)
 *   username/password - user info before "@" (word characters only)
 *   host, port        - authority; port is digits only
 *   pathname          - path up to the first ";", "|", "?" or "#"
 *   urlparamseparator - the ";" or "|" that introduces a path parameter
 *   urlparam          - text after that separator, up to "?" or "#"
 *   querystring       - raw query text without the leading "?"
 *   fragment          - everything after the first "#"
 *   results           - the raw match array, or null when nothing matched
 *
 * The host must be non-empty, so inputs that start with "/", "?", ":" or "#"
 * (for example relative references) do not match; in that case every
 * component property is set to null.
 *
 * @param {string} [uri] - URI to parse immediately; omit to call parse() later.
 */
function UriParser(uri) {
    // Capture groups: 2 protocol, 4 username, 5 password, 6 host, 7 port,
    // 8 pathname, 9 param separator, 10 url param, 11 query, 12 fragment.
    // The host class excludes "#" so that "host#frag" is not swallowed whole,
    // and the fragment group takes the rest of the input so that fragments
    // containing "-", ".", "/" etc. are not cut short.
    this._regExp = /^((\w+):\/\/\/?)?((\w+):?(\w+)?@)?([^\/\?:#]+):?(\d+)?(\/?[^\?#;\|]+)?([;\|])?([^\?#]+)?\??([^#]+)?#?(.*)/;
    this.username = "";
    this.password = "";
    this.port = "";
    this.protocol = "";
    this.host = "";
    this.pathname = "";
    this.url = "";
    this.urlparamseparator = "";
    this.urlparam = "";
    // parse() stores the raw query text, so start with a string like the
    // other components rather than an object.
    this.querystring = "";
    this.fragment = "";
    this.results = null;

    /**
     * Reads capture group i from match array r.
     *
     * @param {Array|null} r - result of RegExp#exec, or null when no match.
     * @param {number} i - capture group index.
     * @returns {string|null} the captured text, "" when the group did not
     *     participate in the match, or null when r itself is null.
     */
    this._getVal = function(r, i) {
        if (!r) {
            return null;
        }
        return (typeof r[i] === 'undefined') ? "" : r[i];
    };

    /**
     * Parses uri and overwrites every component property on this instance.
     *
     * @param {string} uri - the URI to split.
     * @returns {Array|null} the raw match array, or null when uri does not match.
     */
    this.parse = function(uri) {
        var r = this._regExp.exec(uri);
        this.results = r;
        this.url = this._getVal(r, 0);
        this.protocol = this._getVal(r, 2);
        this.username = this._getVal(r, 4);
        this.password = this._getVal(r, 5);
        this.host = this._getVal(r, 6);
        this.port = this._getVal(r, 7);
        this.pathname = this._getVal(r, 8);
        this.urlparamseparator = this._getVal(r, 9);
        this.urlparam = this._getVal(r, 10);
        this.querystring = this._getVal(r, 11);
        this.fragment = this._getVal(r, 12);
        return r;
    };

    if (uri) {
        this.parse(uri);
    }
}

2 comments:

Anonymous said...

An alternative is parseUri
http://blog.stevenlevithan.com/archives/parseuri

Robert said...

Just wanted to say THANKS for this post.
Your regular expression works perfectly :)