Uniform Resource Locators

URL specification in BNF form

BNF for specific URL schemes
This is a BNF-like description of the Uniform Resource Locator syntax, using the conventions of RFC822, except that "|" is used to designate alternatives, and brackets [] are used around optional or repeated elements. Briefly, literals are quoted with "", optional elements are enclosed in [brackets], and elements may be preceded with <n>* to designate n or more repetitions of the following element; n defaults to 0.

; The generic form of a URL is:
genericurl    = scheme ":" schemepart
; Specific predefined schemes are defined here; new schemes
; may be registered with IANA
url           = httpurl | ftpurl | newsurl | nntpurl | telneturl | gopherurl | waisurl | mailtourl | fileurl | prosperourl | otherurl
; new schemes follow the general syntax
otherurl      = genericurl
; the scheme is in lower case; interpreters should use case-ignore
scheme        = 1*[ lowalpha | digit | "+" | "-" | "." ]
schemepart    = *xchar | ip-schemepart
; URL schemeparts for ip based protocols:
ip-schemepart = "//" login [ "/" urlpath ]
login         = [ user [ ":" password ] "@" ] hostport
hostport      = host [ ":" port ]
host          = hostname | hostnumber
hostname      = *[ domainlabel "." ] toplabel
domainlabel   = alphadigit | alphadigit *[ alphadigit | "-" ] alphadigit
toplabel      = alpha | alpha *[ alphadigit | "-" ] alphadigit
alphadigit    = alpha | digit
hostnumber    = digits "." digits "." digits "." digits
port          = digits
user          = *[ uchar | ";" | "?" | "&" | "=" ]
password      = *[ uchar | ";" | "?" | "&" | "=" ]
urlpath       = *xchar    ; depends on protocol see section 3.1
; The predefined schemes:

FTP (see also RFC959)
ftpurl        = "ftp://" login [ "/" fpath [ ";type=" ftptype ]] fpath         = fsegment *[ "/" fsegment ] fsegment      = *[ uchar | "?" | ":" | "@" | "&" | "=" ] ftptype       = "A" | "I" | "D" | "a" | "i" | "d"

FILE
fileurl       = "file://" [ host | "localhost" ] "/" fpath

HTTP
httpurl       = "http://" hostport [ "/" hpath [ "?" search ]] hpath         = hsegment *[ "/" hsegment ] hsegment      = *[ uchar | ";" | ":" | "@" | "&" | "=" ] search        = *[ uchar | ";" | ":" | "@" | "&" | "=" ]

GOPHER (see also RFC1436)
gopherurl     = "gopher://" hostport [ "/" [ gtype [ selector [ "%09" search [ "%09" gopher+_string ] ] ] ] ]
gtype         = xchar
selector      = *xchar
gopher+_string = *xchar

MAILTO (see also RFC822)
mailtourl     = "mailto:" encoded822addr encoded822addr = 1*xchar              ; further defined in RFC822

NEWS (see also RFC1036)
newsurl       = "news:" grouppart grouppart     = "*" | group | article group         = alpha *[ alpha | digit | "-" | "." | "+" | "_" ] article       = 1*[ uchar | ";" | "/" | "?" | ":" | "&" | "=" ] "@" host

NNTP (see also RFC977)
nntpurl       = "nntp://" hostport "/" group [ "/" digits ]

TELNET
telneturl     = "telnet://" login [ "/" ]

WAIS (see also RFC1625)
waisurl       = waisdatabase | waisindex | waisdoc waisdatabase  = "wais://" hostport "/" database waisindex     = "wais://" hostport "/" database "?" search waisdoc       = "wais://" hostport "/" database "/" wtype "/" wpath database      = *uchar wtype         = *uchar wpath         = *uchar

PROSPERO
prosperourl   = "prospero://" hostport "/" ppath *[ fieldspec ] ppath         = psegment *[ "/" psegment ] psegment      = *[ uchar | "?" | ":" | "@" | "&" | "=" ] fieldspec     = ";" fieldname "=" fieldvalue fieldname     = *[ uchar | "?" | ":" | "@" | "&" ] fieldvalue    = *[ uchar | "?" | ":" | "@" | "&" ]

Miscellaneous definitions
lowalpha      = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
hialpha       = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
alpha         = lowalpha | hialpha
digit         = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
safe          = "$" | "-" | "_" | "." | "+"
extra         = "!" | "*" | "'" | "(" | ")" | ","
national      = "{" | "}" | "|" | "\" | "^" | "~" | "[" | "]" | "`"
punctuation   = "<" | ">" | "#" | "%" | <">
reserved       = ";" | "/" | "?" | ":" | "@" | "&" | "="
hex            = digit | "A" | "B" | "C" | "D" | "E" | "F" |
                 "a" | "b" | "c" | "d" | "e" | "f"
escape         = "%" hex hex
unreserved     = alpha | digit | safe | extra
uchar          = unreserved | escape
xchar          = unreserved | reserved | escape
digits         = 1*digit