123 lines
3.2 KiB
Ragel
123 lines
3.2 KiB
Ragel
// Package fasturl is a Go URL parser built on a [Ragel](http://www.colm.net/open-source/ragel/) state machine rather than on regular expressions or the standard library's `url.Parse`.
|
|
//
|
|
package fasturl
|
|
|
|
import "fmt"
|
|
|
|
%%{
	# url_parser: a lenient URL grammar. The rule names follow RFC 3986,
	# but several rules deliberately accept more than the RFC allows
	# (raw delimiters, backslashes as path separators, drive-letter paths).
	machine url_parser;

	# ---------------------------------------------------------------
	# Actions
	#
	# mark, host_mark and port_mark are integer offsets into data and
	# u is the URL being filled in; all of them are declared by the
	# host function that embeds this machine.
	# ---------------------------------------------------------------

	# Record the offset at which a component starts.
	action mark      { mark = fpc }
	action mark_host { host_mark = fpc }
	action mark_port { port_mark = fpc }

	# Fires when the text before the current position was read as
	# `scheme ":"`; the -1 drops that trailing colon.
	action save_scheme {
		u.Protocol = data[0:fpc-1]
	}

	action save_host {
		u.Host = data[host_mark:fpc]
	}

	# The port is stored only when its mark lies after the most recent
	# host mark.
	action save_port {
		if host_mark < port_mark {
			u.Port = data[port_mark:fpc]
		}
	}

	# Path and query keep the first non-empty value they are given.
	action save_path {
		if len(u.Path) == 0 {
			u.Path = data[mark:fpc]
		}
	}

	action save_query {
		if len(u.Query) == 0 {
			u.Query = data[mark:fpc]
		}
	}

	action save_fragment {
		u.Fragment = data[mark:fpc]
	}

	# ---------------------------------------------------------------
	# Character classes
	# ---------------------------------------------------------------

	pct_encoded = "%" xdigit xdigit;

	gen_delims = ":" | "/" | "?" | "#" | "[" | "]" | "@";
	sub_delims = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "=";

	# reserved is defined for completeness; no other rule refers to it.
	reserved   = gen_delims | sub_delims;
	unreserved = alpha | digit | "-" | "." | "_" | "~";

	# Many clients (curl, wget, ...) do not encode these, so they are
	# accepted verbatim.
	delims = "<" | ">" | "%" | "#" | '"';
	unwise = " " | "{" | "}" | "|" | "\\" | "^" | "[" | "]" | "`";

	pchar = unreserved | pct_encoded | sub_delims | ":" | "@" | delims | unwise;

	# Both forward and backward slashes separate path segments.
	slash = "/" | "\\";

	# ---------------------------------------------------------------
	# Path
	# ---------------------------------------------------------------

	# Any pchar except the characters that open a query or a fragment.
	path_char = pchar - ("?" | "#");

	# Everything after the leading separator: a non-empty first segment
	# followed by any number of slash-prefixed (possibly empty) segments.
	path_rest = path_char+ ( slash path_char* )*;

	path = ( slash path_rest? ) >mark %save_path;

	# Like path, but may also begin with a drive prefix: a letter, a
	# colon and a slash.
	drivepath = ( ( slash | ( alpha ":" slash ) ) path_rest? ) >mark %save_path;

	# ---------------------------------------------------------------
	# Scheme and authority
	# ---------------------------------------------------------------

	scheme = alpha ( alpha | digit | "+" | "-" | "." )*;

	# Simple IPv4 address: the octet values are not range-checked.
	dec_octet   = digit{1,3};
	IPv4address = dec_octet "." dec_octet "." dec_octet "." dec_octet;

	IPvFuture = "v" xdigit+ "." ( unreserved | sub_delims | ":" )+;

	# Simple IPv6 address: any run of colons and hex digits, optionally
	# ending in an IPv4 address.
	IPv6address = ( ":" | xdigit )+ IPv4address?;

	IP_literal = "[" ( IPv6address | IPvFuture ) "]";

	reg_name = ( unreserved | pct_encoded | sub_delims )+;

	userinfo = ( unreserved | pct_encoded | sub_delims | ":" | "@" )*;

	host = IP_literal | IPv4address | reg_name;

	# One to five characters; not restricted to digits.
	port = ( pchar - ("/" | "?" | "#") ){1,5};

	authority = ( userinfo "@" )? ( host >mark_host %save_host ) ( ":" port >mark_port %save_port )?;

	# ---------------------------------------------------------------
	# Query, fragment and the top-level forms
	# ---------------------------------------------------------------

	fragment = ( pchar | "/" | "?" )* >mark %save_fragment;
	query    = ( pchar - "#" )* >mark %save_query;

	full_ref     = drivepath? ( "?" query )? ( "#" fragment )?;
	relative_ref = path? ( "?" query )? ( "#" fragment )?;

	absolute_hier_part = ("//")? authority? full_ref?;

	# hier_part is defined but not reachable from main.
	hier_part = ("//")? authority? relative_ref?;

	absolute_URI = ( ( scheme ":" ) >mark %save_scheme )? absolute_hier_part;

	URI = absolute_URI | relative_ref;

	main := URI;

	write data;
}%%
|
|
|
|
// URL holds the individual components that ParseURL extracts from a URL
// string. Every field is a substring of the parsed input; a component the
// parser did not capture is left as the empty string.
type URL struct {
	// Protocol is the scheme, without the ":" that follows it.
	Protocol string

	// Host is the host part of the authority: a registered name, an IPv4
	// address, or a bracketed IP literal (brackets included).
	Host string

	// Port is the text following the ":" after the host.
	Port string

	// Path is the path component, including its leading separator.
	Path string

	// Query is the text after "?", up to but not including any "#".
	Query string

	// Fragment is the text after "#".
	Fragment string
}
|
|
|
|
// ParseURL parses a given URL and returns a `URL` representing the different parts
|
|
func ParseURL(data string) (*URL, error){
|
|
mark, host_mark, port_mark, cs, p, pe, eof := 0, 0, 0, url_parser_en_main, 0, len(data), len(data)
|
|
|
|
u := &URL{}
|
|
|
|
%% write init;
|
|
%% write exec;
|
|
if cs < url_parser_first_final {
|
|
return nil, fmt.Errorf("Failed to match URL")
|
|
} else {
|
|
return u, nil
|
|
}
|
|
}
|
|
|