/* */
#include "uri_split.h"

/* NOTE(review): the header name after the second #include was missing in
   the source text.  <stdlib.h> supplies NULL and <stdint.h> supplies
   int32_t / uint8_t / UINT16_MAX used below -- confirm against the
   original file. */
#include <stdint.h>
#include <stdlib.h>

/*
 * States of the URI splitting state machine.  A URI_BEFORE_xxx state means
 * "the next character is the first character of xxx"; the matching URI_xxx
 * state means "we are inside xxx".
 *
 * While scanning the authority part it is not yet known whether the text
 * after "//" is "user", "user:passwd", "host" or "host:port", hence the
 * URI_MAYBE_USER / URI_MAYBE_PASSWD states: the decision is made when '@',
 * '/', '?', '#' or the end of input is seen.
 */
typedef enum {
  URI_BEFORE_SCHEME,
  URI_SCHEME,
  URI_SCHEME_SLASH1,
  URI_SCHEME_SLASH2,
  URI_BEFORE_MAYBE_USER,
  URI_MAYBE_USER,
  URI_BEFORE_MAYBE_PASSWD,
  URI_MAYBE_PASSWD,
  URI_BEFORE_HOST,
  URI_HOST,
  URI_BEFORE_IPV6HOST,
  URI_IPV6HOST,
  URI_AFTER_IPV6HOST,
  URI_BEFORE_PORT,
  URI_PORT,
  URI_PATH,
  URI_BEFORE_QUERY,
  URI_QUERY,
  URI_BEFORE_FRAGMENT,
  URI_FRAGMENT
} uri_split_state;

/*
 * Records the half-open range [first, last) as component |field| of |res|,
 * stored as an offset from |uri| plus a length, and marks the component as
 * present in res->field_set.  Does nothing when |first| is NULL, i.e. when
 * the component was not found.
 */
static void uri_set_field(uri_split_result *res, int field, const char *first,
                          const char *last, const char *uri)
{
  if(first) {
    res->field_set |= 1 << field;
    res->fields[field].off = first - uri;
    res->fields[field].len = last - first;
  }
}

/* Returns nonzero if |c| is an ASCII decimal digit. */
static int is_digit(char c) { return '0' <= c && c <= '9'; }

/*
 * Splits the NUL-terminated string |uri| of the form
 *
 *   scheme://[user[:passwd]@]host[:port][/path][?query][#fragment]
 *
 * into its components.  |host| may be a bracketed IPv6 literal; in that
 * case USF_IPV6ADDR is set in res->flags and the host component excludes
 * the brackets.
 *
 * On success returns 0 and, if |res| is not NULL, fills it in: each
 * component found is flagged in res->field_set and described by an
 * offset/length pair into |uri|.  Additionally USR_USERINFO covers the
 * text from the start of the user name up to the '@' that ends the
 * userinfo, USR_BASENAME covers the non-empty text after the last '/' of
 * the path, and res->port holds the numeric port (0 when none was given).
 *
 * Returns -1 if |uri| cannot be parsed: missing "scheme://", empty host,
 * unterminated IPv6 literal, or a port that is not a decimal number in
 * [0, 65535].  |res| is not modified on failure.  Passing NULL for |res|
 * only validates |uri|.
 */
int uri_split(uri_split_result *res, const char *uri)
{
  int state = URI_BEFORE_SCHEME;
  const char *scheme_first = NULL, *scheme_last = NULL;
  const char *host_first = NULL, *host_last = NULL;
  const char *path_first = NULL, *path_last = NULL;
  const char *query_first = NULL, *query_last = NULL;
  const char *fragment_first = NULL, *fragment_last = NULL;
  const char *user_first = NULL, *user_last = NULL;
  const char *passwd_first = NULL, *passwd_last = NULL;
  const char *last_atmark = NULL;
  const char *last_slash = NULL;
  const char *p = uri;
  int32_t port = -1; /* -1 means "no (valid) port seen" */
  uint8_t flags = 0;

  for(; *p; ++p) {
    switch(state) {
    case URI_BEFORE_SCHEME:
      scheme_first = p;
      state = URI_SCHEME;
      break;
    case URI_SCHEME:
      if(*p == ':') {
        scheme_last = p;
        state = URI_SCHEME_SLASH1;
      }
      break;
    case URI_SCHEME_SLASH1:
      if(*p == '/') {
        state = URI_SCHEME_SLASH2;
      } else {
        return -1;
      }
      break;
    case URI_SCHEME_SLASH2:
      if(*p == '/') {
        state = URI_BEFORE_MAYBE_USER;
      } else {
        return -1;
      }
      break;
    case URI_BEFORE_MAYBE_USER:
      switch(*p) {
      case '@':
      case ':':
      case '/':
        /* Empty user or empty host. */
        return -1;
      case '[':
        state = URI_BEFORE_IPV6HOST;
        break;
      default:
        user_first = p;
        state = URI_MAYBE_USER;
      }
      break;
    case URI_MAYBE_USER:
      switch(*p) {
      case '@':
        last_atmark = p;
        break;
      case ':':
        user_last = p;
        state = URI_BEFORE_MAYBE_PASSWD;
        break;
      case '[':
        /* An IPv6 literal may only start right after "user@". */
        if(last_atmark == p - 1) {
          user_last = last_atmark;
          state = URI_BEFORE_IPV6HOST;
        } else {
          return -1;
        }
        break;
      case '/':
      case '?':
      case '#':
        /* It turns out that this is only host, or user + host if
           last_atmark is not NULL. */
        if(last_atmark) {
          host_first = last_atmark + 1;
          host_last = p;
          /* Reject "user@" followed directly by '/', '?' or '#': the
             host would be empty, exactly as at end of input. */
          if(host_first == host_last) {
            return -1;
          }
          user_last = last_atmark;
        } else {
          host_first = user_first;
          host_last = p;
          user_first = user_last = NULL;
        }
        switch(*p) {
        case '/':
          path_first = last_slash = p;
          state = URI_PATH;
          break;
        case '?':
          state = URI_BEFORE_QUERY;
          break;
        case '#':
          state = URI_BEFORE_FRAGMENT;
          break;
        }
        break;
      }
      break;
    case URI_BEFORE_MAYBE_PASSWD:
      passwd_first = p;
      switch(*p) {
      case '@':
        /* Empty password. */
        passwd_last = last_atmark = p;
        state = URI_BEFORE_HOST;
        break;
      case '/':
        return -1;
      default:
        /* Start accumulating a port number in case this turns out to be
           a port rather than a password. */
        if(is_digit(*p)) {
          port = *p - '0';
        }
        state = URI_MAYBE_PASSWD;
      }
      break;
    case URI_MAYBE_PASSWD:
      switch(*p) {
      case '@':
        passwd_last = last_atmark = p;
        /* Passwd confirmed, reset port to -1. */
        port = -1;
        state = URI_BEFORE_HOST;
        break;
      case '[':
        return -1;
      case '/':
      case '?':
      case '#':
        /* This is port, not password.  The port is in [passwd_first, p). */
        if(port == -1) {
          return -1;
        }
        if(last_atmark) {
          host_first = last_atmark + 1;
          host_last = passwd_first - 1;
          /* Reject "user@:port": the host would be empty. */
          if(host_first == host_last) {
            return -1;
          }
          user_last = last_atmark;
        } else {
          host_first = user_first;
          host_last = passwd_first - 1;
          user_first = user_last = NULL;
        }
        passwd_first = passwd_last = NULL;
        switch(*p) {
        case '/':
          path_first = last_slash = p;
          state = URI_PATH;
          break;
        case '?':
          state = URI_BEFORE_QUERY;
          break;
        case '#':
          state = URI_BEFORE_FRAGMENT;
          break;
        }
        break;
      default:
        /* Keep accumulating the candidate port; once a non-digit or an
           out-of-range value is seen, this can only be a password. */
        if(port != -1) {
          if(is_digit(*p)) {
            port *= 10;
            port += *p - '0';
            if(port > UINT16_MAX) {
              port = -1;
            }
          } else {
            port = -1;
          }
        }
        break;
      }
      break;
    case URI_BEFORE_HOST:
      switch(*p) {
      case ':':
      case '/':
        return -1;
      case '[':
        state = URI_BEFORE_IPV6HOST;
        break;
      default:
        host_first = p;
        state = URI_HOST;
        break;
      }
      break;
    case URI_HOST:
      switch(*p) {
      case ':':
        host_last = p;
        state = URI_BEFORE_PORT;
        break;
      case '/':
        host_last = path_first = last_slash = p;
        state = URI_PATH;
        break;
      case '?':
        host_last = p;
        state = URI_BEFORE_QUERY;
        break;
      case '#':
        host_last = p;
        state = URI_BEFORE_FRAGMENT;
        break;
      }
      break;
    case URI_BEFORE_IPV6HOST:
      /* "[]" is not a valid host. */
      if(*p == ']') {
        return -1;
      }
      host_first = p;
      state = URI_IPV6HOST;
      break;
    case URI_IPV6HOST:
      if(*p == ']') {
        flags |= USF_IPV6ADDR;
        host_last = p;
        state = URI_AFTER_IPV6HOST;
      }
      break;
    case URI_AFTER_IPV6HOST:
      switch(*p) {
      case ':':
        state = URI_BEFORE_PORT;
        break;
      case '/':
        path_first = last_slash = p;
        state = URI_PATH;
        break;
      case '?':
        state = URI_BEFORE_QUERY;
        break;
      case '#':
        state = URI_BEFORE_FRAGMENT;
        break;
      default:
        return -1;
      }
      break;
    case URI_BEFORE_PORT:
      if(is_digit(*p)) {
        port = *p - '0';
        state = URI_PORT;
      } else {
        return -1;
      }
      break;
    case URI_PORT:
      switch(*p) {
      case '/':
        path_first = last_slash = p;
        state = URI_PATH;
        break;
      case '?':
        state = URI_BEFORE_QUERY;
        break;
      case '#':
        state = URI_BEFORE_FRAGMENT;
        break;
      default:
        if(is_digit(*p)) {
          port *= 10;
          port += *p - '0';
          if(port > UINT16_MAX) {
            return -1;
          }
        } else {
          return -1;
        }
      }
      break;
    case URI_PATH:
      switch(*p) {
      case '/':
        last_slash = p;
        break;
      case '?':
        path_last = p;
        state = URI_BEFORE_QUERY;
        break;
      case '#':
        path_last = p;
        state = URI_BEFORE_FRAGMENT;
        break;
      }
      break;
    case URI_BEFORE_QUERY:
      query_first = p;
      if(*p == '#') {
        /* Empty query directly followed by a fragment. */
        query_last = p;
        state = URI_BEFORE_FRAGMENT;
      } else {
        state = URI_QUERY;
      }
      break;
    case URI_QUERY:
      if(*p == '#') {
        query_last = p;
        state = URI_BEFORE_FRAGMENT;
      }
      break;
    case URI_BEFORE_FRAGMENT:
      fragment_first = p;
      state = URI_FRAGMENT;
      break;
    case URI_FRAGMENT:
      break;
    }
  }

  /* Handle premature states: the input ended, so close whichever
     component was being scanned or fail if a mandatory part is missing.
     Here p points at the terminating NUL. */
  switch(state) {
  case URI_BEFORE_SCHEME:
  case URI_SCHEME:
  case URI_SCHEME_SLASH1:
  case URI_SCHEME_SLASH2:
    return -1;
  case URI_BEFORE_MAYBE_USER:
    return -1;
  case URI_MAYBE_USER:
    if(last_atmark) {
      host_first = last_atmark + 1;
      host_last = p;
      if(host_first == host_last) {
        return -1;
      }
      user_last = last_atmark;
    } else {
      host_first = user_first;
      host_last = p;
      user_first = user_last = NULL;
    }
    break;
  case URI_BEFORE_MAYBE_PASSWD:
    return -1;
  case URI_MAYBE_PASSWD:
    /* No '@' followed, so the text after ':' must be a valid port. */
    if(port == -1) {
      return -1;
    }
    if(last_atmark) {
      host_first = last_atmark + 1;
      host_last = passwd_first - 1;
      /* Reject "user@:port": the host would be empty. */
      if(host_first == host_last) {
        return -1;
      }
      user_last = last_atmark;
    } else {
      host_first = user_first;
      host_last = passwd_first - 1;
      user_first = user_last = NULL;
    }
    passwd_first = passwd_last = NULL;
    break;
  case URI_BEFORE_HOST:
    return -1;
  case URI_HOST:
    host_last = p;
    break;
  case URI_BEFORE_IPV6HOST:
  case URI_IPV6HOST:
    return -1;
  case URI_AFTER_IPV6HOST:
    break;
  case URI_BEFORE_PORT:
    return -1;
  case URI_PORT:
    if(port == -1) {
      return -1;
    }
    break;
  case URI_PATH:
    path_last = p;
    break;
  case URI_BEFORE_QUERY:
    query_first = query_last = p;
    break;
  case URI_QUERY:
    query_last = p;
    break;
  case URI_BEFORE_FRAGMENT:
    fragment_first = fragment_last = p;
    break;
  case URI_FRAGMENT:
    fragment_last = p;
    break;
  default:
    return -1;
  }

  if(res) {
    res->field_set = 0;
    res->port = 0;
    res->flags = flags;

    uri_set_field(res, USR_SCHEME, scheme_first, scheme_last, uri);
    uri_set_field(res, USR_HOST, host_first, host_last, uri);
    uri_set_field(res, USR_PATH, path_first, path_last, uri);
    uri_set_field(res, USR_QUERY, query_first, query_last, uri);
    uri_set_field(res, USR_FRAGMENT, fragment_first, fragment_last, uri);
    uri_set_field(res, USR_USER, user_first, user_last, uri);
    uri_set_field(res, USR_PASSWD, passwd_first, passwd_last, uri);
    /* Userinfo spans everything from the user name up to the '@'. */
    if(res->field_set & (1 << USR_USER)) {
      uri_set_field(res, USR_USERINFO, user_first, last_atmark, uri);
    }
    /* Basename is reported only when something follows the last '/'. */
    if(last_slash && last_slash + 1 != path_last) {
      uri_set_field(res, USR_BASENAME, last_slash + 1, path_last, uri);
    }
    if(port != -1) {
      res->field_set |= 1 << USR_PORT;
      res->port = port;
    }
  }

  return 0;
}