aria2/src/uri_split.c

489 lines
12 KiB
C
Raw Normal View History

2012-10-13 15:14:50 +00:00
/* <!-- copyright */
/*
* aria2 - The high speed download utility
*
* Copyright (C) 2012 Tatsuhiro Tsujikawa
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* In addition, as a special exception, the copyright holders give
* permission to link the code of portions of this program with the
* OpenSSL library under certain conditions as described in each
* individual source file, and distribute linked combinations
* including the two.
* You must obey the GNU General Public License in all respects
* for all of the code used other than OpenSSL. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you
* do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source
* files in the program, then also delete it here.
*/
/* copyright --> */
#include "uri_split.h"
#include <stdlib.h>
typedef enum {
URI_BEFORE_SCHEME,
URI_SCHEME,
URI_SCHEME_SLASH1,
URI_SCHEME_SLASH2,
URI_BEFORE_MAYBE_USER,
URI_MAYBE_USER,
URI_BEFORE_MAYBE_PASSWD,
URI_MAYBE_PASSWD,
URI_BEFORE_HOST,
URI_HOST,
URI_BEFORE_IPV6HOST,
URI_IPV6HOST,
URI_AFTER_IPV6HOST,
URI_BEFORE_PORT,
URI_PORT,
URI_PATH,
URI_BEFORE_QUERY,
URI_QUERY,
URI_BEFORE_FRAGMENT,
URI_FRAGMENT
} uri_split_state;
/*
 * Stores component |field| in |res| as the half-open range
 * [first, last) of |uri|: the offset is measured from |uri| and the
 * length is last - first.  The matching bit of res->field_set is
 * raised.  A NULL |first| means the component was never seen, and
 * |res| is left untouched.
 */
static void uri_set_field(uri_split_result *res, int field,
                          const char *first, const char *last,
                          const char *uri)
{
  if(!first) {
    return;
  }
  res->fields[field].off = first - uri;
  res->fields[field].len = last - first;
  res->field_set |= 1 << field;
}
/*
 * Returns 1 if |c| is one of the decimal digits '0'..'9', and 0
 * otherwise.
 */
static int is_digit(char c)
{
  if(c < '0') {
    return 0;
  }
  return c <= '9';
}
/*
 * Splits the NUL-terminated string |uri| of the shape
 *
 *   scheme://[user[:passwd]@]host[:port][/path][?query][#fragment]
 *
 * into its components in a single left-to-right pass.  The host may
 * also be written as a bracketed IPv6 literal "[...]"; in that case
 * USF_IPV6ADDR is set in the result flags and the recorded host range
 * excludes the brackets.
 *
 * On success 0 is returned and, if |res| is not NULL, each component
 * that was present is stored as an offset/length pair relative to
 * |uri| with its bit raised in res->field_set:
 *
 *   - USR_USERINFO covers everything from the start of the user up to
 *     the '@' that precedes the host, and is set whenever USR_USER is.
 *   - USR_BASENAME is the non-empty text following the last '/' of
 *     the path.
 *   - USR_PORT is signalled via field_set and the value is stored in
 *     res->port (0 when no port was given).
 *
 * Passing a NULL |res| only validates |uri|.
 *
 * On a malformed URI, or a NULL |uri|, -1 is returned.  |res| is only
 * written after parsing has succeeded, so it is left untouched on
 * failure.
 */
int uri_split(uri_split_result *res, const char *uri)
{
  int state = URI_BEFORE_SCHEME;
  const char *scheme_first = NULL, *scheme_last = NULL,
    *host_first = NULL, *host_last = NULL,
    *path_first = NULL, *path_last = NULL,
    *query_first = NULL, *query_last = NULL,
    *fragment_first = NULL, *fragment_last = NULL,
    *user_first = NULL, *user_last = NULL,
    *passwd_first = NULL, *passwd_last = NULL,
    *last_atmark = NULL, *last_slash = NULL,
    *p = uri;
  /* -1 means "no valid port collected (yet)". */
  int32_t port = -1;
  uint8_t flags = 0;

  if(!uri) {
    return -1;
  }

  for(; *p; ++p) {
    switch(state) {
    case URI_BEFORE_SCHEME:
      scheme_first = p;
      state = URI_SCHEME;
      break;
    case URI_SCHEME:
      if(*p == ':') {
        scheme_last = p;
        state = URI_SCHEME_SLASH1;
      }
      break;
    case URI_SCHEME_SLASH1:
      if(*p == '/') {
        state = URI_SCHEME_SLASH2;
      } else {
        return -1;
      }
      break;
    case URI_SCHEME_SLASH2:
      if(*p == '/') {
        state = URI_BEFORE_MAYBE_USER;
      } else {
        return -1;
      }
      break;
    case URI_BEFORE_MAYBE_USER:
      switch(*p) {
      case '@':
      case ':':
      case '/':
        /* Neither a user nor a host may be empty. */
        return -1;
      case '[':
        state = URI_BEFORE_IPV6HOST;
        break;
      default:
        /* Provisionally a user; reinterpreted as the host later if
           no '@' turns up. */
        user_first = p;
        state = URI_MAYBE_USER;
      }
      break;
    case URI_MAYBE_USER:
      switch(*p) {
      case '@':
        last_atmark = p;
        break;
      case ':':
        user_last = p;
        state = URI_BEFORE_MAYBE_PASSWD;
        break;
      case '[':
        /* An IPv6 literal is only allowed directly after "user@". */
        if(last_atmark == p - 1) {
          user_last = last_atmark;
          state = URI_BEFORE_IPV6HOST;
        } else {
          return -1;
        }
        break;
      case '/':
      case '?':
      case '#':
        /* It turns out that this is only host or user + host if
           last_atmark is not NULL. */
        if(last_atmark) {
          host_first = last_atmark + 1;
          host_last = p;
          /* "user@" directly followed by a delimiter leaves no host;
             reject it exactly as the end-of-input handling does. */
          if(host_first == host_last) {
            return -1;
          }
          user_last = last_atmark;
        } else {
          host_first = user_first;
          host_last = p;
          user_first = user_last = NULL;
        }
        switch(*p) {
        case '/':
          path_first = last_slash = p;
          state = URI_PATH;
          break;
        case '?':
          state = URI_BEFORE_QUERY;
          break;
        case '#':
          state = URI_BEFORE_FRAGMENT;
          break;
        }
        break;
      }
      break;
    case URI_BEFORE_MAYBE_PASSWD:
      passwd_first = p;
      switch(*p) {
      case '@':
        /* "user:@host": empty password. */
        passwd_last = last_atmark = p;
        state = URI_BEFORE_HOST;
        break;
      case '/':
        return -1;
      default:
        /* sums up port number in case of port. */
        if(is_digit(*p)) {
          port = *p - '0';
        }
        state = URI_MAYBE_PASSWD;
      }
      break;
    case URI_MAYBE_PASSWD:
      switch(*p) {
      case '@':
        passwd_last = last_atmark = p;
        /* Passwd confirmed, reset port to -1. */
        port = -1;
        state = URI_BEFORE_HOST;
        break;
      case '[':
        return -1;
      case '/':
      case '?':
      case '#':
        /* This is port not password.  port is in [passwd_first, p) */
        if(port == -1) {
          return -1;
        }
        /* The host ends at the ':' that precedes the port.
           NOTE(review): "user@:80" still produces an empty host
           here and in the end-of-input handling -- confirm whether
           that should be rejected as well. */
        if(last_atmark) {
          host_first = last_atmark + 1;
          host_last = passwd_first - 1;
          user_last = last_atmark;
        } else {
          host_first = user_first;
          host_last = passwd_first - 1;
          user_first = user_last = NULL;
        }
        passwd_first = passwd_last = NULL;
        switch(*p) {
        case '/':
          path_first = last_slash = p;
          state = URI_PATH;
          break;
        case '?':
          state = URI_BEFORE_QUERY;
          break;
        case '#':
          state = URI_BEFORE_FRAGMENT;
          break;
        }
        break;
      default:
        /* Keep accumulating while the text still looks like a valid
           port; once it does not, port stays -1 and the text can
           only be a password. */
        if(port != -1) {
          if(is_digit(*p)) {
            port *= 10;
            port += *p - '0';
            if(port > UINT16_MAX) {
              port = -1;
            }
          } else {
            port = -1;
          }
        }
        break;
      }
      break;
    case URI_BEFORE_HOST:
      switch(*p) {
      case ':':
      case '/':
        return -1;
      case '[':
        state = URI_BEFORE_IPV6HOST;
        break;
      default:
        host_first = p;
        state = URI_HOST;
        break;
      }
      break;
    case URI_HOST:
      switch(*p) {
      case ':':
        host_last = p;
        state = URI_BEFORE_PORT;
        break;
      case '/':
        host_last = path_first = last_slash = p;
        state = URI_PATH;
        break;
      case '?':
        host_last = p;
        state = URI_BEFORE_QUERY;
        break;
      case '#':
        host_last = p;
        state = URI_BEFORE_FRAGMENT;
        break;
      }
      break;
    case URI_BEFORE_IPV6HOST:
      /* "[]" is not a valid host. */
      if(*p == ']') {
        return -1;
      }
      host_first = p;
      state = URI_IPV6HOST;
      break;
    case URI_IPV6HOST:
      if(*p == ']') {
        flags |= USF_IPV6ADDR;
        host_last = p;
        state = URI_AFTER_IPV6HOST;
      }
      break;
    case URI_AFTER_IPV6HOST:
      switch(*p) {
      case ':':
        state = URI_BEFORE_PORT;
        break;
      case '/':
        path_first = last_slash = p;
        state = URI_PATH;
        break;
      case '?':
        state = URI_BEFORE_QUERY;
        break;
      case '#':
        state = URI_BEFORE_FRAGMENT;
        break;
      default:
        return -1;
      }
      break;
    case URI_BEFORE_PORT:
      if(is_digit(*p)) {
        port = *p - '0';
        state = URI_PORT;
      } else {
        return -1;
      }
      break;
    case URI_PORT:
      switch(*p) {
      case '/':
        path_first = last_slash = p;
        state = URI_PATH;
        break;
      case '?':
        state = URI_BEFORE_QUERY;
        break;
      case '#':
        state = URI_BEFORE_FRAGMENT;
        break;
      default:
        if(is_digit(*p)) {
          port *= 10;
          port += *p - '0';
          if(port > UINT16_MAX) {
            return -1;
          }
        } else {
          return -1;
        }
      }
      break;
    case URI_PATH:
      switch(*p) {
      case '/':
        last_slash = p;
        break;
      case '?':
        path_last = p;
        state = URI_BEFORE_QUERY;
        break;
      case '#':
        path_last = p;
        state = URI_BEFORE_FRAGMENT;
        break;
      }
      break;
    case URI_BEFORE_QUERY:
      query_first = p;
      if(*p == '#') {
        /* "?#": present but empty query. */
        query_last = p;
        state = URI_BEFORE_FRAGMENT;
      } else {
        state = URI_QUERY;
      }
      break;
    case URI_QUERY:
      if(*p == '#') {
        query_last = p;
        state = URI_BEFORE_FRAGMENT;
      }
      break;
    case URI_BEFORE_FRAGMENT:
      fragment_first = p;
      state = URI_FRAGMENT;
      break;
    case URI_FRAGMENT:
      break;
    }
  }

  /* Handle premature states: the input ended while a component was
     still open.  |p| now points at the terminating NUL. */
  switch(state) {
  case URI_BEFORE_SCHEME:
  case URI_SCHEME:
  case URI_SCHEME_SLASH1:
  case URI_SCHEME_SLASH2:
    return -1;
  case URI_BEFORE_MAYBE_USER:
    return -1;
  case URI_MAYBE_USER:
    if(last_atmark) {
      host_first = last_atmark + 1;
      host_last = p;
      if(host_first == host_last) {
        return -1;
      }
      user_last = last_atmark;
    } else {
      host_first = user_first;
      host_last = p;
      user_first = user_last = NULL;
    }
    break;
  case URI_BEFORE_MAYBE_PASSWD:
    return -1;
  case URI_MAYBE_PASSWD:
    /* No '@' followed, so the text after ':' must be a valid port. */
    if(port == -1) {
      return -1;
    }
    if(last_atmark) {
      host_first = last_atmark + 1;
      host_last = passwd_first - 1;
      user_last = last_atmark;
    } else {
      host_first = user_first;
      host_last = passwd_first - 1;
      user_first = user_last = NULL;
    }
    passwd_first = passwd_last = NULL;
    break;
  case URI_BEFORE_HOST:
    return -1;
  case URI_HOST:
    host_last = p;
    break;
  case URI_BEFORE_IPV6HOST:
  case URI_IPV6HOST:
    return -1;
  case URI_AFTER_IPV6HOST:
    break;
  case URI_BEFORE_PORT:
    return -1;
  case URI_PORT:
    if(port == -1) {
      return -1;
    }
    break;
  case URI_PATH:
    path_last = p;
    break;
  case URI_BEFORE_QUERY:
    /* Trailing '?': present but empty query. */
    query_first = query_last = p;
    break;
  case URI_QUERY:
    query_last = p;
    break;
  case URI_BEFORE_FRAGMENT:
    /* Trailing '#': present but empty fragment. */
    fragment_first = fragment_last = p;
    break;
  case URI_FRAGMENT:
    fragment_last = p;
    break;
  default:
    return -1;
  }

  if(res) {
    res->field_set = 0;
    res->port = 0;
    res->flags = flags;
    uri_set_field(res, USR_SCHEME, scheme_first, scheme_last, uri);
    uri_set_field(res, USR_HOST, host_first, host_last, uri);
    uri_set_field(res, USR_PATH, path_first, path_last, uri);
    uri_set_field(res, USR_QUERY, query_first, query_last, uri);
    uri_set_field(res, USR_FRAGMENT, fragment_first, fragment_last, uri);
    uri_set_field(res, USR_USER, user_first, user_last, uri);
    uri_set_field(res, USR_PASSWD, passwd_first, passwd_last, uri);
    if(res->field_set & (1 << USR_USER)) {
      uri_set_field(res, USR_USERINFO, user_first, last_atmark, uri);
    }
    /* A path that ends in '/' has no basename. */
    if(last_slash && last_slash + 1 != path_last) {
      uri_set_field(res, USR_BASENAME, last_slash + 1, path_last, uri);
    }
    if(port != -1) {
      res->field_set |= 1 << USR_PORT;
      res->port = port;
    }
  }
  return 0;
}