URLパーサをつくってみた
URLを「スキーム、ホスト、ポート、パス、クエリー」に分解するツールをつくってみました。
ソース
/**
 * url_parser.c
 *
 * Splits the URL given as the single command-line argument into
 * scheme, host, port, path and query, and prints one component per line.
 *
 * Limitations: userinfo ("user@host"), bracketed IPv6 literals and
 * fragments ("#...") are not recognised as separate components.
 **/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define URL_MAX_LEN 2083 /* same limit as Internet Explorer */
#define SCHEME_MAX_LEN 32
#define SCHEME_DELIMITER "://"
#define DOMAIN_MAX_LEN URL_MAX_LEN

/* Result of parse_url(). */
enum url_status {
    URL_OK = 0,
    URL_SCHEME_TOO_LONG,
    URL_NO_DOMAIN
};

/* Components of a parsed URL; every member is a NUL-terminated string. */
struct url_parts {
    char scheme[SCHEME_MAX_LEN + 1];
    char host[DOMAIN_MAX_LEN + 1];
    char port[DOMAIN_MAX_LEN + 1];
    char path[URL_MAX_LEN + 1];
    char query[URL_MAX_LEN + 1];
};

/* Copy exactly len bytes of src into dst and NUL-terminate dst. */
static void copy_span(char *dst, const char *src, size_t len)
{
    memcpy(dst, src, len);
    dst[len] = '\0';
}

/*
 * Split url into its components without modifying it.
 *
 * url must be at most URL_MAX_LEN characters long; the caller checks this,
 * which is what makes the fixed-size buffers in *out sufficient.
 *
 * Defaults when a component is absent: scheme, port and query are "",
 * path is "/".
 *
 * Returns URL_OK on success, URL_SCHEME_TOO_LONG if the scheme exceeds
 * SCHEME_MAX_LEN characters, or URL_NO_DOMAIN if the host part is empty.
 */
static enum url_status parse_url(const char *url, struct url_parts *out)
{
    const char *rest = url;
    const char *sep = strstr(url, SCHEME_DELIMITER);
    const char *colon;
    size_t authority_len, host_len, path_len;

    out->scheme[0] = '\0';
    out->host[0] = '\0';
    out->port[0] = '\0';
    out->query[0] = '\0';
    strcpy(out->path, "/");

    /*
     * Accept "://" as the scheme delimiter only when its first '/' is the
     * first '/' or '?' of the whole URL.  Otherwise the match lies inside
     * the path or query (e.g. "host/?u=http://x") and there is no scheme.
     */
    if (sep != NULL && strcspn(url, "/?") == (size_t)(sep - url) + 1) {
        size_t scheme_len = (size_t)(sep - url);

        if (scheme_len > SCHEME_MAX_LEN) {
            return URL_SCHEME_TOO_LONG;
        }
        copy_span(out->scheme, url, scheme_len);
        rest = sep + strlen(SCHEME_DELIMITER);
    }

    /* The authority ("host[:port]") ends at the first '/' or '?'. */
    authority_len = strcspn(rest, "/?");
    colon = memchr(rest, ':', authority_len);
    host_len = (colon != NULL) ? (size_t)(colon - rest) : authority_len;
    if (host_len == 0) {
        return URL_NO_DOMAIN;
    }
    copy_span(out->host, rest, host_len);
    if (colon != NULL) {
        copy_span(out->port, colon + 1, authority_len - host_len - 1);
    }
    rest += authority_len;

    /* The path, if present, runs from its leading '/' up to the first '?'. */
    if (*rest == '/') {
        path_len = strcspn(rest, "?");
        copy_span(out->path, rest, path_len);
        rest += path_len;
    }

    /* Everything after the first '?' is the query. */
    if (*rest == '?') {
        copy_span(out->query, rest + 1, strlen(rest + 1));
    }

    return URL_OK;
}

/*
 * Usage: url_parser url
 *
 * Prints the URL and its components to stdout.  Exits with EXIT_FAILURE
 * and a message on stderr when the argument count is wrong, the URL is
 * longer than URL_MAX_LEN, the scheme is too long, or no host is given.
 */
int main(int argc, char *argv[])
{
    struct url_parts parts;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s url\n", argv[0]);
        return EXIT_FAILURE;
    }

    if (strlen(argv[1]) > URL_MAX_LEN) {
        fprintf(stderr, "url too long\n");
        return EXIT_FAILURE;
    }

    switch (parse_url(argv[1], &parts)) {
    case URL_SCHEME_TOO_LONG:
        fprintf(stderr, "scheme too long\n");
        return EXIT_FAILURE;
    case URL_NO_DOMAIN:
        fprintf(stderr, "domain is not listed \n");
        return EXIT_FAILURE;
    case URL_OK:
    default:
        break;
    }

    printf("url : %s\n", argv[1]);
    printf("scheme : %s\n", parts.scheme);
    printf("host : %s\n", parts.host);
    printf("port : %s\n", parts.port);
    printf("path : %s\n", parts.path);
    printf("query : %s\n", parts.query);

    return EXIT_SUCCESS;
}
利用方法
$ ./url_parser "http://google.com" url : http://google.com scheme : http host : google.com port : path : / query : $ ./url_parser "http://linuxjm.sourceforge.jp:80/cgi-bin/man.cgi?Pagename=test" url : http://linuxjm.sourceforge.jp:80/cgi-bin/man.cgi?Pagename=test scheme : http host : linuxjm.sourceforge.jp port : 80 path : /cgi-bin/man.cgi query : Pagename=test