diff options
author | Timo Teräs <timo.teras@iki.fi> | 2017-10-05 08:57:18 +0300 |
---|---|---|
committer | Timo Teräs <timo.teras@iki.fi> | 2017-10-05 16:58:09 +0300 |
commit | 9dc6278c7b4ef41810b264ca99b597634511bda1 (patch) | |
tree | 3a96db8a7bba4f91fd017618094860d7f82b0381 /libfetch | |
parent | 8f0938a056b3ccfc5b6971d9637e2188e639d8a3 (diff) | |
download | apk-tools-9dc6278c7b4ef41810b264ca99b597634511bda1.tar.gz apk-tools-9dc6278c7b4ef41810b264ca99b597634511bda1.tar.bz2 apk-tools-9dc6278c7b4ef41810b264ca99b597634511bda1.tar.xz apk-tools-9dc6278c7b4ef41810b264ca99b597634511bda1.zip |
import libfetch-2.38 from NetBSD
ftp://ftp.fu-berlin.de/unix/NetBSD/packages/current-src/pkgsrc/net/libfetch/files
libfetch comes in (at least) NetBSD and FreeBSD flavors with
differing functionality. Alpine and Arch package the NetBSD one,
but it's not widely packaged across other distributions.
We need the NetBSD version as it supports connection pooling and does
not use funopen(3), which is not supported in musl.
FreeBSD seems to be the original and better-maintained version
with support for SSL CAs, client certificate authentication,
proxy authentication, and improved http redirect handling.
So this imports the NetBSD version, and future commits will pick up
the needed improvements from the FreeBSD tree.
Incidentally, this also fixes #7857 and the like for good.
Diffstat (limited to 'libfetch')
-rw-r--r-- | libfetch/Makefile | 45 | ||||
-rw-r--r-- | libfetch/common.c | 1085 | ||||
-rw-r--r-- | libfetch/common.h | 147 | ||||
-rwxr-xr-x | libfetch/errlist.sh | 11 | ||||
-rw-r--r-- | libfetch/fetch.3 | 781 | ||||
-rw-r--r-- | libfetch/fetch.c | 627 | ||||
-rw-r--r-- | libfetch/fetch.cat3 | 526 | ||||
-rw-r--r-- | libfetch/fetch.h | 187 | ||||
-rw-r--r-- | libfetch/file.c | 265 | ||||
-rw-r--r-- | libfetch/ftp.c | 1310 | ||||
-rw-r--r-- | libfetch/ftp.errors | 48 | ||||
-rw-r--r-- | libfetch/http.c | 1552 | ||||
-rw-r--r-- | libfetch/http.errors | 46 |
13 files changed, 6630 insertions, 0 deletions
diff --git a/libfetch/Makefile b/libfetch/Makefile new file mode 100644 index 0000000..cfcef4b --- /dev/null +++ b/libfetch/Makefile @@ -0,0 +1,45 @@ +# $NetBSD: Makefile,v 1.8 2016/10/27 10:05:38 joerg Exp $ + +LIB= fetch +SRCS= fetch.c common.c ftp.c http.c file.c +DPSRCS= ftperr.h httperr.h +INCS= fetch.h +MAN= fetch.3 +CLEANFILES= ftperr.h httperr.h +MKLINT= no +MKPIC= no +MKPROFILE= no + +.include <bsd.own.mk> + +CPPFLAGS+= -I. +CPPFLAGS+= -D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 + +FETCH_WITH_INET6?= no +FETCH_WITH_OPENSSL?= no + +.if !empty(FETCH_WITH_INET6:M[yY][eE][sS]) +CPPFLAGS+= -DINET6 +.endif + +.if !empty(FETCH_WITH_OPENSSL:M[yY][eE][sS]) +CPPFLAGS+= -DWITH_SSL +LDADD= -lssl -lcrypto +.endif + +CPPFLAGS+= -DFTP_COMBINE_CWDS + +WARNS?= 4 + +ftp.o: ftperr.h +http.o: httperr.h + +ftperr.h: ${.CURDIR}/ftp.errors ${.CURDIR}/Makefile ${.CURDIR}/errlist.sh + sh ${.CURDIR}/errlist.sh ftp_errlist FTP \ + ${.CURDIR}/ftp.errors > ${.TARGET} + +httperr.h: ${.CURDIR}/http.errors ${.CURDIR}/Makefile ${.CURDIR}/errlist.sh + sh ${.CURDIR}/errlist.sh http_errlist HTTP \ + ${.CURDIR}/http.errors > ${.TARGET} + +.include <bsd.lib.mk> diff --git a/libfetch/common.c b/libfetch/common.c new file mode 100644 index 0000000..c1e1587 --- /dev/null +++ b/libfetch/common.c @@ -0,0 +1,1085 @@ +/* $NetBSD: common.c,v 1.31 2016/10/20 21:25:57 joerg Exp $ */ +/*- + * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav + * Copyright (c) 2008, 2010 Joerg Sonnenberger <joerg@NetBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD: common.c,v 1.53 2007/12/19 00:26:36 des Exp $ + */ + +#if HAVE_CONFIG_H +#include "config.h" +#endif +#ifndef NETBSD +#include <nbcompat.h> +#endif + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/uio.h> +#if HAVE_POLL_H +#include <poll.h> +#elif HAVE_SYS_POLL_H +#include <sys/poll.h> +#endif +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <ctype.h> +#include <errno.h> +#if defined(HAVE_INTTYPES_H) || defined(NETBSD) +#include <inttypes.h> +#endif +#ifndef NETBSD +#include <nbcompat/netdb.h> +#else +#include <netdb.h> +#endif +#include <pwd.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#ifndef MSG_NOSIGNAL +#include <signal.h> +#endif + +#include "fetch.h" +#include "common.h" + +/*** Local data **************************************************************/ + +/* + * Error messages for resolver errors + */ +static struct fetcherr netdb_errlist[] = { +#ifdef EAI_NODATA + { EAI_NODATA, FETCH_RESOLV, "Host not found" }, +#endif + { EAI_AGAIN, FETCH_TEMP, "Transient resolver failure" }, + { EAI_FAIL, FETCH_RESOLV, "Non-recoverable resolver failure" }, + { EAI_NONAME, FETCH_RESOLV, "No address record" }, + { -1, FETCH_UNKNOWN, "Unknown resolver error" } +}; + +/*** Error-reporting functions ***********************************************/ + +/* + * Map error code to string + */ +static struct fetcherr * +fetch_finderr(struct fetcherr *p, int e) +{ + while (p->num != -1 && p->num != e) + p++; + return (p); +} + +/* + * Set error code + */ +void +fetch_seterr(struct fetcherr *p, int e) +{ + p = fetch_finderr(p, e); + fetchLastErrCode = p->cat; + snprintf(fetchLastErrString, MAXERRSTRING, "%s", p->string); +} + +/* + * Set error code according to errno + */ +void +fetch_syserr(void) +{ + switch (errno) { + case 0: + fetchLastErrCode = FETCH_OK; + break; + case EPERM: + case EACCES: + case EROFS: +#ifdef EAUTH + case EAUTH: +#endif +#ifdef 
ENEEDAUTH + case ENEEDAUTH: +#endif + fetchLastErrCode = FETCH_AUTH; + break; + case ENOENT: + case EISDIR: /* XXX */ + fetchLastErrCode = FETCH_UNAVAIL; + break; + case ENOMEM: + fetchLastErrCode = FETCH_MEMORY; + break; + case EBUSY: + case EAGAIN: + fetchLastErrCode = FETCH_TEMP; + break; + case EEXIST: + fetchLastErrCode = FETCH_EXISTS; + break; + case ENOSPC: + fetchLastErrCode = FETCH_FULL; + break; + case EADDRINUSE: + case EADDRNOTAVAIL: + case ENETDOWN: + case ENETUNREACH: + case ENETRESET: + case EHOSTUNREACH: + fetchLastErrCode = FETCH_NETWORK; + break; + case ECONNABORTED: + case ECONNRESET: + fetchLastErrCode = FETCH_ABORT; + break; + case ETIMEDOUT: + fetchLastErrCode = FETCH_TIMEOUT; + break; + case ECONNREFUSED: + case EHOSTDOWN: + fetchLastErrCode = FETCH_DOWN; + break; +default: + fetchLastErrCode = FETCH_UNKNOWN; + } + snprintf(fetchLastErrString, MAXERRSTRING, "%s", strerror(errno)); +} + + +/* + * Emit status message + */ +void +fetch_info(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); +} + + +/*** Network-related utility functions ***************************************/ + +/* + * Return the default port for a scheme + */ +int +fetch_default_port(const char *scheme) +{ + struct servent *se; + + if ((se = getservbyname(scheme, "tcp")) != NULL) + return (ntohs(se->s_port)); + if (strcasecmp(scheme, SCHEME_FTP) == 0) + return (FTP_DEFAULT_PORT); + if (strcasecmp(scheme, SCHEME_HTTP) == 0) + return (HTTP_DEFAULT_PORT); + return (0); +} + +/* + * Return the default proxy port for a scheme + */ +int +fetch_default_proxy_port(const char *scheme) +{ + if (strcasecmp(scheme, SCHEME_FTP) == 0) + return (FTP_DEFAULT_PROXY_PORT); + if (strcasecmp(scheme, SCHEME_HTTP) == 0) + return (HTTP_DEFAULT_PROXY_PORT); + return (0); +} + + +/* + * Create a connection for an existing descriptor. 
+ */ +conn_t * +fetch_reopen(int sd) +{ + conn_t *conn; + + /* allocate and fill connection structure */ + if ((conn = calloc(1, sizeof(*conn))) == NULL) + return (NULL); + conn->ftp_home = NULL; + conn->cache_url = NULL; + conn->next_buf = NULL; + conn->next_len = 0; + conn->sd = sd; + conn->buf_events = POLLIN; + return (conn); +} + + +/* + * Bind a socket to a specific local address + */ +int +fetch_bind(int sd, int af, const char *addr) +{ + struct addrinfo hints, *res, *res0; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = af; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = 0; + if (getaddrinfo(addr, NULL, &hints, &res0)) + return (-1); + for (res = res0; res; res = res->ai_next) { + if (bind(sd, res->ai_addr, res->ai_addrlen) == 0) + return (0); + } + return (-1); +} + + +/* + * Establish a TCP connection to the specified port on the specified host. + */ +conn_t * +fetch_connect(struct url *url, int af, int verbose) +{ + conn_t *conn; + char pbuf[10]; + const char *bindaddr; + struct addrinfo hints, *res, *res0; + int sd, error; + + if (verbose) + fetch_info("looking up %s", url->host); + + /* look up host name and set up socket address structure */ + snprintf(pbuf, sizeof(pbuf), "%d", url->port); + memset(&hints, 0, sizeof(hints)); + hints.ai_family = af; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = 0; + if ((error = getaddrinfo(url->host, pbuf, &hints, &res0)) != 0) { + netdb_seterr(error); + return (NULL); + } + bindaddr = getenv("FETCH_BIND_ADDRESS"); + + if (verbose) + fetch_info("connecting to %s:%d", url->host, url->port); + + /* try to connect */ + for (sd = -1, res = res0; res; sd = -1, res = res->ai_next) { + if ((sd = socket(res->ai_family, res->ai_socktype, + res->ai_protocol)) == -1) + continue; + if (bindaddr != NULL && *bindaddr != '\0' && + fetch_bind(sd, res->ai_family, bindaddr) != 0) { + fetch_info("failed to bind to '%s'", bindaddr); + close(sd); + continue; + } + if (connect(sd, res->ai_addr, 
res->ai_addrlen) == 0) + break; + close(sd); + } + freeaddrinfo(res0); + if (sd == -1) { + fetch_syserr(); + return (NULL); + } + + if ((conn = fetch_reopen(sd)) == NULL) { + fetch_syserr(); + close(sd); + return (NULL); + } + conn->cache_url = fetchCopyURL(url); + conn->cache_af = af; + return (conn); +} + +static conn_t *connection_cache; +static int cache_global_limit = 0; +static int cache_per_host_limit = 0; + +/* + * Initialise cache with the given limits. + */ +void +fetchConnectionCacheInit(int global_limit, int per_host_limit) +{ + + if (global_limit < 0) + cache_global_limit = INT_MAX; + else if (per_host_limit > global_limit) + cache_global_limit = per_host_limit; + else + cache_global_limit = global_limit; + if (per_host_limit < 0) + cache_per_host_limit = INT_MAX; + else + cache_per_host_limit = per_host_limit; +} + +/* + * Flush cache and free all associated resources. + */ +void +fetchConnectionCacheClose(void) +{ + conn_t *conn; + + while ((conn = connection_cache) != NULL) { + connection_cache = conn->next_cached; + (*conn->cache_close)(conn); + } +} + +/* + * Check connection cache for an existing entry matching + * protocol/host/port/user/password/family. + */ +conn_t * +fetch_cache_get(const struct url *url, int af) +{ + conn_t *conn, *last_conn = NULL; + + for (conn = connection_cache; conn; conn = conn->next_cached) { + if (conn->cache_url->port == url->port && + strcmp(conn->cache_url->scheme, url->scheme) == 0 && + strcmp(conn->cache_url->host, url->host) == 0 && + strcmp(conn->cache_url->user, url->user) == 0 && + strcmp(conn->cache_url->pwd, url->pwd) == 0 && + (conn->cache_af == AF_UNSPEC || af == AF_UNSPEC || + conn->cache_af == af)) { + if (last_conn != NULL) + last_conn->next_cached = conn->next_cached; + else + connection_cache = conn->next_cached; + return conn; + } + } + + return NULL; +} + +/* + * Put the connection back into the cache for reuse. 
+ * If the connection is freed due to LRU or if the cache + * is explicitly closed, the given callback is called. + */ +void +fetch_cache_put(conn_t *conn, int (*closecb)(conn_t *)) +{ + conn_t *iter, *last; + int global_count, host_count; + + if (conn->cache_url == NULL || cache_global_limit == 0) { + (*closecb)(conn); + return; + } + + global_count = host_count = 0; + last = NULL; + for (iter = connection_cache; iter; + last = iter, iter = iter->next_cached) { + ++global_count; + if (strcmp(conn->cache_url->host, iter->cache_url->host) == 0) + ++host_count; + if (global_count < cache_global_limit && + host_count < cache_per_host_limit) + continue; + --global_count; + if (last != NULL) + last->next_cached = iter->next_cached; + else + connection_cache = iter->next_cached; + (*iter->cache_close)(iter); + } + + conn->cache_close = closecb; + conn->next_cached = connection_cache; + connection_cache = conn; +} + +/* + * Enable SSL on a connection. + */ +int +fetch_ssl(conn_t *conn, const struct url *URL, int verbose) +{ + +#ifdef WITH_SSL + /* Init the SSL library and context */ + if (!SSL_library_init()){ + fprintf(stderr, "SSL library init failed\n"); + return (-1); + } + + SSL_load_error_strings(); + + conn->ssl_meth = SSLv23_client_method(); + conn->ssl_ctx = SSL_CTX_new(conn->ssl_meth); + SSL_CTX_set_mode(conn->ssl_ctx, SSL_MODE_AUTO_RETRY); + + conn->ssl = SSL_new(conn->ssl_ctx); + if (conn->ssl == NULL){ + fprintf(stderr, "SSL context creation failed\n"); + return (-1); + } + conn->buf_events = 0; + SSL_set_fd(conn->ssl, conn->sd); +#if OPENSSL_VERSION_NUMBER >= 0x0090806fL && !defined(OPENSSL_NO_TLSEXT) + if (!SSL_set_tlsext_host_name(conn->ssl, (char *)(uintptr_t)URL->host)) { + fprintf(stderr, + "TLS server name indication extension failed for host %s\n", + URL->host); + return (-1); + } +#endif + if (SSL_connect(conn->ssl) == -1){ + ERR_print_errors_fp(stderr); + return (-1); + } + + if (verbose) { + X509_NAME *name; + char *str; + + fprintf(stderr, "SSL 
connection established using %s\n", + SSL_get_cipher(conn->ssl)); + conn->ssl_cert = SSL_get_peer_certificate(conn->ssl); + name = X509_get_subject_name(conn->ssl_cert); + str = X509_NAME_oneline(name, 0, 0); + printf("Certificate subject: %s\n", str); + free(str); + name = X509_get_issuer_name(conn->ssl_cert); + str = X509_NAME_oneline(name, 0, 0); + printf("Certificate issuer: %s\n", str); + free(str); + } + + return (0); +#else + (void)conn; + (void)verbose; + fprintf(stderr, "SSL support disabled\n"); + return (-1); +#endif +} + +static int +compute_timeout(const struct timeval *tv) +{ + struct timeval cur; + int timeout; + + gettimeofday(&cur, NULL); + timeout = (tv->tv_sec - cur.tv_sec) * 1000 + (tv->tv_usec - cur.tv_usec) / 1000; + return timeout; +} + +/* + * Read a character from a connection w/ timeout + */ +ssize_t +fetch_read(conn_t *conn, char *buf, size_t len) +{ + struct timeval timeout_end; + struct pollfd pfd; + int timeout_cur; + ssize_t rlen; + int r; + + if (len == 0) + return 0; + + if (conn->next_len != 0) { + if (conn->next_len < len) + len = conn->next_len; + memmove(buf, conn->next_buf, len); + conn->next_len -= len; + conn->next_buf += len; + return len; + } + + if (fetchTimeout) { + gettimeofday(&timeout_end, NULL); + timeout_end.tv_sec += fetchTimeout; + } + + pfd.fd = conn->sd; + for (;;) { + pfd.events = conn->buf_events; + if (fetchTimeout && pfd.events) { + do { + timeout_cur = compute_timeout(&timeout_end); + if (timeout_cur < 0) { + errno = ETIMEDOUT; + fetch_syserr(); + return (-1); + } + errno = 0; + r = poll(&pfd, 1, timeout_cur); + if (r == -1) { + if (errno == EINTR && fetchRestartCalls) + continue; + fetch_syserr(); + return (-1); + } + } while (pfd.revents == 0); + } +#ifdef WITH_SSL + if (conn->ssl != NULL) { + rlen = SSL_read(conn->ssl, buf, len); + if (rlen == -1) { + switch (SSL_get_error(conn->ssl, rlen)) { + case SSL_ERROR_WANT_READ: + conn->buf_events = POLLIN; + break; + case SSL_ERROR_WANT_WRITE: + conn->buf_events 
= POLLOUT; + break; + default: + errno = EIO; + fetch_syserr(); + return -1; + } + } else { + /* Assume buffering on the SSL layer. */ + conn->buf_events = 0; + } + } else +#endif + rlen = read(conn->sd, buf, len); + if (rlen >= 0) + break; + + if (errno != EINTR || !fetchRestartCalls) + return (-1); + } + return (rlen); +} + + +/* + * Read a line of text from a connection w/ timeout + */ +#define MIN_BUF_SIZE 1024 + +int +fetch_getln(conn_t *conn) +{ + char *tmp, *next; + size_t tmpsize; + ssize_t len; + + if (conn->buf == NULL) { + if ((conn->buf = malloc(MIN_BUF_SIZE)) == NULL) { + errno = ENOMEM; + return (-1); + } + conn->bufsize = MIN_BUF_SIZE; + } + + conn->buflen = 0; + next = NULL; + + do { + /* + * conn->bufsize != conn->buflen at this point, + * so the buffer can be NUL-terminated below for + * the case of len == 0. + */ + len = fetch_read(conn, conn->buf + conn->buflen, + conn->bufsize - conn->buflen); + if (len == -1) + return (-1); + if (len == 0) + break; + next = memchr(conn->buf + conn->buflen, '\n', len); + conn->buflen += len; + if (conn->buflen == conn->bufsize && next == NULL) { + tmp = conn->buf; + tmpsize = conn->bufsize * 2; + if (tmpsize < conn->bufsize) { + errno = ENOMEM; + return (-1); + } + if ((tmp = realloc(tmp, tmpsize)) == NULL) { + errno = ENOMEM; + return (-1); + } + conn->buf = tmp; + conn->bufsize = tmpsize; + } + } while (next == NULL); + + if (next != NULL) { + *next = '\0'; + conn->next_buf = next + 1; + conn->next_len = conn->buflen - (conn->next_buf - conn->buf); + conn->buflen = next - conn->buf; + } else { + conn->buf[conn->buflen] = '\0'; + conn->next_len = 0; + } + return (0); +} + +/* + * Write a vector to a connection w/ timeout + * Note: can modify the iovec. 
+ */ +ssize_t +fetch_write(conn_t *conn, const void *buf, size_t len) +{ + struct timeval now, timeout, waittv; + fd_set writefds; + ssize_t wlen, total; + int r; +#ifndef MSG_NOSIGNAL + static int killed_sigpipe; +#endif + +#ifndef MSG_NOSIGNAL + if (!killed_sigpipe) { + signal(SIGPIPE, SIG_IGN); + killed_sigpipe = 1; + } +#endif + + + if (fetchTimeout) { + FD_ZERO(&writefds); + gettimeofday(&timeout, NULL); + timeout.tv_sec += fetchTimeout; + } + + total = 0; + while (len) { + while (fetchTimeout && !FD_ISSET(conn->sd, &writefds)) { + FD_SET(conn->sd, &writefds); + gettimeofday(&now, NULL); + waittv.tv_sec = timeout.tv_sec - now.tv_sec; + waittv.tv_usec = timeout.tv_usec - now.tv_usec; + if (waittv.tv_usec < 0) { + waittv.tv_usec += 1000000; + waittv.tv_sec--; + } + if (waittv.tv_sec < 0) { + errno = ETIMEDOUT; + fetch_syserr(); + return (-1); + } + errno = 0; + r = select(conn->sd + 1, NULL, &writefds, NULL, &waittv); + if (r == -1) { + if (errno == EINTR && fetchRestartCalls) + continue; + return (-1); + } + } + errno = 0; +#ifdef WITH_SSL + if (conn->ssl != NULL) + wlen = SSL_write(conn->ssl, buf, len); + else +#endif +#ifndef MSG_NOSIGNAL + wlen = send(conn->sd, buf, len, 0); +#else + wlen = send(conn->sd, buf, len, MSG_NOSIGNAL); +#endif + if (wlen == 0) { + /* we consider a short write a failure */ + errno = EPIPE; + fetch_syserr(); + return (-1); + } + if (wlen < 0) { + if (errno == EINTR && fetchRestartCalls) + continue; + return (-1); + } + total += wlen; + buf = (const char *)buf + wlen; + len -= wlen; + } + return (total); +} + + +/* + * Close connection + */ +int +fetch_close(conn_t *conn) +{ + int ret; + + ret = close(conn->sd); + if (conn->cache_url) + fetchFreeURL(conn->cache_url); + free(conn->ftp_home); + free(conn->buf); + free(conn); + return (ret); +} + + +/*** Directory-related utility functions *************************************/ + +int +fetch_add_entry(struct url_list *ue, struct url *base, const char *name, + int pre_quoted) +{ + struct 
url *tmp; + char *tmp_name; + size_t base_doc_len, name_len, i; + unsigned char c; + + if (strchr(name, '/') != NULL || + strcmp(name, "..") == 0 || + strcmp(name, ".") == 0) + return 0; + + if (strcmp(base->doc, "/") == 0) + base_doc_len = 0; + else + base_doc_len = strlen(base->doc); + + name_len = 1; + for (i = 0; name[i] != '\0'; ++i) { + if ((!pre_quoted && name[i] == '%') || + !fetch_urlpath_safe(name[i])) + name_len += 3; + else + ++name_len; + } + + tmp_name = malloc( base_doc_len + name_len + 1); + if (tmp_name == NULL) { + errno = ENOMEM; + fetch_syserr(); + return (-1); + } + + if (ue->length + 1 >= ue->alloc_size) { + tmp = realloc(ue->urls, (ue->alloc_size * 2 + 1) * sizeof(*tmp)); + if (tmp == NULL) { + free(tmp_name); + errno = ENOMEM; + fetch_syserr(); + return (-1); + } + ue->alloc_size = ue->alloc_size * 2 + 1; + ue->urls = tmp; + } + + tmp = ue->urls + ue->length; + strcpy(tmp->scheme, base->scheme); + strcpy(tmp->user, base->user); + strcpy(tmp->pwd, base->pwd); + strcpy(tmp->host, base->host); + tmp->port = base->port; + tmp->doc = tmp_name; + memcpy(tmp->doc, base->doc, base_doc_len); + tmp->doc[base_doc_len] = '/'; + + for (i = base_doc_len + 1; *name != '\0'; ++name) { + if ((!pre_quoted && *name == '%') || + !fetch_urlpath_safe(*name)) { + tmp->doc[i++] = '%'; + c = (unsigned char)*name / 16; + if (c < 10) + tmp->doc[i++] = '0' + c; + else + tmp->doc[i++] = 'a' - 10 + c; + c = (unsigned char)*name % 16; + if (c < 10) + tmp->doc[i++] = '0' + c; + else + tmp->doc[i++] = 'a' - 10 + c; + } else { + tmp->doc[i++] = *name; + } + } + tmp->doc[i] = '\0'; + + tmp->offset = 0; + tmp->length = 0; + tmp->last_modified = -1; + + ++ue->length; + + return (0); +} + +void +fetchInitURLList(struct url_list *ue) +{ + ue->length = ue->alloc_size = 0; + ue->urls = NULL; +} + +int +fetchAppendURLList(struct url_list *dst, const struct url_list *src) +{ + size_t i, j, len; + + len = dst->length + src->length; + if (len > dst->alloc_size) { + struct url *tmp; + + 
tmp = realloc(dst->urls, len * sizeof(*tmp)); + if (tmp == NULL) { + errno = ENOMEM; + fetch_syserr(); + return (-1); + } + dst->alloc_size = len; + dst->urls = tmp; + } + + for (i = 0, j = dst->length; i < src->length; ++i, ++j) { + dst->urls[j] = src->urls[i]; + dst->urls[j].doc = strdup(src->urls[i].doc); + if (dst->urls[j].doc == NULL) { + while (i-- > 0) + free(dst->urls[j].doc); + fetch_syserr(); + return -1; + } + } + dst->length = len; + + return 0; +} + +void +fetchFreeURLList(struct url_list *ue) +{ + size_t i; + + for (i = 0; i < ue->length; ++i) + free(ue->urls[i].doc); + free(ue->urls); + ue->length = ue->alloc_size = 0; +} + + +/*** Authentication-related utility functions ********************************/ + +static const char * +fetch_read_word(FILE *f) +{ + static char word[1024]; + + if (fscanf(f, " %1023s ", word) != 1) + return (NULL); + return (word); +} + +/* + * Get authentication data for a URL from .netrc + */ +int +fetch_netrc_auth(struct url *url) +{ + char fn[PATH_MAX]; + const char *word; + char *p; + FILE *f; + + if ((p = getenv("NETRC")) != NULL) { + if (snprintf(fn, sizeof(fn), "%s", p) >= (int)sizeof(fn)) { + fetch_info("$NETRC specifies a file name " + "longer than PATH_MAX"); + return (-1); + } + } else { + if ((p = getenv("HOME")) != NULL) { + struct passwd *pwd; + + if ((pwd = getpwuid(getuid())) == NULL || + (p = pwd->pw_dir) == NULL) + return (-1); + } + if (snprintf(fn, sizeof(fn), "%s/.netrc", p) >= (int)sizeof(fn)) + return (-1); + } + + if ((f = fopen(fn, "r")) == NULL) + return (-1); + while ((word = fetch_read_word(f)) != NULL) { + if (strcmp(word, "default") == 0) + break; + if (strcmp(word, "machine") == 0 && + (word = fetch_read_word(f)) != NULL && + strcasecmp(word, url->host) == 0) { + break; + } + } + if (word == NULL) + goto ferr; + while ((word = fetch_read_word(f)) != NULL) { + if (strcmp(word, "login") == 0) { + if ((word = fetch_read_word(f)) == NULL) + goto ferr; + if (snprintf(url->user, sizeof(url->user), + 
"%s", word) > (int)sizeof(url->user)) { + fetch_info("login name in .netrc is too long"); + url->user[0] = '\0'; + } + } else if (strcmp(word, "password") == 0) { + if ((word = fetch_read_word(f)) == NULL) + goto ferr; + if (snprintf(url->pwd, sizeof(url->pwd), + "%s", word) > (int)sizeof(url->pwd)) { + fetch_info("password in .netrc is too long"); + url->pwd[0] = '\0'; + } + } else if (strcmp(word, "account") == 0) { + if ((word = fetch_read_word(f)) == NULL) + goto ferr; + /* XXX not supported! */ + } else { + break; + } + } + fclose(f); + return (0); + ferr: + fclose(f); + return (-1); +} + +/* + * The no_proxy environment variable specifies a set of domains for + * which the proxy should not be consulted; the contents is a comma-, + * or space-separated list of domain names. A single asterisk will + * override all proxy variables and no transactions will be proxied + * (for compatability with lynx and curl, see the discussion at + * <http://curl.haxx.se/mail/archive_pre_oct_99/0009.html>). 
+ */ +int +fetch_no_proxy_match(const char *host) +{ + const char *no_proxy, *p, *q; + size_t h_len, d_len; + + if ((no_proxy = getenv("NO_PROXY")) == NULL && + (no_proxy = getenv("no_proxy")) == NULL) + return (0); + + /* asterisk matches any hostname */ + if (strcmp(no_proxy, "*") == 0) + return (1); + + h_len = strlen(host); + p = no_proxy; + do { + /* position p at the beginning of a domain suffix */ + while (*p == ',' || isspace((unsigned char)*p)) + p++; + + /* position q at the first separator character */ + for (q = p; *q; ++q) + if (*q == ',' || isspace((unsigned char)*q)) + break; + + d_len = q - p; + if (d_len > 0 && h_len > d_len && + strncasecmp(host + h_len - d_len, + p, d_len) == 0) { + /* domain name matches */ + return (1); + } + + p = q + 1; + } while (*q); + + return (0); +} + +struct fetchIO { + void *io_cookie; + ssize_t (*io_read)(void *, void *, size_t); + ssize_t (*io_write)(void *, const void *, size_t); + void (*io_close)(void *); +}; + +void +fetchIO_close(fetchIO *f) +{ + if (f->io_close != NULL) + (*f->io_close)(f->io_cookie); + + free(f); +} + +fetchIO * +fetchIO_unopen(void *io_cookie, ssize_t (*io_read)(void *, void *, size_t), + ssize_t (*io_write)(void *, const void *, size_t), + void (*io_close)(void *)) +{ + fetchIO *f; + + f = malloc(sizeof(*f)); + if (f == NULL) + return f; + + f->io_cookie = io_cookie; + f->io_read = io_read; + f->io_write = io_write; + f->io_close = io_close; + + return f; +} + +ssize_t +fetchIO_read(fetchIO *f, void *buf, size_t len) +{ + if (f->io_read == NULL) + return EBADF; + return (*f->io_read)(f->io_cookie, buf, len); +} + +ssize_t +fetchIO_write(fetchIO *f, const void *buf, size_t len) +{ + if (f->io_read == NULL) + return EBADF; + return (*f->io_write)(f->io_cookie, buf, len); +} diff --git a/libfetch/common.h b/libfetch/common.h new file mode 100644 index 0000000..9a07e35 --- /dev/null +++ b/libfetch/common.h @@ -0,0 +1,147 @@ +/* $NetBSD: common.h,v 1.24 2016/10/20 21:25:57 joerg Exp $ */ +/*- + * 
Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD: common.h,v 1.30 2007/12/18 11:03:07 des Exp $ + */ + +#ifndef _COMMON_H_INCLUDED +#define _COMMON_H_INCLUDED + +#define FTP_DEFAULT_PORT 21 +#define HTTP_DEFAULT_PORT 80 +#define FTP_DEFAULT_PROXY_PORT 21 +#define HTTP_DEFAULT_PROXY_PORT 3128 + +#ifdef WITH_SSL +#include <openssl/crypto.h> +#include <openssl/x509.h> +#include <openssl/pem.h> +#include <openssl/ssl.h> +#include <openssl/err.h> +#endif + +#if defined(__GNUC__) && __GNUC__ >= 3 +#define LIBFETCH_PRINTFLIKE(fmtarg, firstvararg) \ + __attribute__((__format__ (__printf__, fmtarg, firstvararg))) +#else +#define LIBFETCH_PRINTFLIKE(fmtarg, firstvararg) +#endif + +#if !defined(__sun) && !defined(__hpux) && !defined(__INTERIX) && \ + !defined(__digital__) && !defined(__linux) && !defined(__MINT__) && \ + !defined(__sgi) && !defined(__minix) && !defined(__CYGWIN__) +#define HAVE_SA_LEN +#endif + +/* Connection */ +typedef struct fetchconn conn_t; + +struct fetchconn { + int sd; /* socket descriptor */ + char *buf; /* buffer */ + size_t bufsize; /* buffer size */ + size_t buflen; /* length of buffer contents */ + int buf_events; /* poll flags for the next cycle */ + char *next_buf; /* pending buffer, e.g. after getln */ + size_t next_len; /* size of pending buffer */ + int err; /* last protocol reply code */ +#ifdef WITH_SSL + SSL *ssl; /* SSL handle */ + SSL_CTX *ssl_ctx; /* SSL context */ + X509 *ssl_cert; /* server certificate */ +# if OPENSSL_VERSION_NUMBER < 0x00909000L + SSL_METHOD *ssl_meth; /* SSL method */ +# else + const SSL_METHOD *ssl_meth; /* SSL method */ +# endif +#endif + + char *ftp_home; + + struct url *cache_url; + int cache_af; + int (*cache_close)(conn_t *); + conn_t *next_cached; +}; + +/* Structure used for error message lists */ +struct fetcherr { + const int num; + const int cat; + const char *string; +}; + +void fetch_seterr(struct fetcherr *, int); +void fetch_syserr(void); +void fetch_info(const char *, ...) 
LIBFETCH_PRINTFLIKE(1, 2); +int fetch_default_port(const char *); +int fetch_default_proxy_port(const char *); +int fetch_bind(int, int, const char *); +conn_t *fetch_cache_get(const struct url *, int); +void fetch_cache_put(conn_t *, int (*)(conn_t *)); +conn_t *fetch_connect(struct url *, int, int); +conn_t *fetch_reopen(int); +int fetch_ssl(conn_t *, const struct url *, int); +ssize_t fetch_read(conn_t *, char *, size_t); +int fetch_getln(conn_t *); +ssize_t fetch_write(conn_t *, const void *, size_t); +int fetch_close(conn_t *); +int fetch_add_entry(struct url_list *, struct url *, const char *, int); +int fetch_netrc_auth(struct url *url); +int fetch_no_proxy_match(const char *); +int fetch_urlpath_safe(char); + +#define ftp_seterr(n) fetch_seterr(ftp_errlist, n) +#define http_seterr(n) fetch_seterr(http_errlist, n) +#define netdb_seterr(n) fetch_seterr(netdb_errlist, n) +#define url_seterr(n) fetch_seterr(url_errlist, n) + +fetchIO *fetchIO_unopen(void *, ssize_t (*)(void *, void *, size_t), + ssize_t (*)(void *, const void *, size_t), void (*)(void *)); + +/* + * I don't really like exporting http_request() and ftp_request(), + * but the HTTP and FTP code occasionally needs to cross-call + * eachother, and this saves me from adding a lot of special-case code + * to handle those cases. + * + * Note that _*_request() free purl, which is way ugly but saves us a + * whole lot of trouble. 
+ */ +fetchIO *http_request(struct url *, const char *, + struct url_stat *, struct url *, const char *); +fetchIO *ftp_request(struct url *, const char *, const char *, + struct url_stat *, struct url *, const char *); + + +/* + * Check whether a particular flag is set + */ +#define CHECK_FLAG(x) (flags && strchr(flags, (x))) + +#endif diff --git a/libfetch/errlist.sh b/libfetch/errlist.sh new file mode 100755 index 0000000..84779fe --- /dev/null +++ b/libfetch/errlist.sh @@ -0,0 +1,11 @@ +#!/bin/sh +# $NetBSD: errlist.sh,v 1.2 2008/10/06 12:58:29 joerg Exp $ + +printf "static struct fetcherr $1[] = {\n" +while read code type msg; do + [ "${code}" = "#" ] && continue + printf "\t{ ${code}, FETCH_${type}, \"${msg}\" },\n" +done < $3 + +printf "\t{ -1, FETCH_UNKNOWN, \"Unknown $2 error\" }\n" +printf "};\n" diff --git a/libfetch/fetch.3 b/libfetch/fetch.3 new file mode 100644 index 0000000..bb58071 --- /dev/null +++ b/libfetch/fetch.3 @@ -0,0 +1,781 @@ +.\"- +.\" Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav +.\" Copyright (c) 2010 Joerg Sonnenberger <joerg@NetBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: fetch.3,v 1.64 2007/12/18 11:03:26 des Exp $ +.\" $NetBSD: fetch.3,v 1.17 2016/05/31 18:02:36 abhinav Exp $ +.\" +.Dd January 22, 2010 +.Dt FETCH 3 +.Os +.Sh NAME +.Nm fetchMakeURL , +.Nm fetchParseURL , +.Nm fetchCopyURL , +.Nm fetchFreeURL , +.Nm fetchXGetURL , +.Nm fetchGetURL , +.Nm fetchPutURL , +.Nm fetchStatURL , +.Nm fetchListURL , +.Nm fetchXGet , +.Nm fetchGet , +.Nm fetchPut , +.Nm fetchStat , +.Nm fetchList , +.Nm fetchXGetFile , +.Nm fetchGetFile , +.Nm fetchPutFile , +.Nm fetchStatFile , +.Nm fetchListFile , +.Nm fetchXGetHTTP , +.Nm fetchGetHTTP , +.Nm fetchPutHTTP , +.Nm fetchStatHTTP , +.Nm fetchListHTTP , +.Nm fetchXGetFTP , +.Nm fetchGetFTP , +.Nm fetchPutFTP , +.Nm fetchStatFTP , +.Nm fetchListFTP , +.Nm fetchInitURLList , +.Nm fetchFreeURLList , +.Nm fetchUnquotePath , +.Nm fetchUnquoteFilename , +.Nm fetchStringifyURL , +.Nm fetchConnectionCacheInit , +.Nm fetchConnectionCacheClose , +.Nm fetch +.Nd file transfer functions +.Sh LIBRARY +.Lb libfetch +.Sh SYNOPSIS +.In stdio.h +.In fetch.h +.Ft struct url * +.Fn fetchMakeURL "const char *scheme" "const char *host" "int port" "const char *doc" "const char *user" "const char *pwd" +.Ft struct url * +.Fn fetchParseURL "const char *URL" +.Ft struct url * +.Fn fetchCopyURL "const struct url *u" +.Ft void +.Fn fetchFreeURL "struct url *u" +.Ft fetchIO * +.Fn fetchXGetURL "const char *URL" "struct url_stat *us" "const char *flags" +.Ft fetchIO * +.Fn 
fetchGetURL "const char *URL" "const char *flags" +.Ft fetchIO * +.Fn fetchPutURL "const char *URL" "const char *flags" +.Ft int +.Fn fetchStatURL "const char *URL" "struct url_stat *us" "const char *flags" +.Ft int +.Fn fetchListURL "struct url_list *list" "const char *URL" "const char *pattern" "const char *flags" +.Ft fetchIO * +.Fn fetchXGet "struct url *u" "struct url_stat *us" "const char *flags" +.Ft fetchIO * +.Fn fetchGet "struct url *u" "const char *flags" +.Ft fetchIO * +.Fn fetchPut "struct url *u" "const char *flags" +.Ft int +.Fn fetchStat "struct url *u" "struct url_stat *us" "const char *flags" +.Ft int +.Fn fetchList "struct url_list *list" "struct url *u" "const char *pattern" "const char *flags" +.Ft fetchIO * +.Fn fetchXGetFile "struct url *u" "struct url_stat *us" "const char *flags" +.Ft fetchIO * +.Fn fetchGetFile "struct url *u" "const char *flags" +.Ft fetchIO * +.Fn fetchPutFile "struct url *u" "const char *flags" +.Ft int +.Fn fetchStatFile "struct url *u" "struct url_stat *us" "const char *flags" +.Ft int +.Fn fetchListFile "struct url_list *list" "struct url *u" "const char *pattern" "const char *flags" +.Ft fetchIO * +.Fn fetchXGetHTTP "struct url *u" "struct url_stat *us" "const char *flags" +.Ft fetchIO * +.Fn fetchGetHTTP "struct url *u" "const char *flags" +.Ft fetchIO * +.Fn fetchPutHTTP "struct url *u" "const char *flags" +.Ft int +.Fn fetchStatHTTP "struct url *u" "struct url_stat *us" "const char *flags" +.Ft int +.Fn fetchListHTTP "struct url_list *list" "struct url *u" "const char *pattern" "const char *flags" +.Ft fetchIO * +.Fn fetchXGetFTP "struct url *u" "struct url_stat *us" "const char *flags" +.Ft fetchIO * +.Fn fetchGetFTP "struct url *u" "const char *flags" +.Ft fetchIO * +.Fn fetchPutFTP "struct url *u" "const char *flags" +.Ft int +.Fn fetchStatFTP "struct url *u" "struct url_stat *us" "const char *flags" +.Ft int +.Fn fetchListFTP "struct url_list *list" "struct url *u" "const char *pattern" "const char *flags" 
+.Ft void +.Fn fetchInitURLList "struct url_list *ul" +.Ft int +.Fn fetchAppendURLList "struct url_list *dst" "const struct url_list *src" +.Ft void +.Fn fetchFreeURLList "struct url_list *ul" +.Ft char * +.Fn fetchUnquotePath "struct url *u" +.Ft char * +.Fn fetchUnquoteFilename "struct url *u" +.Ft char * +.Fn fetchStringifyURL "const struct url *u" +.Ft void +.Fn fetchConnectionCacheInit "int global" "int per_host" +.Ft void +.Fn fetchConnectionCacheClose "void" +.Sh DESCRIPTION +These functions implement a high-level library for retrieving and +uploading files using Uniform Resource Locators (URLs). +.Pp +.Fn fetchParseURL +takes a URL in the form of a null-terminated string and splits it into +its components according to the Common Internet Scheme Syntax +detailed in RFC 1738. +A regular expression which produces this syntax is: +.Bd -literal -offset indent +\*[Lt]scheme\*[Gt]:(//(\*[Lt]user\*[Gt](:\*[Lt]pwd\*[Gt])?@)?\*[Lt]host\*[Gt](:\*[Lt]port\*[Gt])?)?/(\*[Lt]doc\*[Gt])? +.Ed +.Pp +If the URL does not seem to begin with a scheme name, it is assumed to be a local path. +Only absolute path names are accepted. +.Pp +Note that some components of the URL are not necessarily relevant to +all URL schemes. +For instance, the file scheme only needs the +.Aq scheme +and +.Aq doc +components. +.Fn fetchParseURL +quotes any unsafe character in the URL automatically. +This is not done by +.Fn fetchMakeURL . +.Fn fetchCopyURL +copies an existing +.Vt url +structure. 
+.Pp +.Fn fetchMakeURL , +.Fn fetchParseURL , +and +.Fn fetchCopyURL +return a pointer to a +.Vt url +structure, which is defined as follows in +.In fetch.h : +.Bd -literal +#define URL_SCHEMELEN 16 +#define URL_USERLEN 256 +#define URL_PWDLEN 256 +#define URL_HOSTLEN 255 + +struct url { + char scheme[URL_SCHEMELEN + 1]; + char user[URL_USERLEN + 1]; + char pwd[URL_PWDLEN + 1]; + char host[URL_HOSTLEN + 1]; + int port; + char *doc; + off_t offset; + size_t length; + time_t last_modified; +}; +.Ed +.Pp +The pointer returned by +.Fn fetchMakeURL , +.Fn fetchCopyURL , +and +.Fn fetchParseURL +should be freed using +.Fn fetchFreeURL . +The size of +.Vt struct URL +is not part of the ABI. +.Pp +.Fn fetchXGetURL , +.Fn fetchGetURL , +and +.Fn fetchPutURL +constitute the recommended interface to the +.Nm fetch +library. +They examine the URL passed to them to determine the transfer +method, and call the appropriate lower-level functions to perform the +actual transfer. +.Fn fetchXGetURL +also returns the remote document's metadata in the +.Vt url_stat +structure pointed to by the +.Fa us +argument. +.Pp +The +.Fa flags +argument is a string of characters which specify transfer options. +The +meaning of the individual flags is scheme-dependent, and is detailed +in the appropriate section below. +.Pp +.Fn fetchStatURL +attempts to obtain the requested document's metadata and fill in the +structure pointed to by its second argument. +The +.Vt url_stat +structure is defined as follows in +.In fetch.h : +.Bd -literal +struct url_stat { + off_t size; + time_t atime; + time_t mtime; +}; +.Ed +.Pp +If the size could not be obtained from the server, the +.Fa size +field is set to \-1. +If the modification time could not be obtained from the server, the +.Fa mtime +field is set to the epoch. +If the access time could not be obtained from the server, the +.Fa atime +field is set to the modification time. 
+.Pp +.Fn fetchListURL +attempts to list the contents of the directory pointed to by the URL provided. +The pattern can be a simple glob-like expression as a hint. +Callers should not depend on the server to filter names. +If successful, it appends the list of entries to the +.Vt url_list +structure. +The +.Vt url_list +structure is defined as follows in +.In fetch.h : +.Bd -literal +struct url_list { + size_t length; + size_t alloc_size; + struct url *urls; +}; +.Ed +.Pp +The list should be initialized by calling +.Fn fetchInitURLList +and the entries freed by calling +.Fn fetchFreeURLList . +The function +.Fn fetchAppendURLList +can be used to append one URL list to another. +If the +.Ql c +(cache result) flag is specified, the library is allowed to internally +cache the result. +.Pp +.Fn fetchStringifyURL +returns the URL as a string. +.Fn fetchUnquotePath +returns the path name part of the URL with any quoting undone. +Query arguments and fragment identifiers are not included. +.Fn fetchUnquoteFilename +returns the last component of the path name as returned by +.Fn fetchUnquotePath . +.Fn fetchStringifyURL , +.Fn fetchUnquotePath , +and +.Fn fetchUnquoteFilename +return a string that should be deallocated with +.Fn free +after use. +.Pp +.Fn fetchConnectionCacheInit +enables the connection cache. +The first argument specifies the global limit on cached connections. +The second argument specifies the host limit. +Entries are considered to specify the same host if the host name +from the URL is identical, independent of the address or address family. +.Fn fetchConnectionCacheClose +flushes the connection cache and closes all cached connections. +.Pp +.Fn fetchXGet , +.Fn fetchGet , +.Fn fetchPut , +and +.Fn fetchStat +are similar to +.Fn fetchXGetURL , +.Fn fetchGetURL , +.Fn fetchPutURL , +and +.Fn fetchStatURL , +except that they expect a pre-parsed URL in the form of a pointer to +a +.Vt struct url +rather than a string. 
+.Pp +All of the +.Fn fetchXGetXXX , +.Fn fetchGetXXX , +and +.Fn fetchPutXXX +functions return a pointer to a stream which can be used to read or +write data from or to the requested document, respectively. +Note that +although the implementation details of the individual access methods +vary, it can generally be assumed that a stream returned by one of the +.Fn fetchXGetXXX +or +.Fn fetchGetXXX +functions is read-only, and that a stream returned by one of the +.Fn fetchPutXXX +functions is write-only. +.Sh PROTOCOL INDEPENDENT FLAGS +If the +.Ql i +(if-modified-since) flag is specified, the library will try to fetch +the content only if it is newer than +.Va last_modified . +For HTTP an +.Li If-Modified-Since +HTTP header is sent. +For FTP a +.Li MDTM +command is sent first and compared locally. +For FILE the source file is compared. +.Sh FILE SCHEME +.Fn fetchXGetFile , +.Fn fetchGetFile , +and +.Fn fetchPutFile +provide access to documents which are files in a locally mounted file +system. +Only the +.Aq document +component of the URL is used. +.Pp +.Fn fetchXGetFile +and +.Fn fetchGetFile +do not accept any flags. +.Pp +.Fn fetchPutFile +accepts the +.Ql a +(append to file) flag. +If that flag is specified, the data written to +the stream returned by +.Fn fetchPutFile +will be appended to the previous contents of the file, instead of +replacing them. +.Sh FTP SCHEME +.Fn fetchXGetFTP , +.Fn fetchGetFTP , +and +.Fn fetchPutFTP +implement the FTP protocol as described in RFC 959. +.Pp +By default +.Nm libfetch +will attempt to use passive mode first and only fall back to active mode +if the server reports a syntax error. +If the +.Ql a +(active) flag is specified, a passive connection is not tried and active mode +is used directly. +.Pp +If the +.Ql l +(low) flag is specified, data sockets will be allocated in the low (or +default) port range instead of the high port range (see +.Xr ip 4 ) . 
+.Pp +If the +.Ql d +(direct) flag is specified, +.Fn fetchXGetFTP , +.Fn fetchGetFTP , +and +.Fn fetchPutFTP +will use a direct connection even if a proxy server is defined. +.Pp +If no user name or password is given, the +.Nm fetch +library will attempt an anonymous login, with user name "anonymous" +and password "anonymous@\*[Lt]hostname\*[Gt]". +.Sh HTTP SCHEME +The +.Fn fetchXGetHTTP , +.Fn fetchGetHTTP , +and +.Fn fetchPutHTTP +functions implement the HTTP/1.1 protocol. +With a little luck, there is +even a chance that they comply with RFC 2616 and RFC 2617. +.Pp +If the +.Ql d +(direct) flag is specified, +.Fn fetchXGetHTTP , +.Fn fetchGetHTTP , +and +.Fn fetchPutHTTP +will use a direct connection even if a proxy server is defined. +.Pp +Since there seems to be no good way of implementing the HTTP PUT +method in a manner consistent with the rest of the +.Nm fetch +library, +.Fn fetchPutHTTP +is currently unimplemented. +.Sh AUTHENTICATION +Apart from setting the appropriate environment variables and +specifying the user name and password in the URL or the +.Vt struct url , +the calling program has the option of defining an authentication +function with the following prototype: +.Pp +.Ft int +.Fn myAuthMethod "struct url *u" +.Pp +The callback function should fill in the +.Fa user +and +.Fa pwd +fields in the provided +.Vt struct url +and return 0 on success, or any other value to indicate failure. +.Pp +To register the authentication callback, simply set +.Va fetchAuthMethod +to point at it. +The callback will be used whenever a site requires authentication and +the appropriate environment variables are not set. +.Pp +This interface is experimental and may be subject to change. +.Sh RETURN VALUES +.Fn fetchParseURL +returns a pointer to a +.Vt struct url +containing the individual components of the URL. +If it is +unable to allocate memory, or the URL is syntactically incorrect, +.Fn fetchParseURL +returns a +.Dv NULL +pointer. 
+.Pp +The +.Fn fetchStat +functions return 0 on success and \-1 on failure. +.Pp +All other functions return a stream pointer which may be used to +access the requested document, or +.Dv NULL +if an error occurred. +.Pp +The following error codes are defined in +.In fetch.h : +.Bl -tag -width 18n +.It Bq Er FETCH_ABORT +Operation aborted +.It Bq Er FETCH_AUTH +Authentication failed +.It Bq Er FETCH_DOWN +Service unavailable +.It Bq Er FETCH_EXISTS +File exists +.It Bq Er FETCH_FULL +File system full +.It Bq Er FETCH_INFO +Informational response +.It Bq Er FETCH_MEMORY +Insufficient memory +.It Bq Er FETCH_MOVED +File has moved +.It Bq Er FETCH_NETWORK +Network error +.It Bq Er FETCH_OK +No error +.It Bq Er FETCH_PROTO +Protocol error +.It Bq Er FETCH_RESOLV +Resolver error +.It Bq Er FETCH_SERVER +Server error +.It Bq Er FETCH_TEMP +Temporary error +.It Bq Er FETCH_TIMEOUT +Operation timed out +.It Bq Er FETCH_UNAVAIL +File is not available +.It Bq Er FETCH_UNKNOWN +Unknown error +.It Bq Er FETCH_URL +Invalid URL +.El +.Pp +The accompanying error message includes a protocol-specific error code +and message, e.g.\& "File is not available (404 Not Found)" +.Sh ENVIRONMENT +.Bl -tag -width ".Ev FETCH_BIND_ADDRESS" +.It Ev FETCH_BIND_ADDRESS +Specifies a host name or IP address to which sockets used for outgoing +connections will be bound. +.It Ev FTP_LOGIN +Default FTP login if none was provided in the URL. +.It Ev FTP_PASSIVE_MODE +If set to anything but +.Ql no , +forces the FTP code to use passive mode. +.It Ev FTP_PASSWORD +Default FTP password if the remote server requests one and none was +provided in the URL. +.It Ev FTP_PROXY +URL of the proxy to use for FTP requests. +The document part is ignored. +FTP and HTTP proxies are supported; if no scheme is specified, FTP is +assumed. +If the proxy is an FTP proxy, +.Nm libfetch +will send +.Ql user@host +as user name to the proxy, where +.Ql user +is the real user name, and +.Ql host +is the name of the FTP server. 
+.Pp +If this variable is set to an empty string, no proxy will be used for +FTP requests, even if the +.Ev HTTP_PROXY +variable is set. +.It Ev ftp_proxy +Same as +.Ev FTP_PROXY , +for compatibility. +.It Ev HTTP_AUTH +Specifies HTTP authorization parameters as a colon-separated list of +items. +The first and second item are the authorization scheme and realm +respectively; further items are scheme-dependent. +Currently, only basic authorization is supported. +.Pp +Basic authorization requires two parameters: the user name and +password, in that order. +.Pp +This variable is only used if the server requires authorization and +no user name or password was specified in the URL. +.It Ev HTTP_PROXY +URL of the proxy to use for HTTP requests. +The document part is ignored. +Only HTTP proxies are supported for HTTP requests. +If no port number is specified, the default is 3128. +.Pp +Note that this proxy will also be used for FTP documents, unless the +.Ev FTP_PROXY +variable is set. +.It Ev http_proxy +Same as +.Ev HTTP_PROXY , +for compatibility. +.It Ev HTTP_PROXY_AUTH +Specifies authorization parameters for the HTTP proxy in the same +format as the +.Ev HTTP_AUTH +variable. +.Pp +This variable is used if and only if connected to an HTTP proxy, and +is ignored if a user and/or a password were specified in the proxy +URL. +.It Ev HTTP_REFERER +Specifies the referrer URL to use for HTTP requests. +If set to +.Dq auto , +the document URL will be used as referrer URL. +.It Ev HTTP_USER_AGENT +Specifies the User-Agent string to use for HTTP requests. +This can be useful when working with HTTP origin or proxy servers that +differentiate between user agents. +.It Ev NETRC +Specifies a file to use instead of +.Pa ~/.netrc +to look up login names and passwords for FTP sites. +See +.Xr ftp 1 +for a description of the file format. +This feature is experimental. 
+.It Ev NO_PROXY +Either a single asterisk, which disables the use of proxies +altogether, or a comma- or whitespace-separated list of hosts for +which proxies should not be used. +.It Ev no_proxy +Same as +.Ev NO_PROXY , +for compatibility. +.El +.Sh EXAMPLES +To access a proxy server on +.Pa proxy.example.com +port 8080, set the +.Ev HTTP_PROXY +environment variable in a manner similar to this: +.Pp +.Dl HTTP_PROXY=http://proxy.example.com:8080 +.Pp +If the proxy server requires authentication, there are +two options available for passing the authentication data. +The first method is by using the proxy URL: +.Pp +.Dl HTTP_PROXY=http://\*[Lt]user\*[Gt]:\*[Lt]pwd\*[Gt]@proxy.example.com:8080 +.Pp +The second method is by using the +.Ev HTTP_PROXY_AUTH +environment variable: +.Bd -literal -offset indent +HTTP_PROXY=http://proxy.example.com:8080 +HTTP_PROXY_AUTH=basic:*:\*[Lt]user\*[Gt]:\*[Lt]pwd\*[Gt] +.Ed +.Pp +To disable the use of a proxy for an HTTP server running on the local +host, define +.Ev NO_PROXY +as follows: +.Bd -literal -offset indent +NO_PROXY=localhost,127.0.0.1 +.Ed +.Sh SEE ALSO +.\" .Xr fetch 1 , +.\" .Xr ftpio 3 , +.Xr ftp 1 , +.Xr ip 4 +.Rs +.%A J. Postel +.%A J. K. Reynolds +.%D October 1985 +.%B File Transfer Protocol +.%O RFC 959 +.Re +.Rs +.%A P. Deutsch +.%A A. Emtage +.%A A. Marine +.%D May 1994 +.%T How to Use Anonymous FTP +.%O RFC 1635 +.Re +.Rs +.%A T. Berners-Lee +.%A L. Masinter +.%A M. McCahill +.%D December 1994 +.%T Uniform Resource Locators (URL) +.%O RFC 1738 +.Re +.Rs +.%A R. Fielding +.%A J. Gettys +.%A J. Mogul +.%A H. Frystyk +.%A L. Masinter +.%A P. Leach +.%A T. Berners-Lee +.%D January 1999 +.%B Hypertext Transfer Protocol -- HTTP/1.1 +.%O RFC 2616 +.Re +.Rs +.%A J. Franks +.%A P. Hallam-Baker +.%A J. Hostetler +.%A S. Lawrence +.%A P. Leach +.%A A. Luotonen +.%A L. 
Stewart +.%D June 1999 +.%B HTTP Authentication: Basic and Digest Access Authentication +.%O RFC 2617 +.Re +.Sh HISTORY +The +.Nm fetch +library first appeared in +.Fx 3.0 . +.Sh AUTHORS +.An -nosplit +The +.Nm fetch +library was mostly written by +.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org +with numerous suggestions from +.An Jordan K. Hubbard Aq Mt jkh@FreeBSD.org , +.An Eugene Skepner Aq Mt eu@qub.com +and other +.Fx +developers. +It replaces the older +.Nm ftpio +library written by +.An Poul-Henning Kamp Aq Mt phk@FreeBSD.org +and +.An Jordan K. Hubbard Aq Mt jkh@FreeBSD.org . +.Pp +This manual page was written by +.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org . +.Sh BUGS +Some parts of the library are not yet implemented. +The most notable +examples of this are +.Fn fetchPutHTTP +and FTP proxy support. +.Pp +There is no way to select a proxy at run-time other than setting the +.Ev HTTP_PROXY +or +.Ev FTP_PROXY +environment variables as appropriate. +.Pp +.Nm libfetch +does not understand or obey 305 (Use Proxy) replies. +.Pp +Error numbers are unique only within a certain context; the error +codes used for FTP and HTTP overlap, as do those used for resolver and +system errors. +For instance, error code 202 means "Command not +implemented, superfluous at this site" in an FTP context and +"Accepted" in an HTTP context. +.Pp +.Fn fetchStatFTP +does not check that the result of an MDTM command is a valid date. +.Pp +The man page is incomplete, poorly written and produces badly +formatted text. +.Pp +The error reporting mechanism is unsatisfactory. +.Pp +Some parts of the code are not fully reentrant. diff --git a/libfetch/fetch.c b/libfetch/fetch.c new file mode 100644 index 0000000..c287097 --- /dev/null +++ b/libfetch/fetch.c @@ -0,0 +1,627 @@ +/* $NetBSD: fetch.c,v 1.19 2009/08/11 20:48:06 joerg Exp $ */ +/*- + * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav + * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD: fetch.c,v 1.41 2007/12/19 00:26:36 des Exp $ + */ + +#if HAVE_CONFIG_H +#include "config.h" +#endif +#ifndef NETBSD +#include <nbcompat.h> +#endif + +#include <ctype.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "fetch.h" +#include "common.h" + +auth_t fetchAuthMethod; +int fetchLastErrCode; +char fetchLastErrString[MAXERRSTRING]; +int fetchTimeout; +volatile int fetchRestartCalls = 1; +int fetchDebug; + + +/*** Local data **************************************************************/ + +/* + * Error messages for parser errors + */ +#define URL_MALFORMED 1 +#define URL_BAD_SCHEME 2 +#define URL_BAD_PORT 3 +static struct fetcherr url_errlist[] = { + { URL_MALFORMED, FETCH_URL, "Malformed URL" }, + { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, + { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, + { -1, FETCH_UNKNOWN, "Unknown parser error" } +}; + + +/*** Public API **************************************************************/ + +/* + * Select the appropriate protocol for the URL scheme, and return a + * read-only stream connected to the document referenced by the URL. + * Also fill out the struct url_stat. + */ +fetchIO * +fetchXGet(struct url *URL, struct url_stat *us, const char *flags) +{ + + if (us != NULL) { + us->size = -1; + us->atime = us->mtime = 0; + } + if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) + return (fetchXGetFile(URL, us, flags)); + else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) + return (fetchXGetFTP(URL, us, flags)); + else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) + return (fetchXGetHTTP(URL, us, flags)); + else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) + return (fetchXGetHTTP(URL, us, flags)); + url_seterr(URL_BAD_SCHEME); + return (NULL); +} + +/* + * Select the appropriate protocol for the URL scheme, and return a + * read-only stream connected to the document referenced by the URL. 
+ */ +fetchIO * +fetchGet(struct url *URL, const char *flags) +{ + return (fetchXGet(URL, NULL, flags)); +} + +/* + * Select the appropriate protocol for the URL scheme, and return a + * write-only stream connected to the document referenced by the URL. + */ +fetchIO * +fetchPut(struct url *URL, const char *flags) +{ + + if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) + return (fetchPutFile(URL, flags)); + else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) + return (fetchPutFTP(URL, flags)); + else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) + return (fetchPutHTTP(URL, flags)); + else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) + return (fetchPutHTTP(URL, flags)); + url_seterr(URL_BAD_SCHEME); + return (NULL); +} + +/* + * Select the appropriate protocol for the URL scheme, and return the + * size of the document referenced by the URL if it exists. + */ +int +fetchStat(struct url *URL, struct url_stat *us, const char *flags) +{ + + if (us != NULL) { + us->size = -1; + us->atime = us->mtime = 0; + } + if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) + return (fetchStatFile(URL, us, flags)); + else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) + return (fetchStatFTP(URL, us, flags)); + else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) + return (fetchStatHTTP(URL, us, flags)); + else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) + return (fetchStatHTTP(URL, us, flags)); + url_seterr(URL_BAD_SCHEME); + return (-1); +} + +/* + * Select the appropriate protocol for the URL scheme, and return a + * list of files in the directory pointed to by the URL. 
+ */ +int +fetchList(struct url_list *ue, struct url *URL, const char *pattern, + const char *flags) +{ + + if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) + return (fetchListFile(ue, URL, pattern, flags)); + else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) + return (fetchListFTP(ue, URL, pattern, flags)); + else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) + return (fetchListHTTP(ue, URL, pattern, flags)); + else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) + return (fetchListHTTP(ue, URL, pattern, flags)); + url_seterr(URL_BAD_SCHEME); + return -1; +} + +/* + * Attempt to parse the given URL; if successful, call fetchXGet(). + */ +fetchIO * +fetchXGetURL(const char *URL, struct url_stat *us, const char *flags) +{ + struct url *u; + fetchIO *f; + + if ((u = fetchParseURL(URL)) == NULL) + return (NULL); + + f = fetchXGet(u, us, flags); + + fetchFreeURL(u); + return (f); +} + +/* + * Attempt to parse the given URL; if successful, call fetchGet(). + */ +fetchIO * +fetchGetURL(const char *URL, const char *flags) +{ + return (fetchXGetURL(URL, NULL, flags)); +} + +/* + * Attempt to parse the given URL; if successful, call fetchPut(). + */ +fetchIO * +fetchPutURL(const char *URL, const char *flags) +{ + struct url *u; + fetchIO *f; + + if ((u = fetchParseURL(URL)) == NULL) + return (NULL); + + f = fetchPut(u, flags); + + fetchFreeURL(u); + return (f); +} + +/* + * Attempt to parse the given URL; if successful, call fetchStat(). + */ +int +fetchStatURL(const char *URL, struct url_stat *us, const char *flags) +{ + struct url *u; + int s; + + if ((u = fetchParseURL(URL)) == NULL) + return (-1); + + s = fetchStat(u, us, flags); + + fetchFreeURL(u); + return (s); +} + +/* + * Attempt to parse the given URL; if successful, call fetchList(). 
+ */ +int +fetchListURL(struct url_list *ue, const char *URL, const char *pattern, + const char *flags) +{ + struct url *u; + int rv; + + if ((u = fetchParseURL(URL)) == NULL) + return -1; + + rv = fetchList(ue, u, pattern, flags); + + fetchFreeURL(u); + return rv; +} + +/* + * Make a URL + */ +struct url * +fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, + const char *user, const char *pwd) +{ + struct url *u; + + if (!scheme || (!host && !doc)) { + url_seterr(URL_MALFORMED); + return (NULL); + } + + if (port < 0 || port > 65535) { + url_seterr(URL_BAD_PORT); + return (NULL); + } + + /* allocate struct url */ + if ((u = calloc(1, sizeof(*u))) == NULL) { + fetch_syserr(); + return (NULL); + } + + if ((u->doc = strdup(doc ? doc : "/")) == NULL) { + fetch_syserr(); + free(u); + return (NULL); + } + +#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) + seturl(scheme); + seturl(host); + seturl(user); + seturl(pwd); +#undef seturl + u->port = port; + + return (u); +} + +int +fetch_urlpath_safe(char x) +{ + if ((x >= '0' && x <= '9') || (x >= 'A' && x <= 'Z') || + (x >= 'a' && x <= 'z')) + return 1; + + switch (x) { + case '$': + case '-': + case '_': + case '.': + case '+': + case '!': + case '*': + case '\'': + case '(': + case ')': + case ',': + /* The following are allowed in segment and path components: */ + case '?': + case ':': + case '@': + case '&': + case '=': + case '/': + case ';': + /* If something is already quoted... */ + case '%': + return 1; + default: + return 0; + } +} + +/* + * Copy an existing URL. + */ +struct url * +fetchCopyURL(const struct url *src) +{ + struct url *dst; + char *doc; + + /* allocate struct url */ + if ((dst = malloc(sizeof(*dst))) == NULL) { + fetch_syserr(); + return (NULL); + } + if ((doc = strdup(src->doc)) == NULL) { + fetch_syserr(); + free(dst); + return (NULL); + } + *dst = *src; + dst->doc = doc; + + return dst; +} + +/* + * Split an URL into components. 
URL syntax is: + * [method:/][/[user[:pwd]@]host[:port]/][document] + * This almost, but not quite, RFC1738 URL syntax. + */ +struct url * +fetchParseURL(const char *URL) +{ + const char *p, *q; + struct url *u; + size_t i, count; + int pre_quoted; + + /* allocate struct url */ + if ((u = calloc(1, sizeof(*u))) == NULL) { + fetch_syserr(); + return (NULL); + } + + if (*URL == '/') { + pre_quoted = 0; + strcpy(u->scheme, SCHEME_FILE); + p = URL; + goto quote_doc; + } + if (strncmp(URL, "file:", 5) == 0) { + pre_quoted = 1; + strcpy(u->scheme, SCHEME_FILE); + URL += 5; + if (URL[0] != '/' || URL[1] != '/' || URL[2] != '/') { + url_seterr(URL_MALFORMED); + goto ouch; + } + p = URL + 2; + goto quote_doc; + } + if (strncmp(URL, "http:", 5) == 0 || + strncmp(URL, "https:", 6) == 0) { + pre_quoted = 1; + if (URL[4] == ':') { + strcpy(u->scheme, SCHEME_HTTP); + URL += 5; + } else { + strcpy(u->scheme, SCHEME_HTTPS); + URL += 6; + } + + if (URL[0] != '/' || URL[1] != '/') { + url_seterr(URL_MALFORMED); + goto ouch; + } + URL += 2; + p = URL; + goto find_user; + } + if (strncmp(URL, "ftp:", 4) == 0) { + pre_quoted = 1; + strcpy(u->scheme, SCHEME_FTP); + URL += 4; + if (URL[0] != '/' || URL[1] != '/') { + url_seterr(URL_MALFORMED); + goto ouch; + } + URL += 2; + p = URL; + goto find_user; + } + + url_seterr(URL_BAD_SCHEME); + goto ouch; + +find_user: + p = strpbrk(URL, "/@"); + if (p != NULL && *p == '@') { + /* username */ + for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) { + if (i < URL_USERLEN) + u->user[i++] = *q; + } + + /* password */ + if (*q == ':') { + for (q++, i = 0; (*q != '@'); q++) + if (i < URL_PWDLEN) + u->pwd[i++] = *q; + } + + p++; + } else { + p = URL; + } + + /* hostname */ +#ifdef INET6 + if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && + (*++q == '\0' || *q == '/' || *q == ':')) { + if ((i = q - p - 2) > URL_HOSTLEN) + i = URL_HOSTLEN; + strncpy(u->host, ++p, i); + p = q; + } else +#endif + for (i = 0; *p && (*p != '/') && (*p != ':'); p++) + 
if (i < URL_HOSTLEN) + u->host[i++] = *p; + + /* port */ + if (*p == ':') { + for (q = ++p; *q && (*q != '/'); q++) + if (isdigit((unsigned char)*q)) + u->port = u->port * 10 + (*q - '0'); + else { + /* invalid port */ + url_seterr(URL_BAD_PORT); + goto ouch; + } + p = q; + } + + /* document */ + if (!*p) + p = "/"; + +quote_doc: + count = 1; + for (i = 0; p[i] != '\0'; ++i) { + if ((!pre_quoted && p[i] == '%') || + !fetch_urlpath_safe(p[i])) + count += 3; + else + ++count; + } + + if ((u->doc = malloc(count)) == NULL) { + fetch_syserr(); + goto ouch; + } + for (i = 0; *p != '\0'; ++p) { + if ((!pre_quoted && *p == '%') || + !fetch_urlpath_safe(*p)) { + u->doc[i++] = '%'; + if ((unsigned char)*p < 160) + u->doc[i++] = '0' + ((unsigned char)*p) / 16; + else + u->doc[i++] = 'a' - 10 + ((unsigned char)*p) / 16; + if ((unsigned char)*p % 16 < 10) + u->doc[i++] = '0' + ((unsigned char)*p) % 16; + else + u->doc[i++] = 'a' - 10 + ((unsigned char)*p) % 16; + } else + u->doc[i++] = *p; + } + u->doc[i] = '\0'; + + return (u); + +ouch: + free(u); + return (NULL); +} + +/* + * Free a URL + */ +void +fetchFreeURL(struct url *u) +{ + free(u->doc); + free(u); +} + +static char +xdigit2digit(char digit) +{ + digit = tolower((unsigned char)digit); + if (digit >= 'a' && digit <= 'f') + digit = digit - 'a' + 10; + else + digit = digit - '0'; + + return digit; +} + +/* + * Unquote whole URL. + * Skips optional parts like query or fragment identifier. 
+ */ +char * +fetchUnquotePath(struct url *url) +{ + char *unquoted; + const char *iter; + size_t i; + + if ((unquoted = malloc(strlen(url->doc) + 1)) == NULL) + return NULL; + + for (i = 0, iter = url->doc; *iter != '\0'; ++iter) { + if (*iter == '#' || *iter == '?') + break; + if (iter[0] != '%' || + !isxdigit((unsigned char)iter[1]) || + !isxdigit((unsigned char)iter[2])) { + unquoted[i++] = *iter; + continue; + } + unquoted[i++] = xdigit2digit(iter[1]) * 16 + + xdigit2digit(iter[2]); + iter += 2; + } + unquoted[i] = '\0'; + return unquoted; +} + + +/* + * Extract the file name component of a URL. + */ +char * +fetchUnquoteFilename(struct url *url) +{ + char *unquoted, *filename; + const char *last_slash; + + if ((unquoted = fetchUnquotePath(url)) == NULL) + return NULL; + + if ((last_slash = strrchr(unquoted, '/')) == NULL) + return unquoted; + filename = strdup(last_slash + 1); + free(unquoted); + return filename; +} + +char * +fetchStringifyURL(const struct url *url) +{ + size_t total; + char *doc; + + /* scheme :// user : pwd @ host :port doc */ + total = strlen(url->scheme) + 3 + strlen(url->user) + 1 + + strlen(url->pwd) + 1 + strlen(url->host) + 6 + strlen(url->doc) + 1; + if ((doc = malloc(total)) == NULL) + return NULL; + if (url->port != 0) + snprintf(doc, total, "%s%s%s%s%s%s%s:%d%s", + url->scheme, + url->scheme[0] != '\0' ? "://" : "", + url->user, + url->pwd[0] != '\0' ? ":" : "", + url->pwd, + url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "", + url->host, + (int)url->port, + url->doc); + else { + snprintf(doc, total, "%s%s%s%s%s%s%s%s", + url->scheme, + url->scheme[0] != '\0' ? "://" : "", + url->user, + url->pwd[0] != '\0' ? ":" : "", + url->pwd, + url->user[0] != '\0' || url->pwd[0] != '\0' ? 
"@" : "", + url->host, + url->doc); + } + return doc; +} diff --git a/libfetch/fetch.cat3 b/libfetch/fetch.cat3 new file mode 100644 index 0000000..f112d6a --- /dev/null +++ b/libfetch/fetch.cat3 @@ -0,0 +1,526 @@ +FETCH(3) NetBSD Library Functions Manual FETCH(3) + +NNAAMMEE + ffeettcchhMMaakkeeUURRLL, ffeettcchhPPaarrsseeUURRLL, ffeettcchhCCooppyyUURRLL, ffeettcchhFFrreeeeUURRLL, ffeettcchhXXGGeettUURRLL, + ffeettcchhGGeettUURRLL, ffeettcchhPPuuttUURRLL, ffeettcchhSSttaattUURRLL, ffeettcchhLLiissttUURRLL, ffeettcchhXXGGeett, + ffeettcchhGGeett, ffeettcchhPPuutt, ffeettcchhSSttaatt, ffeettcchhLLiisstt, ffeettcchhXXGGeettFFiillee, ffeettcchhGGeettFFiillee, + ffeettcchhPPuuttFFiillee, ffeettcchhSSttaattFFiillee, ffeettcchhLLiissttFFiillee, ffeettcchhXXGGeettHHTTTTPP, ffeettcchhGGeettHHTTTTPP, + ffeettcchhPPuuttHHTTTTPP, ffeettcchhSSttaattHHTTTTPP, ffeettcchhLLiissttHHTTTTPP, ffeettcchhXXGGeettFFTTPP, ffeettcchhGGeettFFTTPP, + ffeettcchhPPuuttFFTTPP, ffeettcchhSSttaattFFTTPP, ffeettcchhLLiissttFFTTPP ffeettcchhIInniittUURRLLLLiisstt, + ffeettcchhFFrreeeeUURRLLLLiisstt, ffeettcchhUUnnqquuootteePPaatthh, ffeettcchhUUnnqquuootteeFFiilleennaammee, + ffeettcchhSSttrriinnggiiffyyUURRLL, ffeettcchhCCoonnnneeccttiioonnCCaacchheeIInniitt, ffeettcchhCCoonnnneeccttiioonnCCaacchheeCClloossee, + ffeettcchh -- file transfer functions + +LLIIBBRRAARRYY + File Transfer Library for URLs (libfetch, -lfetch) + +SSYYNNOOPPSSIISS + ##iinncclluuddee <<ssttddiioo..hh>> + ##iinncclluuddee <<ffeettcchh..hh>> + + _s_t_r_u_c_t _u_r_l _* + ffeettcchhMMaakkeeUURRLL(_c_o_n_s_t _c_h_a_r _*_s_c_h_e_m_e, _c_o_n_s_t _c_h_a_r _*_h_o_s_t, _i_n_t _p_o_r_t, + _c_o_n_s_t _c_h_a_r _*_d_o_c, _c_o_n_s_t _c_h_a_r _*_u_s_e_r, _c_o_n_s_t _c_h_a_r _*_p_w_d); + + _s_t_r_u_c_t _u_r_l _* + ffeettcchhPPaarrsseeUURRLL(_c_o_n_s_t _c_h_a_r _*_U_R_L); + + _s_t_r_u_c_t _u_r_l _* + ffeettcchhCCooppyyUURRLL(_c_o_n_s_t _s_t_r_u_c_t _u_r_l _*_u); + + _v_o_i_d + ffeettcchhFFrreeeeUURRLL(_s_t_r_u_c_t _u_r_l _*_u); + + 
_f_e_t_c_h_I_O _* + ffeettcchhXXGGeettUURRLL(_c_o_n_s_t _c_h_a_r _*_U_R_L, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhGGeettUURRLL(_c_o_n_s_t _c_h_a_r _*_U_R_L, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhPPuuttUURRLL(_c_o_n_s_t _c_h_a_r _*_U_R_L, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _i_n_t + ffeettcchhSSttaattUURRLL(_c_o_n_s_t _c_h_a_r _*_U_R_L, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _i_n_t + ffeettcchhLLiissttUURRLL(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_l_i_s_t, _c_o_n_s_t _c_h_a_r _*_U_R_L, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhXXGGeett(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhGGeett(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhPPuutt(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _i_n_t + ffeettcchhSSttaatt(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _i_n_t + ffeettcchhLLiisstt(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_l_i_s_t, _s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhXXGGeettFFiillee(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhGGeettFFiillee(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhPPuuttFFiillee(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _i_n_t + ffeettcchhSSttaattFFiillee(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _i_n_t + ffeettcchhLLiissttFFiillee(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_l_i_s_t, _s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhXXGGeettHHTTTTPP(_s_t_r_u_c_t _u_r_l _*_u, 
_s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhGGeettHHTTTTPP(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhPPuuttHHTTTTPP(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _i_n_t + ffeettcchhSSttaattHHTTTTPP(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _i_n_t + ffeettcchhLLiissttHHTTTTPP(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_l_i_s_t, _s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhXXGGeettFFTTPP(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhGGeettFFTTPP(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _f_e_t_c_h_I_O _* + ffeettcchhPPuuttFFTTPP(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _i_n_t + ffeettcchhSSttaattFFTTPP(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _i_n_t + ffeettcchhLLiissttFFTTPP(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_l_i_s_t, _s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s); + + _v_o_i_d + ffeettcchhIInniittUURRLLLLiisstt(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_u_l); + + _i_n_t + ffeettcchhAAppppeennddUURRLLLLiisstt(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_d_s_t, _c_o_n_s_t _s_t_r_u_c_t _u_r_l___l_i_s_t _*_s_r_c); + + _v_o_i_d + ffeettcchhFFrreeeeUURRLLLLiisstt(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_u_l); + + _c_h_a_r _* + ffeettcchhUUnnqquuootteePPaatthh(_s_t_r_u_c_t _u_r_l _*_u); + + _c_h_a_r _* + ffeettcchhUUnnqquuootteeFFiilleennaammee(_s_t_r_u_c_t _u_r_l _*_u); + + _c_h_a_r _* + ffeettcchhSSttrriinnggiiffyyUURRLL(_c_o_n_s_t _s_t_r_u_c_t _u_r_l _*_u); + + _v_o_i_d + ffeettcchhCCoonnnneeccttiioonnCCaacchheeIInniitt(_i_n_t _g_l_o_b_a_l, _i_n_t _p_e_r___h_o_s_t); + + _v_o_i_d + ffeettcchhCCoonnnneeccttiioonnCCaacchheeCClloossee(_v_o_i_d); + 
+DDEESSCCRRIIPPTTIIOONN + These functions implement a high-level library for retrieving and upload- + ing files using Uniform Resource Locators (URLs). + + ffeettcchhPPaarrsseeUURRLL() takes a URL in the form of a null-terminated string and + splits it into its components according to the Common Internet + Scheme Syntax detailed in RFC 1738. A regular expression which produces + this syntax is: + + <scheme>:(//(<user>(:<pwd>)?@)?<host>(:<port>)?)?/(<document>)? + + If the URL does not seem to begin with a scheme name, it is assumed to be + a local path. Only absolute path names are accepted. + + Note that some components of the URL are not necessarily relevant to all + URL schemes. For instance, the file scheme only needs the <scheme> and + <document> components. ffeettcchhPPaarrsseeUURRLL() quotes any unsafe character in + the URL automatically. This is not done by ffeettcchhMMaakkeeUURRLL(). + ffeettcchhCCooppyyUURRLL() copies an existing _u_r_l structure. + + ffeettcchhMMaakkeeUURRLL(), ffeettcchhPPaarrsseeUURRLL(), and ffeettcchhCCooppyyUURRLL() return a pointer to a + _u_r_l structure, which is defined as follows in <_f_e_t_c_h_._h>: + + #define URL_SCHEMELEN 16 + #define URL_USERLEN 256 + #define URL_PWDLEN 256 + #define URL_HOSTLEN 255 + + struct url { + char scheme[URL_SCHEMELEN + 1]; + char user[URL_USERLEN + 1]; + char pwd[URL_PWDLEN + 1]; + char host[URL_HOSTLEN + 1]; + int port; + char *doc; + off_t offset; + size_t length; + time_t last_modified; + }; + + The pointer returned by ffeettcchhMMaakkeeUURRLL(), ffeettcchhCCooppyyUURRLL(), and + ffeettcchhPPaarrsseeUURRLL() should be freed using ffeettcchhFFrreeeeUURRLL(). The size of _s_t_r_u_c_t + _U_R_L is not part of the ABI. + + ffeettcchhXXGGeettUURRLL(), ffeettcchhGGeettUURRLL(), and ffeettcchhPPuuttUURRLL() constitute the recom- + mended interface to the ffeettcchh library. 
They examine the URL passed to + them to determine the transfer method, and call the appropriate lower- + level functions to perform the actual transfer. ffeettcchhXXGGeettUURRLL() also + returns the remote document's metadata in the _u_r_l___s_t_a_t structure pointed + to by the _u_s argument. + + The _f_l_a_g_s argument is a string of characters which specify transfer + options. The meaning of the individual flags is scheme-dependent, and is + detailed in the appropriate section below. + + ffeettcchhSSttaattUURRLL() attempts to obtain the requested document's metadata and + fill in the structure pointed to by its second argument. The _u_r_l___s_t_a_t + structure is defined as follows in <_f_e_t_c_h_._h>: + + struct url_stat { + off_t size; + time_t atime; + time_t mtime; + }; + + If the size could not be obtained from the server, the _s_i_z_e field is set + to -1. If the modification time could not be obtained from the server, + the _m_t_i_m_e field is set to the epoch. If the access time could not be + obtained from the server, the _a_t_i_m_e field is set to the modification + time. + + ffeettcchhLLiissttUURRLL() attempts to list the contents of the directory pointed to + by the URL provided. The pattern can be a simple glob-like expression as + a hint. Callers should not depend on the server to filter names. If suc- + cessful, it appends the list of entries to the _u_r_l___l_i_s_t structure. The + _u_r_l___l_i_s_t structure is defined as follows in <_f_e_t_c_h_._h>: + + struct url_list { + size_t length; + size_t alloc_size; + struct url *urls; + }; + + The list should be initialized by calling ffeettcchhIInniittUURRLLLLiisstt() and the + entries be freed by calling ffeettcchhFFrreeeeUURRLLLLiisstt(). The function + ffeettcchhAAppppeennddUURRLLLLiisstt() can be used to append one URL list to another. If + the `c' (cache result) flag is specified, the library is allowed to + internally cache the result. 
+ + ffeettcchhSSttrriinnggiiffyyUURRLL() returns the URL as a string. ffeettcchhUUnnqquuootteePPaatthh() + returns the path name part of the URL with any quoting undone. Query + arguments and fragment identifiers are not included. + ffeettcchhUUnnqquuootteeFFiilleennaammee() returns the last component of the path name as + returned by ffeettcchhUUnnqquuootteePPaatthh(). ffeettcchhSSttrriinnggiiffyyUURRLL(), ffeettcchhUUnnqquuootteePPaatthh(), + and ffeettcchhUUnnqquuootteeFFiilleennaammee() return a string that should be deallocated + with ffrreeee() after use. + + ffeettcchhCCoonnnneeccttiioonnCCaacchheeIInniitt() enables the connection cache. The first argu- + ment specifies the global limit on cached connections. The second argu- + ment specifies the host limit. Entries are considered to specify the + same host, if the host name from the URL is identical, independent of the + address or address family. ffeettcchhCCoonnnneeccttiioonnCCaacchheeCClloossee() flushes the con- + nection cache and closes all cached connections. + + ffeettcchhXXGGeett(), ffeettcchhGGeett(), ffeettcchhPPuutt(), and ffeettcchhSSttaatt() are similar to + ffeettcchhXXGGeettUURRLL(), ffeettcchhGGeettUURRLL(), ffeettcchhPPuuttUURRLL(), and ffeettcchhSSttaattUURRLL(), except + that they expect a pre-parsed URL in the form of a pointer to a _s_t_r_u_c_t + _u_r_l rather than a string. + + All of the ffeettcchhXXGGeettXXXXXX(), ffeettcchhGGeettXXXXXX(), and ffeettcchhPPuuttXXXXXX() functions + return a pointer to a stream which can be used to read or write data from + or to the requested document, respectively. Note that although the + implementation details of the individual access methods vary, it can gen- + erally be assumed that a stream returned by one of the ffeettcchhXXGGeettXXXXXX() or + ffeettcchhGGeettXXXXXX() functions is read-only, and that a stream returned by one + of the ffeettcchhPPuuttXXXXXX() functions is write-only. 
+ +PPRROOTTOOCCOOLL IINNDDEEPPEENNDDEENNTT FFLLAAGGSS + If the `i' (if-modified-since) flag is specified, the library will try to + fetch the content only if it is newer than _l_a_s_t___m_o_d_i_f_i_e_d. For HTTP an + If-Modified-Since HTTP header is sent. For FTP an MDTM command is sent + first and compared locally. For FILE the source file is compared. + +FFIILLEE SSCCHHEEMMEE + ffeettcchhXXGGeettFFiillee(), ffeettcchhGGeettFFiillee(), and ffeettcchhPPuuttFFiillee() provide access to + documents which are files in a locally mounted file system. Only the + <document> component of the URL is used. + + ffeettcchhXXGGeettFFiillee() and ffeettcchhGGeettFFiillee() do not accept any flags. + + ffeettcchhPPuuttFFiillee() accepts the `a' (append to file) flag. If that flag is + specified, the data written to the stream returned by ffeettcchhPPuuttFFiillee() will + be appended to the previous contents of the file, instead of replacing + them. + +FFTTPP SSCCHHEEMMEE + ffeettcchhXXGGeettFFTTPP(), ffeettcchhGGeettFFTTPP(), and ffeettcchhPPuuttFFTTPP() implement the FTP proto- + col as described in RFC 959. + + By default lliibbffeettcchh will attempt to use passive mode first and only fall + back to active mode if the server reports a syntax error. If the `a' + (active) flag is specified, a passive connection is not tried and active + mode is used directly. + + If the `l' (low) flag is specified, data sockets will be allocated in the + low (or default) port range instead of the high port range (see ip(4)). + + If the `d' (direct) flag is specified, ffeettcchhXXGGeettFFTTPP(), ffeettcchhGGeettFFTTPP(), and + ffeettcchhPPuuttFFTTPP() will use a direct connection even if a proxy server is + defined. + + If no user name or password is given, the ffeettcchh library will attempt an + anonymous login, with user name "anonymous" and password "anony- + mous@<hostname>". 
+ +HHTTTTPP SSCCHHEEMMEE + The ffeettcchhXXGGeettHHTTTTPP(), ffeettcchhGGeettHHTTTTPP(), and ffeettcchhPPuuttHHTTTTPP() functions imple- + ment the HTTP/1.1 protocol. With a little luck, there is even a chance + that they comply with RFC 2616 and RFC 2617. + + If the `d' (direct) flag is specified, ffeettcchhXXGGeettHHTTTTPP(), ffeettcchhGGeettHHTTTTPP(), + and ffeettcchhPPuuttHHTTTTPP() will use a direct connection even if a proxy server is + defined. + + Since there seems to be no good way of implementing the HTTP PUT method + in a manner consistent with the rest of the ffeettcchh library, ffeettcchhPPuuttHHTTTTPP() + is currently unimplemented. + +AAUUTTHHEENNTTIICCAATTIIOONN + Apart from setting the appropriate environment variables and specifying + the user name and password in the URL or the _s_t_r_u_c_t _u_r_l, the calling pro- + gram has the option of defining an authentication function with the fol- + lowing prototype: + + _i_n_t mmyyAAuutthhMMeetthhoodd(_s_t_r_u_c_t _u_r_l _*_u) + + The callback function should fill in the _u_s_e_r and _p_w_d fields in the pro- + vided _s_t_r_u_c_t _u_r_l and return 0 on success, or any other value to indicate + failure. + + To register the authentication callback, simply set _f_e_t_c_h_A_u_t_h_M_e_t_h_o_d to + point at it. The callback will be used whenever a site requires authen- + tication and the appropriate environment variables are not set. + + This interface is experimental and may be subject to change. + +RREETTUURRNN VVAALLUUEESS + ffeettcchhPPaarrsseeUURRLL() returns a pointer to a _s_t_r_u_c_t _u_r_l containing the individ- + ual components of the URL. If it is unable to allocate memory, or the + URL is syntactically incorrect, ffeettcchhPPaarrsseeUURRLL() returns a NULL pointer. + + The ffeettcchhSSttaatt() functions return 0 on success and -1 on failure. + + All other functions return a stream pointer which may be used to access + the requested document, or NULL if an error occurred. 
+ + The following error codes are defined in <_f_e_t_c_h_._h>: + + [FETCH_ABORT] Operation aborted + + [FETCH_AUTH] Authentication failed + + [FETCH_DOWN] Service unavailable + + [FETCH_EXISTS] File exists + + [FETCH_FULL] File system full + + [FETCH_INFO] Informational response + + [FETCH_MEMORY] Insufficient memory + + [FETCH_MOVED] File has moved + + [FETCH_NETWORK] Network error + + [FETCH_OK] No error + + [FETCH_PROTO] Protocol error + + [FETCH_RESOLV] Resolver error + + [FETCH_SERVER] Server error + + [FETCH_TEMP] Temporary error + + [FETCH_TIMEOUT] Operation timed out + + [FETCH_UNAVAIL] File is not available + + [FETCH_UNKNOWN] Unknown error + + [FETCH_URL] Invalid URL + + The accompanying error message includes a protocol-specific error code + and message, e.g. "File is not available (404 Not Found)" + +EENNVVIIRROONNMMEENNTT + FETCH_BIND_ADDRESS Specifies a host name or IP address to which sockets + used for outgoing connections will be bound. + + FTP_LOGIN Default FTP login if none was provided in the URL. + + FTP_PASSIVE_MODE If set to anything but `no', forces the FTP code to + use passive mode. + + FTP_PASSWORD Default FTP password if the remote server requests + one and none was provided in the URL. + + FTP_PROXY URL of the proxy to use for FTP requests. The docu- + ment part is ignored. FTP and HTTP proxies are sup- + ported; if no scheme is specified, FTP is assumed. + If the proxy is an FTP proxy, lliibbffeettcchh will send + `user@host' as user name to the proxy, where `user' + is the real user name, and `host' is the name of the + FTP server. + + If this variable is set to an empty string, no proxy + will be used for FTP requests, even if the HTTP_PROXY + variable is set. + + ftp_proxy Same as FTP_PROXY, for compatibility. + + HTTP_AUTH Specifies HTTP authorization parameters as a colon- + separated list of items. The first and second item + are the authorization scheme and realm respectively; + further items are scheme-dependent. 
Currently, only + basic authorization is supported. + + Basic authorization requires two parameters: the user + name and password, in that order. + + This variable is only used if the server requires + authorization and no user name or password was speci- + fied in the URL. + + HTTP_PROXY URL of the proxy to use for HTTP requests. The docu- + ment part is ignored. Only HTTP proxies are sup- + ported for HTTP requests. If no port number is spec- + ified, the default is 3128. + + Note that this proxy will also be used for FTP docu- + ments, unless the FTP_PROXY variable is set. + + http_proxy Same as HTTP_PROXY, for compatibility. + + HTTP_PROXY_AUTH Specifies authorization parameters for the HTTP proxy + in the same format as the HTTP_AUTH variable. + + This variable is used if and only if connected to an + HTTP proxy, and is ignored if a user and/or a pass- + word were specified in the proxy URL. + + HTTP_REFERER Specifies the referrer URL to use for HTTP requests. + If set to ``auto'', the document URL will be used as + referrer URL. + + HTTP_USER_AGENT Specifies the User-Agent string to use for HTTP + requests. This can be useful when working with HTTP + origin or proxy servers that differentiate between + user agents. + + NETRC Specifies a file to use instead of _~_/_._n_e_t_r_c to look + up login names and passwords for FTP sites. See + ftp(1) for a description of the file format. This + feature is experimental. + + NO_PROXY Either a single asterisk, which disables the use of + proxies altogether, or a comma- or whitespace-sepa- + rated list of hosts for which proxies should not be + used. + + no_proxy Same as NO_PROXY, for compatibility. 
+ +EEXXAAMMPPLLEESS + To access a proxy server on _p_r_o_x_y_._e_x_a_m_p_l_e_._c_o_m port 8080, set the + HTTP_PROXY environment variable in a manner similar to this: + + HTTP_PROXY=http://proxy.example.com:8080 + + If the proxy server requires authentication, there are two options avail- + able for passing the authentication data. The first method is by using + the proxy URL: + + HTTP_PROXY=http://<user>:<pwd>@proxy.example.com:8080 + + The second method is by using the HTTP_PROXY_AUTH environment variable: + + HTTP_PROXY=http://proxy.example.com:8080 + HTTP_PROXY_AUTH=basic:*:<user>:<pwd> + + To disable the use of a proxy for an HTTP server running on the local + host, define NO_PROXY as follows: + + NO_PROXY=localhost,127.0.0.1 + +SSEEEE AALLSSOO + ftp(1), ip(4) + + J. Postel and J. K. Reynolds, _F_i_l_e _T_r_a_n_s_f_e_r _P_r_o_t_o_c_o_l, October 1985, RFC + 959. + + P. Deutsch, A. Emtage, and A. Marine, _H_o_w _t_o _U_s_e _A_n_o_n_y_m_o_u_s _F_T_P, May 1994, + RFC 1635. + + T. Berners-Lee, L. Masinter, and M. McCahill, _U_n_i_f_o_r_m _R_e_s_o_u_r_c_e _L_o_c_a_t_o_r_s + _(_U_R_L_), December 1994, RFC 1738. + + R. Fielding, J. Gettys, J. Mogul, H. Frystyk, L. Masinter, P. Leach, and + T. Berners-Lee, _H_y_p_e_r_t_e_x_t _T_r_a_n_s_f_e_r _P_r_o_t_o_c_o_l _-_- _H_T_T_P_/_1_._1, January 1999, + RFC 2616. + + J. Franks, P. Hallam-Baker, J. Hostetler, S. Lawrence, P. Leach, A. + Luotonen, and L. Stewart, _H_T_T_P _A_u_t_h_e_n_t_i_c_a_t_i_o_n_: _B_a_s_i_c _a_n_d _D_i_g_e_s_t _A_c_c_e_s_s + _A_u_t_h_e_n_t_i_c_a_t_i_o_n, June 1999, RFC 2617. + +HHIISSTTOORRYY + The ffeettcchh library first appeared in FreeBSD 3.0. + +AAUUTTHHOORRSS + The ffeettcchh library was mostly written by Dag-Erling Smørgrav + <des@FreeBSD.org> with numerous suggestions from Jordan K. Hubbard + <jkh@FreeBSD.org>, Eugene Skepner <eu@qub.com> and other FreeBSD develop- + ers. It replaces the older ffttppiioo library written by Poul-Henning Kamp + <phk@FreeBSD.org> and Jordan K. 
Hubbard <jkh@FreeBSD.org>. + + This manual page was written by Dag-Erling Smørgrav <des@FreeBSD.org>. + +BBUUGGSS + Some parts of the library are not yet implemented. The most notable + examples of this are ffeettcchhPPuuttHHTTTTPP() and FTP proxy support. + + There is no way to select a proxy at run-time other than setting the + HTTP_PROXY or FTP_PROXY environment variables as appropriate. + + lliibbffeettcchh does not understand or obey 305 (Use Proxy) replies. + + Error numbers are unique only within a certain context; the error codes + used for FTP and HTTP overlap, as do those used for resolver and system + errors. For instance, error code 202 means "Command not implemented, + superfluous at this site" in an FTP context and "Accepted" in an HTTP + context. + + ffeettcchhSSttaattFFTTPP() does not check that the result of an MDTM command is a + valid date. + + The man page is incomplete, poorly written and produces badly formatted + text. + + The error reporting mechanism is unsatisfactory. + + Some parts of the code are not fully reentrant. + +NetBSD 5.0 January 22, 2010 NetBSD 5.0 diff --git a/libfetch/fetch.h b/libfetch/fetch.h new file mode 100644 index 0000000..3075486 --- /dev/null +++ b/libfetch/fetch.h @@ -0,0 +1,187 @@ +/* $NetBSD: fetch.h,v 1.16 2010/01/22 13:21:09 joerg Exp $ */ +/*- + * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: fetch.h,v 1.26 2004/09/21 18:35:20 des Exp $ + */ + +#ifndef _FETCH_H_INCLUDED +#define _FETCH_H_INCLUDED + +#include <sys/types.h> +#include <limits.h> +#include <stdio.h> + +#define _LIBFETCH_VER "libfetch/2.0" + +#define URL_HOSTLEN 255 +#define URL_SCHEMELEN 16 +#define URL_USERLEN 256 +#define URL_PWDLEN 256 + +typedef struct fetchIO fetchIO; + +struct url { + char scheme[URL_SCHEMELEN + 1]; + char user[URL_USERLEN + 1]; + char pwd[URL_PWDLEN + 1]; + char host[URL_HOSTLEN + 1]; + int port; + char *doc; + off_t offset; + size_t length; + time_t last_modified; +}; + +struct url_stat { + off_t size; + time_t atime; + time_t mtime; +}; + +struct url_list { + size_t length; + size_t alloc_size; + struct url *urls; +}; + +/* Recognized schemes */ +#define SCHEME_FTP "ftp" +#define SCHEME_HTTP "http" +#define SCHEME_HTTPS "https" +#define SCHEME_FILE "file" + +/* Error codes */ +#define FETCH_ABORT 1 +#define FETCH_AUTH 2 +#define FETCH_DOWN 3 +#define FETCH_EXISTS 4 +#define FETCH_FULL 5 +#define FETCH_INFO 6 +#define FETCH_MEMORY 7 +#define FETCH_MOVED 8 +#define 
FETCH_NETWORK 9 +#define FETCH_OK 10 +#define FETCH_PROTO 11 +#define FETCH_RESOLV 12 +#define FETCH_SERVER 13 +#define FETCH_TEMP 14 +#define FETCH_TIMEOUT 15 +#define FETCH_UNAVAIL 16 +#define FETCH_UNKNOWN 17 +#define FETCH_URL 18 +#define FETCH_VERBOSE 19 +#define FETCH_UNCHANGED 20 + +#if defined(__cplusplus) +extern "C" { +#endif + +void fetchIO_close(fetchIO *); +ssize_t fetchIO_read(fetchIO *, void *, size_t); +ssize_t fetchIO_write(fetchIO *, const void *, size_t); + +/* fetchIO-specific functions */ +fetchIO *fetchXGetFile(struct url *, struct url_stat *, const char *); +fetchIO *fetchGetFile(struct url *, const char *); +fetchIO *fetchPutFile(struct url *, const char *); +int fetchStatFile(struct url *, struct url_stat *, const char *); +int fetchListFile(struct url_list *, struct url *, const char *, + const char *); + +/* HTTP-specific functions */ +fetchIO *fetchXGetHTTP(struct url *, struct url_stat *, const char *); +fetchIO *fetchGetHTTP(struct url *, const char *); +fetchIO *fetchPutHTTP(struct url *, const char *); +int fetchStatHTTP(struct url *, struct url_stat *, const char *); +int fetchListHTTP(struct url_list *, struct url *, const char *, + const char *); + +/* FTP-specific functions */ +fetchIO *fetchXGetFTP(struct url *, struct url_stat *, const char *); +fetchIO *fetchGetFTP(struct url *, const char *); +fetchIO *fetchPutFTP(struct url *, const char *); +int fetchStatFTP(struct url *, struct url_stat *, const char *); +int fetchListFTP(struct url_list *, struct url *, const char *, + const char *); + +/* Generic functions */ +fetchIO *fetchXGetURL(const char *, struct url_stat *, const char *); +fetchIO *fetchGetURL(const char *, const char *); +fetchIO *fetchPutURL(const char *, const char *); +int fetchStatURL(const char *, struct url_stat *, const char *); +int fetchListURL(struct url_list *, const char *, const char *, + const char *); +fetchIO *fetchXGet(struct url *, struct url_stat *, const char *); +fetchIO *fetchGet(struct url 
*, const char *); +fetchIO *fetchPut(struct url *, const char *); +int fetchStat(struct url *, struct url_stat *, const char *); +int fetchList(struct url_list *, struct url *, const char *, + const char *); + +/* URL parsing */ +struct url *fetchMakeURL(const char *, const char *, int, + const char *, const char *, const char *); +struct url *fetchParseURL(const char *); +struct url *fetchCopyURL(const struct url *); +char *fetchStringifyURL(const struct url *); +void fetchFreeURL(struct url *); + +/* URL listening */ +void fetchInitURLList(struct url_list *); +int fetchAppendURLList(struct url_list *, const struct url_list *); +void fetchFreeURLList(struct url_list *); +char *fetchUnquotePath(struct url *); +char *fetchUnquoteFilename(struct url *); + +/* Connection caching */ +void fetchConnectionCacheInit(int, int); +void fetchConnectionCacheClose(void); + +/* Authentication */ +typedef int (*auth_t)(struct url *); +extern auth_t fetchAuthMethod; + +/* Last error code */ +extern int fetchLastErrCode; +#define MAXERRSTRING 256 +extern char fetchLastErrString[MAXERRSTRING]; + +/* I/O timeout */ +extern int fetchTimeout; + +/* Restart interrupted syscalls */ +extern volatile int fetchRestartCalls; + +/* Extra verbosity */ +extern int fetchDebug; + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/libfetch/file.c b/libfetch/file.c new file mode 100644 index 0000000..b092fdf --- /dev/null +++ b/libfetch/file.c @@ -0,0 +1,265 @@ +/* $NetBSD: file.c,v 1.15 2009/10/15 12:36:57 joerg Exp $ */ +/*- + * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav + * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD: file.c,v 1.18 2007/12/14 10:26:58 des Exp $ + */ + +#if HAVE_CONFIG_H +#include "config.h" +#endif +#ifndef NETBSD +#include <nbcompat.h> +#endif + +#include <sys/stat.h> + +#include <dirent.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "fetch.h" +#include "common.h" + +static int fetch_stat_file(int, struct url_stat *); + +static ssize_t +fetchFile_read(void *cookie, void *buf, size_t len) +{ + return read(*(int *)cookie, buf, len); +} + +static ssize_t +fetchFile_write(void *cookie, const void *buf, size_t len) +{ + return write(*(int *)cookie, buf, len); +} + +static void +fetchFile_close(void *cookie) +{ + int fd = *(int *)cookie; + + free(cookie); + + close(fd); +} + +fetchIO * +fetchXGetFile(struct url *u, struct url_stat *us, const char *flags) +{ + char *path; + fetchIO *f; + struct url_stat local_us; + int if_modified_since, fd, *cookie; + + if_modified_since = CHECK_FLAG('i'); + if (if_modified_since && us == NULL) + us = &local_us; + + if ((path = fetchUnquotePath(u)) == NULL) { + fetch_syserr(); + return NULL; + } + + fd = open(path, O_RDONLY); + free(path); + if (fd == -1) { + fetch_syserr(); + return NULL; + } + + if (us && fetch_stat_file(fd, us) == -1) { + close(fd); + fetch_syserr(); + return NULL; + } + + if (if_modified_since && u->last_modified > 0 && + u->last_modified >= us->mtime) { + close(fd); + fetchLastErrCode = FETCH_UNCHANGED; + snprintf(fetchLastErrString, MAXERRSTRING, "Unchanged"); + return NULL; + } + + if (u->offset && lseek(fd, u->offset, SEEK_SET) == -1) { + close(fd); + fetch_syserr(); + return NULL; + } + + cookie = malloc(sizeof(int)); + if (cookie == NULL) { + close(fd); + fetch_syserr(); + return NULL; + } + + *cookie = fd; + f = fetchIO_unopen(cookie, fetchFile_read, fetchFile_write, fetchFile_close); + if (f == NULL) { + close(fd); + free(cookie); + } + return f; +} + +fetchIO * +fetchGetFile(struct url *u, const 
char *flags) +{ + return (fetchXGetFile(u, NULL, flags)); +} + +fetchIO * +fetchPutFile(struct url *u, const char *flags) +{ + char *path; + fetchIO *f; + int fd, *cookie; + + if ((path = fetchUnquotePath(u)) == NULL) { + fetch_syserr(); + return NULL; + } + + if (CHECK_FLAG('a')) + fd = open(path, O_WRONLY | O_APPEND); + else + fd = open(path, O_WRONLY); + + free(path); + + if (fd == -1) { + fetch_syserr(); + return NULL; + } + + if (u->offset && lseek(fd, u->offset, SEEK_SET) == -1) { + close(fd); + fetch_syserr(); + return NULL; + } + + cookie = malloc(sizeof(int)); + if (cookie == NULL) { + close(fd); + fetch_syserr(); + return NULL; + } + + *cookie = fd; + f = fetchIO_unopen(cookie, fetchFile_read, fetchFile_write, fetchFile_close); + if (f == NULL) { + close(fd); + free(cookie); + } + return f; +} + +static int +fetch_stat_file(int fd, struct url_stat *us) +{ + struct stat sb; + + us->size = -1; + us->atime = us->mtime = 0; + if (fstat(fd, &sb) == -1) { + fetch_syserr(); + return (-1); + } + us->size = sb.st_size; + us->atime = sb.st_atime; + us->mtime = sb.st_mtime; + return (0); +} + +int +fetchStatFile(struct url *u, struct url_stat *us, const char *flags) +{ + char *path; + int fd, rv; + + if ((path = fetchUnquotePath(u)) == NULL) { + fetch_syserr(); + return -1; + } + + fd = open(path, O_RDONLY); + free(path); + + if (fd == -1) { + fetch_syserr(); + return -1; + } + + rv = fetch_stat_file(fd, us); + close(fd); + + return rv; +} + +int +fetchListFile(struct url_list *ue, struct url *u, const char *pattern, const char *flags) +{ + char *path; + struct dirent *de; + DIR *dir; + int ret; + + if ((path = fetchUnquotePath(u)) == NULL) { + fetch_syserr(); + return -1; + } + + dir = opendir(path); + free(path); + + if (dir == NULL) { + fetch_syserr(); + return -1; + } + + ret = 0; + + while ((de = readdir(dir)) != NULL) { + if (pattern && fnmatch(pattern, de->d_name, 0) != 0) + continue; + ret = fetch_add_entry(ue, u, de->d_name, 0); + if (ret) + break; + } + + 
closedir(dir); + + return ret; +} diff --git a/libfetch/ftp.c b/libfetch/ftp.c new file mode 100644 index 0000000..79dcbbc --- /dev/null +++ b/libfetch/ftp.c @@ -0,0 +1,1310 @@ +/* $NetBSD: ftp.c,v 1.46 2014/06/11 13:12:12 joerg Exp $ */ +/*- + * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav + * Copyright (c) 2008, 2009, 2010 Joerg Sonnenberger <joerg@NetBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD: ftp.c,v 1.101 2008/01/23 20:57:59 des Exp $ + */ + +/* + * Portions of this code were taken from or based on ftpio.c: + * + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * <phk@FreeBSD.org> wrote this file. As long as you retain this notice you + * can do whatever you want with this stuff. If we meet some day, and you think + * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp + * ---------------------------------------------------------------------------- + * + * Major Changelog: + * + * Dag-Erling Coïdan Smørgrav + * 9 Jun 1998 + * + * Incorporated into libfetch + * + * Jordan K. Hubbard + * 17 Jan 1996 + * + * Turned inside out. Now returns xfers as new file ids, not as a special + * `state' of FTP_t + * + * $ftpioId: ftpio.c,v 1.30 1998/04/11 07:28:53 phk Exp $ + * + */ + +#ifdef __linux__ +/* Keep this down to Linux, it can create surprises else where. */ +#define _GNU_SOURCE +#endif + +#if HAVE_CONFIG_H +#include "config.h" +#endif +#ifndef NETBSD +#include <nbcompat.h> +#endif + +#include <sys/types.h> +#include <sys/socket.h> + +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#if defined(HAVE_INTTYPES_H) || defined(NETBSD) +#include <inttypes.h> +#endif +#include <stdarg.h> +#ifndef NETBSD +#include <nbcompat/netdb.h> +#include <nbcompat/stdio.h> +#else +#include <netdb.h> +#include <stdio.h> +#endif +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "fetch.h" +#include "common.h" +#include "ftperr.h" + +static int ftp_cmd(conn_t *, const char *, ...) 
LIBFETCH_PRINTFLIKE(2, 3); +#define FTP_ANONYMOUS_USER "anonymous" + +#define FTP_CONNECTION_ALREADY_OPEN 125 +#define FTP_OPEN_DATA_CONNECTION 150 +#define FTP_OK 200 +#define FTP_FILE_STATUS 213 +#define FTP_SERVICE_READY 220 +#define FTP_TRANSFER_COMPLETE 226 +#define FTP_PASSIVE_MODE 227 +#define FTP_LPASSIVE_MODE 228 +#define FTP_EPASSIVE_MODE 229 +#define FTP_LOGGED_IN 230 +#define FTP_FILE_ACTION_OK 250 +#define FTP_DIRECTORY_CREATED 257 /* multiple meanings */ +#define FTP_FILE_CREATED 257 /* multiple meanings */ +#define FTP_WORKING_DIRECTORY 257 /* multiple meanings */ +#define FTP_NEED_PASSWORD 331 +#define FTP_NEED_ACCOUNT 332 +#define FTP_FILE_OK 350 +#define FTP_SYNTAX_ERROR 500 +#define FTP_PROTOCOL_ERROR 999 + +#define isftpreply(foo) \ + (isdigit((unsigned char)foo[0]) && \ + isdigit((unsigned char)foo[1]) && \ + isdigit((unsigned char)foo[2]) && \ + (foo[3] == ' ' || foo[3] == '\0')) +#define isftpinfo(foo) \ + (isdigit((unsigned char)foo[0]) && \ + isdigit((unsigned char)foo[1]) && \ + isdigit((unsigned char)foo[2]) && \ + foo[3] == '-') + +/* + * Translate IPv4 mapped IPv6 address to IPv4 address + */ +static void +unmappedaddr(struct sockaddr_in6 *sin6, socklen_t *len) +{ + struct sockaddr_in *sin4; + uint32_t addr; + int port; + + if (sin6->sin6_family != AF_INET6 || + !IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) + return; + sin4 = (struct sockaddr_in *)sin6; +#ifdef s6_addr32 + addr = sin6->sin6_addr.s6_addr32[3]; +#else + memcpy(&addr, &sin6->sin6_addr.s6_addr[12], sizeof(addr)); +#endif + port = sin6->sin6_port; + memset(sin4, 0, sizeof(struct sockaddr_in)); + sin4->sin_addr.s_addr = addr; + sin4->sin_port = port; + sin4->sin_family = AF_INET; + *len = sizeof(struct sockaddr_in); +#ifdef HAVE_SA_LEN + sin4->sin_len = sizeof(struct sockaddr_in); +#endif +} + +/* + * Get server response + */ +static int +ftp_chkerr(conn_t *conn) +{ + if (fetch_getln(conn) == -1) { + fetch_syserr(); + return (-1); + } + if (isftpinfo(conn->buf)) { + while 
(conn->buflen && !isftpreply(conn->buf)) { + if (fetch_getln(conn) == -1) { + fetch_syserr(); + return (-1); + } + } + } + + while (conn->buflen && + isspace((unsigned char)conn->buf[conn->buflen - 1])) + conn->buflen--; + conn->buf[conn->buflen] = '\0'; + + if (!isftpreply(conn->buf)) { + ftp_seterr(FTP_PROTOCOL_ERROR); + return (-1); + } + + conn->err = (conn->buf[0] - '0') * 100 + + (conn->buf[1] - '0') * 10 + + (conn->buf[2] - '0'); + + return (conn->err); +} + +/* + * Send a command and check reply + */ +LIBFETCH_PRINTFLIKE(2, 3) +static int +ftp_cmd(conn_t *conn, const char *fmt, ...) +{ + va_list ap; + size_t len; + char *msg; + int r; + + va_start(ap, fmt); + len = vasprintf(&msg, fmt, ap); + va_end(ap); + + if (msg == NULL) { + errno = ENOMEM; + fetch_syserr(); + return (-1); + } + + r = fetch_write(conn, msg, len); + free(msg); + + if (r == -1) { + fetch_syserr(); + return (-1); + } + + return (ftp_chkerr(conn)); +} + +/* + * Return a pointer to the filename part of a path + */ +static const char * +ftp_filename(const char *file, int *len, int *type, int subdir) +{ + const char *s; + + if ((s = strrchr(file, '/')) == NULL || subdir) + s = file; + else + s = s + 1; + *len = strlen(s); + if (*len > 7 && strncmp(s + *len - 7, ";type=", 6) == 0) { + *type = s[*len - 1]; + *len -= 7; + } else { + *type = '\0'; + } + return (s); +} + +/* + * Get current working directory from the reply to a CWD, PWD or CDUP + * command. 
+ */ +static int +ftp_pwd(conn_t *conn, char **pwd) +{ + char *src, *dst, *end; + int q; + + if (conn->err != FTP_WORKING_DIRECTORY && + conn->err != FTP_FILE_ACTION_OK) + return (FTP_PROTOCOL_ERROR); + end = conn->buf + conn->buflen; + src = conn->buf + 4; + if (src >= end || *src++ != '"') + return (FTP_PROTOCOL_ERROR); + *pwd = malloc(end - src + 1); + if (*pwd == NULL) + return (FTP_PROTOCOL_ERROR); + for (q = 0, dst = *pwd; src < end; ++src) { + if (!q && *src == '"') + q = 1; + else if (q && *src != '"') + break; + else if (q) + *dst++ = '"', q = 0; + else + *dst++ = *src; + } + *dst = '\0'; + if (**pwd != '/') { + free(*pwd); + *pwd = NULL; + return (FTP_PROTOCOL_ERROR); + } + return (FTP_OK); +} + +/* + * Change working directory to the directory that contains the specified + * file. + */ +static int +ftp_cwd(conn_t *conn, const char *path, int subdir) +{ + const char *beg, *end; + char *pwd, *dst; + int e, i, len; + + if (*path != '/') { + ftp_seterr(501); + return (-1); + } + ++path; + + /* Simple case: still in the home directory and no directory change. */ + if (conn->ftp_home == NULL && strchr(path, '/') == NULL && + (!subdir || *path == '\0')) + return 0; + + if ((e = ftp_cmd(conn, "PWD\r\n")) != FTP_WORKING_DIRECTORY || + (e = ftp_pwd(conn, &pwd)) != FTP_OK) { + ftp_seterr(e); + return (-1); + } + if (conn->ftp_home == NULL && (conn->ftp_home = strdup(pwd)) == NULL) { + fetch_syserr(); + free(pwd); + return (-1); + } + if (*path == '/') { + while (path[1] == '/') + ++path; + dst = strdup(path); + } else if (strcmp(conn->ftp_home, "/") == 0) { + dst = strdup(path - 1); + } else { + if (asprintf(&dst, "%s/%s", conn->ftp_home, path) == -1) + dst = NULL; + } + if (dst == NULL) { + fetch_syserr(); + free(pwd); + return (-1); + } + + if (subdir) + end = dst + strlen(dst); + else + end = strrchr(dst, '/'); + + for (;;) { + len = strlen(pwd); + + /* Look for a common prefix between PWD and dir to fetch. 
*/ + for (i = 0; i <= len && i <= end - dst; ++i) + if (pwd[i] != dst[i]) + break; + /* Keep going up a dir until we have a matching prefix. */ + if (strcmp(pwd, "/") == 0) + break; + if (pwd[i] == '\0' && (dst[i - 1] == '/' || dst[i] == '/')) + break; + free(pwd); + if ((e = ftp_cmd(conn, "CDUP\r\n")) != FTP_FILE_ACTION_OK || + (e = ftp_cmd(conn, "PWD\r\n")) != FTP_WORKING_DIRECTORY || + (e = ftp_pwd(conn, &pwd)) != FTP_OK) { + ftp_seterr(e); + free(dst); + return (-1); + } + } + free(pwd); + +#ifdef FTP_COMBINE_CWDS + /* Skip leading slashes, even "////". */ + for (beg = dst + i; beg < end && *beg == '/'; ++beg, ++i) + /* nothing */ ; + + /* If there is no trailing dir, we're already there. */ + if (beg >= end) { + free(dst); + return (0); + } + + /* Change to the directory all in one chunk (e.g., foo/bar/baz). */ + e = ftp_cmd(conn, "CWD %.*s\r\n", (int)(end - beg), beg); + if (e == FTP_FILE_ACTION_OK) { + free(dst); + return (0); + } +#endif /* FTP_COMBINE_CWDS */ + + /* That didn't work so go back to legacy behavior (multiple CWDs). */ + for (beg = dst + i; beg < end; beg = dst + i + 1) { + while (*beg == '/') + ++beg, ++i; + for (++i; dst + i < end && dst[i] != '/'; ++i) + /* nothing */ ; + e = ftp_cmd(conn, "CWD %.*s\r\n", (int)(dst + i - beg), beg); + if (e != FTP_FILE_ACTION_OK) { + free(dst); + ftp_seterr(e); + return (-1); + } + } + free(dst); + return (0); +} + +/* + * Set transfer mode and data type + */ +static int +ftp_mode_type(conn_t *conn, int mode, int type) +{ + int e; + + switch (mode) { + case 0: + case 's': + mode = 'S'; + case 'S': + break; + default: + return (FTP_PROTOCOL_ERROR); + } + if ((e = ftp_cmd(conn, "MODE %c\r\n", mode)) != FTP_OK) { + if (mode == 'S') { + /* + * Stream mode is supposed to be the default - so + * much so that some servers not only do not + * support any other mode, but do not support the + * MODE command at all. 
+ * + * If "MODE S" fails, it is unlikely that we + * previously succeeded in setting a different + * mode. Therefore, we simply hope that the + * server is already in the correct mode, and + * silently ignore the failure. + */ + } else { + return (e); + } + } + + switch (type) { + case 0: + case 'i': + type = 'I'; + case 'I': + break; + case 'a': + type = 'A'; + case 'A': + break; + case 'd': + type = 'D'; + case 'D': + /* can't handle yet */ + default: + return (FTP_PROTOCOL_ERROR); + } + if ((e = ftp_cmd(conn, "TYPE %c\r\n", type)) != FTP_OK) + return (e); + + return (FTP_OK); +} + +/* + * Request and parse file stats + */ +static int +ftp_stat(conn_t *conn, const char *file, struct url_stat *us) +{ + char *ln; + const char *filename; + int filenamelen, type, year; + struct tm tm; + time_t t; + int e; + + us->size = -1; + us->atime = us->mtime = 0; + + filename = ftp_filename(file, &filenamelen, &type, 0); + + if ((e = ftp_mode_type(conn, 0, type)) != FTP_OK) { + ftp_seterr(e); + return (-1); + } + + e = ftp_cmd(conn, "SIZE %.*s\r\n", (int)filenamelen, filename); + if (e != FTP_FILE_STATUS) { + ftp_seterr(e); + return (-1); + } + for (ln = conn->buf + 4; *ln && isspace((unsigned char)*ln); ln++) + /* nothing */ ; + for (us->size = 0; *ln && isdigit((unsigned char)*ln); ln++) + us->size = us->size * 10 + *ln - '0'; + if (*ln && !isspace((unsigned char)*ln)) { + ftp_seterr(FTP_PROTOCOL_ERROR); + us->size = -1; + return (-1); + } + if (us->size == 0) + us->size = -1; + + e = ftp_cmd(conn, "MDTM %.*s\r\n", (int)filenamelen, filename); + if (e != FTP_FILE_STATUS) { + ftp_seterr(e); + return (-1); + } + for (ln = conn->buf + 4; *ln && isspace((unsigned char)*ln); ln++) + /* nothing */ ; + switch (strspn(ln, "0123456789")) { + case 14: + break; + case 15: + ln++; + ln[0] = '2'; + ln[1] = '0'; + break; + default: + ftp_seterr(FTP_PROTOCOL_ERROR); + return (-1); + } + if (sscanf(ln, "%04d%02d%02d%02d%02d%02d", + &year, &tm.tm_mon, &tm.tm_mday, + &tm.tm_hour, &tm.tm_min, 
&tm.tm_sec) != 6) { + ftp_seterr(FTP_PROTOCOL_ERROR); + return (-1); + } + tm.tm_mon--; + tm.tm_year = year - 1900; + tm.tm_isdst = -1; + t = timegm(&tm); + if (t == (time_t)-1) + t = time(NULL); + us->mtime = t; + us->atime = t; + + return (0); +} + +/* + * I/O functions for FTP + */ +struct ftpio { + conn_t *cconn; /* Control connection */ + conn_t *dconn; /* Data connection */ + int dir; /* Direction */ + int eof; /* EOF reached */ + int err; /* Error code */ +}; + +static ssize_t ftp_readfn(void *, void *, size_t); +static ssize_t ftp_writefn(void *, const void *, size_t); +static void ftp_closefn(void *); + +static ssize_t +ftp_readfn(void *v, void *buf, size_t len) +{ + struct ftpio *io; + int r; + + io = (struct ftpio *)v; + if (io == NULL) { + errno = EBADF; + return (-1); + } + if (io->cconn == NULL || io->dconn == NULL || io->dir == O_WRONLY) { + errno = EBADF; + return (-1); + } + if (io->err) { + errno = io->err; + return (-1); + } + if (io->eof) + return (0); + r = fetch_read(io->dconn, buf, len); + if (r > 0) + return (r); + if (r == 0) { + io->eof = 1; + return (0); + } + if (errno != EINTR) + io->err = errno; + return (-1); +} + +static ssize_t +ftp_writefn(void *v, const void *buf, size_t len) +{ + struct ftpio *io; + int w; + + io = (struct ftpio *)v; + if (io == NULL) { + errno = EBADF; + return (-1); + } + if (io->cconn == NULL || io->dconn == NULL || io->dir == O_RDONLY) { + errno = EBADF; + return (-1); + } + if (io->err) { + errno = io->err; + return (-1); + } + w = fetch_write(io->dconn, buf, len); + if (w >= 0) + return (w); + if (errno != EINTR) + io->err = errno; + return (-1); +} + +static int +ftp_disconnect(conn_t *conn) +{ + ftp_cmd(conn, "QUIT\r\n"); + return fetch_close(conn); +} + +static void +ftp_closefn(void *v) +{ + struct ftpio *io; + + io = (struct ftpio *)v; + if (io == NULL) { + errno = EBADF; + return; + } + if (io->dir == -1) + return; + if (io->cconn == NULL || io->dconn == NULL) { + errno = EBADF; + return; + } + 
fetch_close(io->dconn); + io->dconn = NULL; + io->dir = -1; + ftp_chkerr(io->cconn); + fetch_cache_put(io->cconn, ftp_disconnect); + free(io); + return; +} + +static fetchIO * +ftp_setup(conn_t *cconn, conn_t *dconn, int mode) +{ + struct ftpio *io; + fetchIO *f; + + if (cconn == NULL || dconn == NULL) + return (NULL); + if ((io = malloc(sizeof(*io))) == NULL) + return (NULL); + io->cconn = cconn; + io->dconn = dconn; + io->dir = mode; + io->eof = io->err = 0; + f = fetchIO_unopen(io, ftp_readfn, ftp_writefn, ftp_closefn); + if (f == NULL) + free(io); + return (f); +} + +/* + * Transfer file + */ +static fetchIO * +ftp_transfer(conn_t *conn, const char *oper, const char *file, const char *op_arg, + int mode, off_t offset, const char *flags) +{ + union anonymous { + struct sockaddr_storage ss; + struct sockaddr sa; + struct sockaddr_in6 sin6; + struct sockaddr_in sin4; + } u; + const char *bindaddr; + const char *filename; + int filenamelen, type; + int pasv, verbose; + int e, sd = -1; + socklen_t l; + char *s; + fetchIO *df; + + /* check flags */ + pasv = !CHECK_FLAG('a'); + verbose = CHECK_FLAG('v'); + + /* passive mode */ + if (!pasv) + pasv = ((s = getenv("FTP_PASSIVE_MODE")) != NULL && + strncasecmp(s, "no", 2) != 0); + + /* isolate filename */ + filename = ftp_filename(file, &filenamelen, &type, op_arg != NULL); + + /* set transfer mode and data type */ + if ((e = ftp_mode_type(conn, 0, type)) != FTP_OK) + goto ouch; + + /* find our own address, bind, and listen */ + l = sizeof(u.ss); + if (getsockname(conn->sd, &u.sa, &l) == -1) + goto sysouch; + if (u.ss.ss_family == AF_INET6) + unmappedaddr(&u.sin6, &l); + +retry_mode: + + /* open data socket */ + if ((sd = socket(u.ss.ss_family, SOCK_STREAM, IPPROTO_TCP)) == -1) { + fetch_syserr(); + return (NULL); + } + + if (pasv) { + unsigned char addr[64]; + char *ln, *p; + unsigned int i; + int port; + + /* send PASV command */ + if (verbose) + fetch_info("setting passive mode"); + switch (u.ss.ss_family) { + case 
AF_INET: + if ((e = ftp_cmd(conn, "PASV\r\n")) != FTP_PASSIVE_MODE) + goto ouch; + break; + case AF_INET6: + if ((e = ftp_cmd(conn, "EPSV\r\n")) != FTP_EPASSIVE_MODE) { + if (e == -1) + goto ouch; + if ((e = ftp_cmd(conn, "LPSV\r\n")) != + FTP_LPASSIVE_MODE) + goto ouch; + } + break; + default: + e = FTP_PROTOCOL_ERROR; /* XXX: error code should be prepared */ + goto ouch; + } + + /* + * Find address and port number. The reply to the PASV command + * is IMHO the one and only weak point in the FTP protocol. + */ + ln = conn->buf; + switch (e) { + case FTP_PASSIVE_MODE: + case FTP_LPASSIVE_MODE: + for (p = ln + 3; *p && !isdigit((unsigned char)*p); p++) + /* nothing */ ; + if (!*p) { + e = FTP_PROTOCOL_ERROR; + goto ouch; + } + l = (e == FTP_PASSIVE_MODE ? 6 : 21); + for (i = 0; *p && i < l; i++, p++) + addr[i] = strtol(p, &p, 10); + if (i < l) { + e = FTP_PROTOCOL_ERROR; + goto ouch; + } + break; + case FTP_EPASSIVE_MODE: + for (p = ln + 3; *p && *p != '('; p++) + /* nothing */ ; + if (!*p) { + e = FTP_PROTOCOL_ERROR; + goto ouch; + } + ++p; + if (sscanf(p, "%c%c%c%d%c", &addr[0], &addr[1], &addr[2], + &port, &addr[3]) != 5 || + addr[0] != addr[1] || + addr[0] != addr[2] || addr[0] != addr[3]) { + e = FTP_PROTOCOL_ERROR; + goto ouch; + } + break; + case FTP_SYNTAX_ERROR: + if (verbose) + fetch_info("passive mode failed"); + /* Close socket and retry with passive mode. 
*/ + pasv = 0; + close(sd); + sd = -1; + goto retry_mode; + } + + /* seek to required offset */ + if (offset) + if (ftp_cmd(conn, "REST %lu\r\n", (unsigned long)offset) != FTP_FILE_OK) + goto sysouch; + + /* construct sockaddr for data socket */ + l = sizeof(u.ss); + if (getpeername(conn->sd, &u.sa, &l) == -1) + goto sysouch; + if (u.ss.ss_family == AF_INET6) + unmappedaddr(&u.sin6, &l); + switch (u.ss.ss_family) { + case AF_INET6: + if (e == FTP_EPASSIVE_MODE) + u.sin6.sin6_port = htons(port); + else { + memcpy(&u.sin6.sin6_addr, addr + 2, 16); + memcpy(&u.sin6.sin6_port, addr + 19, 2); + } + break; + case AF_INET: + if (e == FTP_EPASSIVE_MODE) + u.sin4.sin_port = htons(port); + else { + memcpy(&u.sin4.sin_addr, addr, 4); + memcpy(&u.sin4.sin_port, addr + 4, 2); + } + break; + default: + e = FTP_PROTOCOL_ERROR; /* XXX: error code should be prepared */ + break; + } + + /* connect to data port */ + if (verbose) + fetch_info("opening data connection"); + bindaddr = getenv("FETCH_BIND_ADDRESS"); + if (bindaddr != NULL && *bindaddr != '\0' && + fetch_bind(sd, u.ss.ss_family, bindaddr) != 0) + goto sysouch; + if (connect(sd, &u.sa, l) == -1) + goto sysouch; + + /* make the server initiate the transfer */ + if (verbose) + fetch_info("initiating transfer"); + if (op_arg) + e = ftp_cmd(conn, "%s%s%s\r\n", oper, *op_arg ? " " : "", op_arg); + else + e = ftp_cmd(conn, "%s %.*s\r\n", oper, + (int)filenamelen, filename); + if (e != FTP_CONNECTION_ALREADY_OPEN && e != FTP_OPEN_DATA_CONNECTION) + goto ouch; + + } else { + uint32_t a; + uint16_t p; +#if defined(IPV6_PORTRANGE) || defined(IP_PORTRANGE) + int arg; + int low = CHECK_FLAG('l'); +#endif + int d; + char hname[INET6_ADDRSTRLEN]; + + switch (u.ss.ss_family) { + case AF_INET6: + u.sin6.sin6_port = 0; +#ifdef IPV6_PORTRANGE + arg = low ? 
IPV6_PORTRANGE_DEFAULT : IPV6_PORTRANGE_HIGH; + if (setsockopt(sd, IPPROTO_IPV6, IPV6_PORTRANGE, + (char *)&arg, sizeof(arg)) == -1) + goto sysouch; +#endif + break; + case AF_INET: + u.sin4.sin_port = 0; +#ifdef IP_PORTRANGE + arg = low ? IP_PORTRANGE_DEFAULT : IP_PORTRANGE_HIGH; + if (setsockopt(sd, IPPROTO_IP, IP_PORTRANGE, + (char *)&arg, sizeof(arg)) == -1) + goto sysouch; +#endif + break; + } + if (verbose) + fetch_info("binding data socket"); + if (bind(sd, &u.sa, l) == -1) + goto sysouch; + if (listen(sd, 1) == -1) + goto sysouch; + + /* find what port we're on and tell the server */ + if (getsockname(sd, &u.sa, &l) == -1) + goto sysouch; + switch (u.ss.ss_family) { + case AF_INET: + a = ntohl(u.sin4.sin_addr.s_addr); + p = ntohs(u.sin4.sin_port); + e = ftp_cmd(conn, "PORT %d,%d,%d,%d,%d,%d\r\n", + (a >> 24) & 0xff, (a >> 16) & 0xff, + (a >> 8) & 0xff, a & 0xff, + (p >> 8) & 0xff, p & 0xff); + break; + case AF_INET6: + e = -1; + u.sin6.sin6_scope_id = 0; + if (getnameinfo(&u.sa, l, + hname, sizeof(hname), + NULL, 0, NI_NUMERICHOST) == 0) { + e = ftp_cmd(conn, "EPRT |%d|%s|%d|\r\n", 2, hname, + htons(u.sin6.sin6_port)); + if (e == -1) + goto ouch; + } + if (e != FTP_OK) { + unsigned char *ap = (void *)&u.sin6.sin6_addr.s6_addr; + uint16_t port = ntohs(u.sin6.sin6_port); + e = ftp_cmd(conn, + "LPRT %d,%d,%u,%u,%u,%u,%u,%u,%u,%u," + "%u,%u,%u,%u,%u,%u,%u,%u,%d,%d,%d\r\n", + 6, 16, + (unsigned)ap[0], (unsigned)ap[1], + (unsigned)ap[2], (unsigned)ap[3], + (unsigned)ap[4], (unsigned)ap[5], + (unsigned)ap[6], (unsigned)ap[7], + (unsigned)ap[8], (unsigned)ap[9], + (unsigned)ap[10], (unsigned)ap[11], + (unsigned)ap[12], (unsigned)ap[13], + (unsigned)ap[14], (unsigned)ap[15], + 2, port >> 8, port & 0xff); + } + break; + default: + e = FTP_PROTOCOL_ERROR; /* XXX: error code should be prepared */ + goto ouch; + } + if (e != FTP_OK) + goto ouch; + + /* seek to required offset */ + if (offset) + if (ftp_cmd(conn, "REST %llu\r\n", (unsigned long long)offset) != 
FTP_FILE_OK) + goto sysouch; + + /* make the server initiate the transfer */ + if (verbose) + fetch_info("initiating transfer"); + if (op_arg) + e = ftp_cmd(conn, "%s%s%s\r\n", oper, *op_arg ? " " : "", op_arg); + else + e = ftp_cmd(conn, "%s %.*s\r\n", oper, + (int)filenamelen, filename); + if (e != FTP_CONNECTION_ALREADY_OPEN && e != FTP_OPEN_DATA_CONNECTION) + goto ouch; + + /* accept the incoming connection and go to town */ + if ((d = accept(sd, NULL, NULL)) == -1) + goto sysouch; + close(sd); + sd = d; + } + + if ((df = ftp_setup(conn, fetch_reopen(sd), mode)) == NULL) + goto sysouch; + return (df); + +sysouch: + fetch_syserr(); + if (sd >= 0) + close(sd); + return (NULL); + +ouch: + if (e != -1) + ftp_seterr(e); + if (sd >= 0) + close(sd); + return (NULL); +} + +/* + * Authenticate + */ +static int +ftp_authenticate(conn_t *conn, struct url *url, struct url *purl) +{ + const char *user, *pwd, *login_name; + char pbuf[URL_USERLEN + 1 + URL_HOSTLEN + 1]; + int e, len; + + /* XXX FTP_AUTH, and maybe .netrc */ + + /* send user name and password */ + if (url->user[0] == '\0') + fetch_netrc_auth(url); + user = url->user; + if (*user == '\0') + user = getenv("FTP_LOGIN"); + if (user == NULL || *user == '\0') + user = FTP_ANONYMOUS_USER; + if (purl && url->port == fetch_default_port(url->scheme)) + e = ftp_cmd(conn, "USER %s@%s\r\n", user, url->host); + else if (purl) + e = ftp_cmd(conn, "USER %s@%s@%d\r\n", user, url->host, url->port); + else + e = ftp_cmd(conn, "USER %s\r\n", user); + + /* did the server request a password? 
*/ + if (e == FTP_NEED_PASSWORD) { + pwd = url->pwd; + if (*pwd == '\0') + pwd = getenv("FTP_PASSWORD"); + if (pwd == NULL || *pwd == '\0') { + if ((login_name = getlogin()) == 0) + login_name = FTP_ANONYMOUS_USER; + if ((len = snprintf(pbuf, URL_USERLEN + 2, "%s@", login_name)) < 0) + len = 0; + else if (len > URL_USERLEN + 1) + len = URL_USERLEN + 1; + gethostname(pbuf + len, sizeof(pbuf) - len); + /* MAXHOSTNAMELEN can differ from URL_HOSTLEN + 1 */ + pbuf[sizeof(pbuf) - 1] = '\0'; + pwd = pbuf; + } + e = ftp_cmd(conn, "PASS %s\r\n", pwd); + } + + return (e); +} + +/* + * Log on to FTP server + */ +static conn_t * +ftp_connect(struct url *url, struct url *purl, const char *flags) +{ + conn_t *conn; + int e, direct, verbose; +#ifdef INET6 + int af = AF_UNSPEC; +#else + int af = AF_INET; +#endif + + direct = CHECK_FLAG('d'); + verbose = CHECK_FLAG('v'); + if (CHECK_FLAG('4')) + af = AF_INET; + else if (CHECK_FLAG('6')) + af = AF_INET6; + + if (direct) + purl = NULL; + + /* check for proxy */ + if (purl) { + /* XXX proxy authentication! */ + /* XXX connetion caching */ + if (!purl->port) + purl->port = fetch_default_port(purl->scheme); + + conn = fetch_connect(purl, af, verbose); + } else { + /* no proxy, go straight to target */ + if (!url->port) + url->port = fetch_default_port(url->scheme); + + while ((conn = fetch_cache_get(url, af)) != NULL) { + e = ftp_cmd(conn, "NOOP\r\n"); + if (e == FTP_OK) + return conn; + fetch_close(conn); + } + conn = fetch_connect(url, af, verbose); + purl = NULL; + } + + /* check connection */ + if (conn == NULL) + /* fetch_connect() has already set an error code */ + return (NULL); + + /* expect welcome message */ + if ((e = ftp_chkerr(conn)) != FTP_SERVICE_READY) + goto fouch; + + /* authenticate */ + if ((e = ftp_authenticate(conn, url, purl)) != FTP_LOGGED_IN) + goto fouch; + + /* TODO: Request extended features supported, if any (RFC 3659). 
*/ + + /* done */ + return (conn); + +fouch: + if (e != -1) + ftp_seterr(e); + fetch_close(conn); + return (NULL); +} + +/* + * Check the proxy settings + */ +static struct url * +ftp_get_proxy(struct url * url, const char *flags) +{ + struct url *purl; + char *p; + + if (flags != NULL && strchr(flags, 'd') != NULL) + return (NULL); + if (fetch_no_proxy_match(url->host)) + return (NULL); + if (((p = getenv("FTP_PROXY")) || (p = getenv("ftp_proxy")) || + (p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) && + *p && (purl = fetchParseURL(p)) != NULL) { + if (!*purl->scheme) { + if (getenv("FTP_PROXY") || getenv("ftp_proxy")) + strcpy(purl->scheme, SCHEME_FTP); + else + strcpy(purl->scheme, SCHEME_HTTP); + } + if (!purl->port) + purl->port = fetch_default_proxy_port(purl->scheme); + if (strcasecmp(purl->scheme, SCHEME_FTP) == 0 || + strcasecmp(purl->scheme, SCHEME_HTTP) == 0) + return (purl); + fetchFreeURL(purl); + } + return (NULL); +} + +/* + * Process an FTP request + */ +fetchIO * +ftp_request(struct url *url, const char *op, const char *op_arg, + struct url_stat *us, struct url *purl, const char *flags) +{ + fetchIO *f; + char *path; + conn_t *conn; + int if_modified_since, oflag; + struct url_stat local_us; + + /* check if we should use HTTP instead */ + if (purl && strcasecmp(purl->scheme, SCHEME_HTTP) == 0) { + if (strcmp(op, "STAT") == 0) + return (http_request(url, "HEAD", us, purl, flags)); + else if (strcmp(op, "RETR") == 0) + return (http_request(url, "GET", us, purl, flags)); + /* + * Our HTTP code doesn't support PUT requests yet, so try + * a direct connection. 
+ */ + } + + /* connect to server */ + conn = ftp_connect(url, purl, flags); + if (purl) + fetchFreeURL(purl); + if (conn == NULL) + return (NULL); + + if ((path = fetchUnquotePath(url)) == NULL) { + fetch_close(conn); + fetch_syserr(); + return NULL; + } + + /* change directory */ + if (ftp_cwd(conn, path, op_arg != NULL) == -1) { + fetch_close(conn); + free(path); + return (NULL); + } + + if_modified_since = CHECK_FLAG('i'); + if (if_modified_since && us == NULL) + us = &local_us; + + /* stat file */ + if (us && ftp_stat(conn, path, us) == -1 + && fetchLastErrCode != FETCH_PROTO + && fetchLastErrCode != FETCH_UNAVAIL) { + fetch_close(conn); + free(path); + return (NULL); + } + + if (if_modified_since && url->last_modified > 0 && + url->last_modified >= us->mtime) { + fetch_cache_put(conn, ftp_disconnect); + free(path); + fetchLastErrCode = FETCH_UNCHANGED; + snprintf(fetchLastErrString, MAXERRSTRING, "Unchanged"); + return NULL; + } + + /* just a stat */ + if (strcmp(op, "STAT") == 0) { + fetch_cache_put(conn, ftp_disconnect); + free(path); + return fetchIO_unopen(NULL, NULL, NULL, NULL); + } + if (strcmp(op, "STOR") == 0 || strcmp(op, "APPE") == 0) + oflag = O_WRONLY; + else + oflag = O_RDONLY; + + /* initiate the transfer */ + f = (ftp_transfer(conn, op, path, op_arg, oflag, url->offset, flags)); + free(path); + return f; +} + +/* + * Get and stat file + */ +fetchIO * +fetchXGetFTP(struct url *url, struct url_stat *us, const char *flags) +{ + return (ftp_request(url, "RETR", NULL, us, ftp_get_proxy(url, flags), flags)); +} + +/* + * Get file + */ +fetchIO * +fetchGetFTP(struct url *url, const char *flags) +{ + return (fetchXGetFTP(url, NULL, flags)); +} + +/* + * Put file + */ +fetchIO * +fetchPutFTP(struct url *url, const char *flags) +{ + return (ftp_request(url, CHECK_FLAG('a') ? 
"APPE" : "STOR", NULL, NULL, + ftp_get_proxy(url, flags), flags)); +} + +/* + * Get file stats + */ +int +fetchStatFTP(struct url *url, struct url_stat *us, const char *flags) +{ + fetchIO *f; + + f = ftp_request(url, "STAT", NULL, us, ftp_get_proxy(url, flags), flags); + if (f == NULL) + return (-1); + fetchIO_close(f); + return (0); +} + +/* + * List a directory + */ +int +fetchListFTP(struct url_list *ue, struct url *url, const char *pattern, const char *flags) +{ + fetchIO *f; + char buf[2 * PATH_MAX], *eol, *eos; + ssize_t len; + size_t cur_off; + int ret; + + /* XXX What about proxies? */ + if (pattern == NULL || strcmp(pattern, "*") == 0) + pattern = ""; + f = ftp_request(url, "NLST", pattern, NULL, ftp_get_proxy(url, flags), flags); + if (f == NULL) + return -1; + + cur_off = 0; + ret = 0; + + while ((len = fetchIO_read(f, buf + cur_off, sizeof(buf) - cur_off)) > 0) { + cur_off += len; + while ((eol = memchr(buf, '\n', cur_off)) != NULL) { + if (len == eol - buf) + break; + if (eol != buf) { + if (eol[-1] == '\r') + eos = eol - 1; + else + eos = eol; + *eos = '\0'; + ret = fetch_add_entry(ue, url, buf, 0); + if (ret) + break; + cur_off -= eol - buf + 1; + memmove(buf, eol + 1, cur_off); + } + } + if (ret) + break; + } + if (cur_off != 0 || len < 0) { + /* Not RFC conform, bail out. */ + fetchIO_close(f); + return -1; + } + fetchIO_close(f); + return ret; +} diff --git a/libfetch/ftp.errors b/libfetch/ftp.errors new file mode 100644 index 0000000..e9c4950 --- /dev/null +++ b/libfetch/ftp.errors @@ -0,0 +1,48 @@ +# $NetBSD: ftp.errors,v 1.2 2008/10/06 12:58:29 joerg Exp $ +# $FreeBSD: ftp.errors,v 1.6 2002/10/30 06:06:16 des Exp $ +# +# This list is taken from RFC 959. +# It probably needs a going over. 
+# +110 OK Restart marker reply +120 TEMP Service ready in a few minutes +125 OK Data connection already open; transfer starting +150 OK File status okay; about to open data connection +200 OK Command okay +202 PROTO Command not implemented, superfluous at this site +211 INFO System status, or system help reply +212 INFO Directory status +213 INFO File status +214 INFO Help message +215 INFO Set system type +220 OK Service ready for new user +221 OK Service closing control connection +225 OK Data connection open; no transfer in progress +226 OK Requested file action successful +227 OK Entering Passive Mode +229 OK Entering Extended Passive Mode +230 OK User logged in, proceed +250 OK Requested file action okay, completed +257 OK File/directory created +331 AUTH User name okay, need password +332 AUTH Need account for login +350 OK Requested file action pending further information +421 DOWN Service not available, closing control connection +425 NETWORK Can't open data connection +426 ABORT Connection closed; transfer aborted +450 UNAVAIL File unavailable (e.g., file busy) +451 SERVER Requested action aborted: local error in processing +452 FULL Insufficient storage space in system +500 PROTO Syntax error, command unrecognized +501 PROTO Syntax error in parameters or arguments +502 PROTO Command not implemented +503 PROTO Bad sequence of commands +504 PROTO Command not implemented for that parameter +530 AUTH Not logged in +532 AUTH Need account for storing files +535 PROTO Bug in MediaHawk Video Kernel FTP server +550 UNAVAIL File unavailable (e.g., file not found, no access) +551 PROTO Requested action aborted. 
Page type unknown +552 FULL Exceeded storage allocation +553 EXISTS File name not allowed +999 PROTO Protocol error diff --git a/libfetch/http.c b/libfetch/http.c new file mode 100644 index 0000000..d0882e2 --- /dev/null +++ b/libfetch/http.c @@ -0,0 +1,1552 @@ +/* $NetBSD: http.c,v 1.40 2016/10/21 11:51:18 jperkin Exp $ */ +/*- + * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav + * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org> + * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $ + */ + +/* + * The following copyright applies to the base64 code: + * + *- + * Copyright 1997 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(__linux__) || defined(__MINT__) || defined(__FreeBSD_kernel__) +/* Keep this down to Linux or MiNT, it can create surprises elsewhere. */ +/* + __FreeBSD_kernel__ is defined for GNU/kFreeBSD. + See http://glibc-bsd.alioth.debian.org/porting/PORTING . 
+*/ +#define _GNU_SOURCE +#endif + +/* Needed for gmtime_r on Interix */ +#define _REENTRANT + +#if HAVE_CONFIG_H +#include "config.h" +#endif +#ifndef NETBSD +#include <nbcompat.h> +#endif + +#include <sys/types.h> +#include <sys/socket.h> + +#include <ctype.h> +#include <errno.h> +#include <locale.h> +#include <stdarg.h> +#ifndef NETBSD +#include <nbcompat/stdio.h> +#else +#include <stdio.h> +#endif +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include <netinet/in.h> +#include <netinet/tcp.h> + +#ifndef NETBSD +#include <nbcompat/netdb.h> +#else +#include <netdb.h> +#endif + +#include <arpa/inet.h> + +#include "fetch.h" +#include "common.h" +#include "httperr.h" + +/* Maximum number of redirects to follow */ +#define MAX_REDIRECT 5 + +/* Symbolic names for reply codes we care about */ +#define HTTP_OK 200 +#define HTTP_PARTIAL 206 +#define HTTP_MOVED_PERM 301 +#define HTTP_MOVED_TEMP 302 +#define HTTP_SEE_OTHER 303 +#define HTTP_NOT_MODIFIED 304 +#define HTTP_TEMP_REDIRECT 307 +#define HTTP_NEED_AUTH 401 +#define HTTP_NEED_PROXY_AUTH 407 +#define HTTP_BAD_RANGE 416 +#define HTTP_PROTOCOL_ERROR 999 + +#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \ + || (xyz) == HTTP_MOVED_TEMP \ + || (xyz) == HTTP_TEMP_REDIRECT \ + || (xyz) == HTTP_SEE_OTHER) + +#define HTTP_ERROR(xyz) ((xyz) > 400 && (xyz) < 599) + +static int http_cmd(conn_t *, const char *, ...) 
LIBFETCH_PRINTFLIKE(2, 3); + +/***************************************************************************** + * I/O functions for decoding chunked streams + */ + +struct httpio +{ + conn_t *conn; /* connection */ + int chunked; /* chunked mode */ + int keep_alive; /* keep-alive mode */ + char *buf; /* chunk buffer */ + size_t bufsize; /* size of chunk buffer */ + ssize_t buflen; /* amount of data currently in buffer */ + int bufpos; /* current read offset in buffer */ + int eof; /* end-of-file flag */ + int error; /* error flag */ + size_t chunksize; /* remaining size of current chunk */ + off_t contentlength; /* remaining size of the content */ +}; + +/* + * Get next chunk header + */ +static int +http_new_chunk(struct httpio *io) +{ + char *p; + + if (fetch_getln(io->conn) == -1) + return (-1); + + if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf)) + return (-1); + + for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) { + if (*p == ';') + break; + if (!isxdigit((unsigned char)*p)) + return (-1); + if (isdigit((unsigned char)*p)) { + io->chunksize = io->chunksize * 16 + + *p - '0'; + } else { + io->chunksize = io->chunksize * 16 + + 10 + tolower((unsigned char)*p) - 'a'; + } + } + + return (io->chunksize); +} + +/* + * Grow the input buffer to at least len bytes + */ +static int +http_growbuf(struct httpio *io, size_t len) +{ + char *tmp; + + if (io->bufsize >= len) + return (0); + + if ((tmp = realloc(io->buf, len)) == NULL) + return (-1); + io->buf = tmp; + io->bufsize = len; + return (0); +} + +/* + * Fill the input buffer, do chunk decoding on the fly + */ +static int +http_fillbuf(struct httpio *io, size_t len) +{ + if (io->error) + return (-1); + if (io->eof) + return (0); + + if (io->contentlength >= 0 && (off_t)len > io->contentlength) + len = io->contentlength; + + if (io->chunked == 0) { + if (http_growbuf(io, len) == -1) + return (-1); + if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) { + io->error = 1; + 
return (-1); + } + if (io->contentlength) + io->contentlength -= io->buflen; + io->bufpos = 0; + return (io->buflen); + } + + if (io->chunksize == 0) { + switch (http_new_chunk(io)) { + case -1: + io->error = 1; + return (-1); + case 0: + io->eof = 1; + if (fetch_getln(io->conn) == -1) + return (-1); + return (0); + } + } + + if (len > io->chunksize) + len = io->chunksize; + if (http_growbuf(io, len) == -1) + return (-1); + if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) { + io->error = 1; + return (-1); + } + io->chunksize -= io->buflen; + if (io->contentlength >= 0) + io->contentlength -= io->buflen; + + if (io->chunksize == 0) { + char endl[2]; + ssize_t len2; + + len2 = fetch_read(io->conn, endl, 2); + if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1) + return (-1); + if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n') + return (-1); + } + + io->bufpos = 0; + + return (io->buflen); +} + +/* + * Read function + */ +static ssize_t +http_readfn(void *v, void *buf, size_t len) +{ + struct httpio *io = (struct httpio *)v; + size_t l, pos; + + if (io->error) + return (-1); + if (io->eof) + return (0); + + for (pos = 0; len > 0; pos += l, len -= l) { + /* empty buffer */ + if (!io->buf || io->bufpos == io->buflen) + if (http_fillbuf(io, len) < 1) + break; + l = io->buflen - io->bufpos; + if (len < l) + l = len; + memcpy((char *)buf + pos, io->buf + io->bufpos, l); + io->bufpos += l; + } + + if (!pos && io->error) + return (-1); + return (pos); +} + +/* + * Write function + */ +static ssize_t +http_writefn(void *v, const void *buf, size_t len) +{ + struct httpio *io = (struct httpio *)v; + + return (fetch_write(io->conn, buf, len)); +} + +/* + * Close function + */ +static void +http_closefn(void *v) +{ + struct httpio *io = (struct httpio *)v; + + if (io->keep_alive) { + int val; + + val = 0; + setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NODELAY, &val, + sizeof(val)); + fetch_cache_put(io->conn, fetch_close); +#if defined(TCP_NOPUSH) && 
!defined(__APPLE__) + val = 1; + setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, + sizeof(val)); +#endif + } else { + fetch_close(io->conn); + } + + free(io->buf); + free(io); +} + +/* + * Wrap a file descriptor up + */ +static fetchIO * +http_funopen(conn_t *conn, int chunked, int keep_alive, off_t clength) +{ + struct httpio *io; + fetchIO *f; + + if ((io = calloc(1, sizeof(*io))) == NULL) { + fetch_syserr(); + return (NULL); + } + io->conn = conn; + io->chunked = chunked; + io->contentlength = clength; + io->keep_alive = keep_alive; + f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn); + if (f == NULL) { + fetch_syserr(); + free(io); + return (NULL); + } + return (f); +} + + +/***************************************************************************** + * Helper functions for talking to the server and parsing its replies + */ + +/* Header types */ +typedef enum { + hdr_syserror = -2, + hdr_error = -1, + hdr_end = 0, + hdr_unknown = 1, + hdr_connection, + hdr_content_length, + hdr_content_range, + hdr_last_modified, + hdr_location, + hdr_transfer_encoding, + hdr_www_authenticate +} hdr_t; + +/* Names of interesting headers */ +static struct { + hdr_t num; + const char *name; +} hdr_names[] = { + { hdr_connection, "Connection" }, + { hdr_content_length, "Content-Length" }, + { hdr_content_range, "Content-Range" }, + { hdr_last_modified, "Last-Modified" }, + { hdr_location, "Location" }, + { hdr_transfer_encoding, "Transfer-Encoding" }, + { hdr_www_authenticate, "WWW-Authenticate" }, + { hdr_unknown, NULL }, +}; + +/* + * Send a formatted line; optionally echo to terminal + */ +LIBFETCH_PRINTFLIKE(2, 3) +static int +http_cmd(conn_t *conn, const char *fmt, ...) 
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * `str' is the header name to look for, `hdr' the complete header line.
 * The name is compared case-insensitively and must be followed directly
 * by a colon.  Returns NULL if the line is a different header.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Compare first and advance afterwards, so that a mismatch leaves
	 * both pointers on the offending characters.  Advancing inside the
	 * comparison would step over a mismatch in the last character of
	 * the name and accept e.g. "Locatiom:" as "Location".
	 */
	while (*str != '\0' && *hdr != '\0' &&
	    tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
		++str;
		++hdr;
	}
	if (*str != '\0' || *hdr != ':')
		return (NULL);
	/* step over the colon and any whitespace in front of the value */
	do {
		++hdr;
	} while (*hdr != '\0' && isspace((unsigned char)*hdr));
	return (hdr);
}
/*
 * Parse a content-length header
 *
 * `p' is the header value, which must consist of decimal digits only.
 * On success the value is stored in *length and 0 is returned.  Returns
 * -1, leaving *length untouched, if the value contains anything but
 * digits or does not fit in an off_t.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	/* largest value an off_t can hold; POSIX fixes CHAR_BIT at 8 */
	const unsigned long long off_max = ~0ULL >>
	    ((sizeof(unsigned long long) - sizeof(off_t)) * 8 + 1);
	unsigned long long len, digit;

	for (len = 0; *p && isdigit((unsigned char)*p); ++p) {
		digit = (unsigned long long)(*p - '0');
		/* reject instead of overflowing on absurdly long values */
		if (len > (off_max - digit) / 10)
			return (-1);
		len = len * 10 + digit;
	}
	if (*p)
		return (-1);
	*length = (off_t)len;
	return (0);
}
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string `src' and returns the result as a
 * newly allocated, NUL-terminated string that the caller must free().
 * Returns NULL if memory cannot be allocated.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Read the input as unsigned bytes: with a signed plain char, bytes
	 * >= 0x80 (e.g. UTF-8 in a user name or password) would sign-extend
	 * when shifted and clobber the neighbouring 6-bit groups.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	/* full 3-byte groups become 4 output characters */
	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) |
		    in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	/* a trailing group of 1 or 2 bytes is padded with '=' */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	case 0:
		break;
	}

	*dst = 0;
	return (str);
}
"basic:", 6) == 0) { + char *user, *pwd, *str; + int r; + + /* skip realm */ + for (p += 6; *p && *p != ':'; ++p) + /* nothing */ ; + if (!*p || strchr(++p, ':') == NULL) + return (-1); + if ((str = strdup(p)) == NULL) + return (-1); /* XXX */ + user = str; + pwd = strchr(str, ':'); + *pwd++ = '\0'; + r = http_basic_auth(conn, hdr, user, pwd); + free(str); + return (r); + } + return (-1); +} + + +/***************************************************************************** + * Helper functions for connecting to a server or proxy + */ + +/* + * Connect to the correct HTTP server or proxy. + */ +static conn_t * +http_connect(struct url *URL, struct url *purl, const char *flags, int *cached) +{ + struct url *curl; + conn_t *conn; + hdr_t h; + const char *p; + int af, verbose; +#if defined(TCP_NOPUSH) && !defined(__APPLE__) + int val; +#endif + + *cached = 0; + +#ifdef INET6 + af = AF_UNSPEC; +#else + af = AF_INET; +#endif + + verbose = CHECK_FLAG('v'); + if (CHECK_FLAG('4')) + af = AF_INET; +#ifdef INET6 + else if (CHECK_FLAG('6')) + af = AF_INET6; +#endif + + curl = (purl != NULL) ? 
purl : URL; + + if ((conn = fetch_cache_get(URL, af)) != NULL) { + *cached = 1; + return (conn); + } + + if ((conn = fetch_connect(curl, af, verbose)) == NULL) + /* fetch_connect() has already set an error code */ + return (NULL); + if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && purl) { + http_cmd(conn, "CONNECT %s:%d HTTP/1.1\r\n", + URL->host, URL->port); + http_cmd(conn, "Host: %s:%d\r\n", + URL->host, URL->port); + http_cmd(conn, "\r\n"); + if (http_get_reply(conn) != HTTP_OK) { + http_seterr(conn->err); + goto ouch; + } + /* Read and discard the rest of the proxy response */ + if (fetch_getln(conn) < 0) { + fetch_syserr(); + goto ouch; + } + do { + switch ((h = http_next_header(conn, &p))) { + case hdr_syserror: + fetch_syserr(); + goto ouch; + case hdr_error: + http_seterr(HTTP_PROTOCOL_ERROR); + goto ouch; + default: + /* ignore */ ; + } + } while (h < hdr_end); + } + if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && + fetch_ssl(conn, URL, verbose) == -1) { + /* grrr */ +#ifdef EAUTH + errno = EAUTH; +#else + errno = EPERM; +#endif + fetch_syserr(); + goto ouch; + } + +#if defined(TCP_NOPUSH) && !defined(__APPLE__) + val = 1; + setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val)); +#endif + + return (conn); +ouch: + fetch_close(conn); + return (NULL); +} + +static struct url * +http_get_proxy(struct url * url, const char *flags) +{ + struct url *purl; + char *p; + + if (flags != NULL && strchr(flags, 'd') != NULL) + return (NULL); + if (fetch_no_proxy_match(url->host)) + return (NULL); + if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) && + *p && (purl = fetchParseURL(p))) { + if (!*purl->scheme) + strcpy(purl->scheme, SCHEME_HTTP); + if (!purl->port) + purl->port = fetch_default_proxy_port(purl->scheme); + if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0) + return (purl); + fetchFreeURL(purl); + } + return (NULL); +} + +static void +set_if_modified_since(conn_t *conn, time_t last_modified) +{ + static const char weekdays[] = 
"SunMonTueWedThuFriSat"; + static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec"; + struct tm tm; + char buf[80]; + gmtime_r(&last_modified, &tm); + snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4ld %02d:%02d:%02d GMT", + weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3, + (long)(tm.tm_year + 1900), tm.tm_hour, tm.tm_min, tm.tm_sec); + http_cmd(conn, "If-Modified-Since: %s\r\n", buf); +} + + +/***************************************************************************** + * Core + */ + +/* + * Send a request and process the reply + * + * XXX This function is way too long, the do..while loop should be split + * XXX off into a separate function. + */ +fetchIO * +http_request(struct url *URL, const char *op, struct url_stat *us, + struct url *purl, const char *flags) +{ + conn_t *conn; + struct url *url, *new; + int chunked, direct, if_modified_since, need_auth, noredirect; + int keep_alive, verbose, cached; + int e, i, n, val; + off_t offset, clength, length, size; + time_t mtime; + const char *p; + fetchIO *f; + hdr_t h; + char hbuf[URL_HOSTLEN + 7], *host; + + direct = CHECK_FLAG('d'); + noredirect = CHECK_FLAG('A'); + verbose = CHECK_FLAG('v'); + if_modified_since = CHECK_FLAG('i'); + keep_alive = 0; + + if (direct && purl) { + fetchFreeURL(purl); + purl = NULL; + } + + /* try the provided URL first */ + url = URL; + + /* if the A flag is set, we only get one try */ + n = noredirect ? 1 : MAX_REDIRECT; + i = 0; + + e = HTTP_PROTOCOL_ERROR; + need_auth = 0; + do { + new = NULL; + chunked = 0; + offset = 0; + clength = -1; + length = -1; + size = -1; + mtime = 0; + + /* check port */ + if (!url->port) + url->port = fetch_default_port(url->scheme); + + /* were we redirected to an FTP URL? 
*/ + if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) { + if (strcmp(op, "GET") == 0) + return (ftp_request(url, "RETR", NULL, us, purl, flags)); + else if (strcmp(op, "HEAD") == 0) + return (ftp_request(url, "STAT", NULL, us, purl, flags)); + } + + /* connect to server or proxy */ + if ((conn = http_connect(url, purl, flags, &cached)) == NULL) + goto ouch; + + host = url->host; +#ifdef INET6 + if (strchr(url->host, ':')) { + snprintf(hbuf, sizeof(hbuf), "[%s]", url->host); + host = hbuf; + } +#endif + if (url->port != fetch_default_port(url->scheme)) { + if (host != hbuf) { + strcpy(hbuf, host); + host = hbuf; + } + snprintf(hbuf + strlen(hbuf), + sizeof(hbuf) - strlen(hbuf), ":%d", url->port); + } + + /* send request */ + if (verbose) + fetch_info("requesting %s://%s%s", + url->scheme, host, url->doc); + if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) { + http_cmd(conn, "%s %s://%s%s HTTP/1.1\r\n", + op, url->scheme, host, url->doc); + } else { + http_cmd(conn, "%s %s HTTP/1.1\r\n", + op, url->doc); + } + + if (if_modified_since && url->last_modified > 0) + set_if_modified_since(conn, url->last_modified); + + /* virtual host */ + http_cmd(conn, "Host: %s\r\n", host); + + /* proxy authorization */ + if (purl) { + if (*purl->user || *purl->pwd) + http_basic_auth(conn, "Proxy-Authorization", + purl->user, purl->pwd); + else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0') + http_authorize(conn, "Proxy-Authorization", p); + } + + /* server authorization */ + if (need_auth || *url->user || *url->pwd) { + if (*url->user || *url->pwd) + http_basic_auth(conn, "Authorization", url->user, url->pwd); + else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0') + http_authorize(conn, "Authorization", p); + else if (fetchAuthMethod && fetchAuthMethod(url) == 0) { + http_basic_auth(conn, "Authorization", url->user, url->pwd); + } else { + http_seterr(HTTP_NEED_AUTH); + goto ouch; + } + } + + /* other headers */ + if ((p = getenv("HTTP_REFERER")) != 
NULL && *p != '\0') { + if (strcasecmp(p, "auto") == 0) + http_cmd(conn, "Referer: %s://%s%s\r\n", + url->scheme, host, url->doc); + else + http_cmd(conn, "Referer: %s\r\n", p); + } + if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0') + http_cmd(conn, "User-Agent: %s\r\n", p); + else + http_cmd(conn, "User-Agent: %s\r\n", _LIBFETCH_VER); + if (url->offset > 0) + http_cmd(conn, "Range: bytes=%lld-\r\n", (long long)url->offset); + http_cmd(conn, "\r\n"); + + /* + * Force the queued request to be dispatched. Normally, one + * would do this with shutdown(2) but squid proxies can be + * configured to disallow such half-closed connections. To + * be compatible with such configurations, fiddle with socket + * options to force the pending data to be written. + */ +#if defined(TCP_NOPUSH) && !defined(__APPLE__) + val = 0; + setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, + sizeof(val)); +#endif + val = 1; + setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val, + sizeof(val)); + + /* get reply */ + switch (http_get_reply(conn)) { + case HTTP_OK: + case HTTP_PARTIAL: + case HTTP_NOT_MODIFIED: + /* fine */ + break; + case HTTP_MOVED_PERM: + case HTTP_MOVED_TEMP: + case HTTP_SEE_OTHER: + /* + * Not so fine, but we still have to read the + * headers to get the new location. + */ + break; + case HTTP_NEED_AUTH: + if (need_auth) { + /* + * We already sent out authorization code, + * so there's nothing more we can do. + */ + http_seterr(conn->err); + goto ouch; + } + /* try again, but send the password this time */ + if (verbose) + fetch_info("server requires authorization"); + break; + case HTTP_NEED_PROXY_AUTH: + /* + * If we're talking to a proxy, we already sent + * our proxy authorization code, so there's + * nothing more we can do. + */ + http_seterr(conn->err); + goto ouch; + case HTTP_BAD_RANGE: + /* + * This can happen if we ask for 0 bytes because + * we already have the whole file. Consider this + * a success for now, and check sizes later. 
+ */ + break; + case HTTP_PROTOCOL_ERROR: + /* fall through */ + case -1: + --i; + if (cached) + continue; + fetch_syserr(); + goto ouch; + default: + http_seterr(conn->err); + if (!verbose) + goto ouch; + /* fall through so we can get the full error message */ + } + + /* get headers */ + do { + switch ((h = http_next_header(conn, &p))) { + case hdr_syserror: + fetch_syserr(); + goto ouch; + case hdr_error: + http_seterr(HTTP_PROTOCOL_ERROR); + goto ouch; + case hdr_connection: + /* XXX too weak? */ + keep_alive = (strcasecmp(p, "keep-alive") == 0); + break; + case hdr_content_length: + http_parse_length(p, &clength); + break; + case hdr_content_range: + http_parse_range(p, &offset, &length, &size); + break; + case hdr_last_modified: + http_parse_mtime(p, &mtime); + break; + case hdr_location: + if (!HTTP_REDIRECT(conn->err)) + break; + if (new) + free(new); + if (verbose) + fetch_info("%d redirect to %s", conn->err, p); + if (*p == '/') + /* absolute path */ + new = fetchMakeURL(url->scheme, url->host, url->port, p, + url->user, url->pwd); + else + new = fetchParseURL(p); + if (new == NULL) { + /* XXX should set an error code */ + goto ouch; + } + if (!*new->user && !*new->pwd) { + strcpy(new->user, url->user); + strcpy(new->pwd, url->pwd); + } + new->offset = url->offset; + new->length = url->length; + break; + case hdr_transfer_encoding: + /* XXX weak test*/ + chunked = (strcasecmp(p, "chunked") == 0); + break; + case hdr_www_authenticate: + if (conn->err != HTTP_NEED_AUTH) + break; + /* if we were smarter, we'd check the method and realm */ + break; + case hdr_end: + /* fall through */ + case hdr_unknown: + /* ignore */ + break; + } + } while (h > hdr_end); + + /* we need to provide authentication */ + if (conn->err == HTTP_NEED_AUTH) { + e = conn->err; + need_auth = 1; + fetch_close(conn); + conn = NULL; + continue; + } + + /* requested range not satisfiable */ + if (conn->err == HTTP_BAD_RANGE) { + if (url->offset == size && url->length == 0) { + /* asked for 
0 bytes; fake it */ + offset = url->offset; + conn->err = HTTP_OK; + break; + } else { + http_seterr(conn->err); + goto ouch; + } + } + + /* we have a hit or an error */ + if (conn->err == HTTP_OK || + conn->err == HTTP_PARTIAL || + conn->err == HTTP_NOT_MODIFIED || + HTTP_ERROR(conn->err)) + break; + + /* all other cases: we got a redirect */ + e = conn->err; + need_auth = 0; + fetch_close(conn); + conn = NULL; + if (!new) + break; + if (url != URL) + fetchFreeURL(url); + url = new; + } while (++i < n); + + /* we failed, or ran out of retries */ + if (conn == NULL) { + http_seterr(e); + goto ouch; + } + + /* check for inconsistencies */ + if (clength != -1 && length != -1 && clength != length) { + http_seterr(HTTP_PROTOCOL_ERROR); + goto ouch; + } + if (clength == -1) + clength = length; + if (clength != -1) + length = offset + clength; + if (length != -1 && size != -1 && length != size) { + http_seterr(HTTP_PROTOCOL_ERROR); + goto ouch; + } + if (size == -1) + size = length; + + /* fill in stats */ + if (us) { + us->size = size; + us->atime = us->mtime = mtime; + } + + /* too far? 
*/ + if (URL->offset > 0 && offset > URL->offset) { + http_seterr(HTTP_PROTOCOL_ERROR); + goto ouch; + } + + /* report back real offset and size */ + URL->offset = offset; + URL->length = clength; + + if (clength == -1 && !chunked) + keep_alive = 0; + + if (conn->err == HTTP_NOT_MODIFIED) { + http_seterr(HTTP_NOT_MODIFIED); + if (keep_alive) { + fetch_cache_put(conn, fetch_close); + conn = NULL; + } + goto ouch; + } + + /* wrap it up in a fetchIO */ + if ((f = http_funopen(conn, chunked, keep_alive, clength)) == NULL) { + fetch_syserr(); + goto ouch; + } + + if (url != URL) + fetchFreeURL(url); + if (purl) + fetchFreeURL(purl); + + if (HTTP_ERROR(conn->err)) { + + if (keep_alive) { + char buf[512]; + do { + } while (fetchIO_read(f, buf, sizeof(buf)) > 0); + } + + fetchIO_close(f); + f = NULL; + } + + return (f); + +ouch: + if (url != URL) + fetchFreeURL(url); + if (purl) + fetchFreeURL(purl); + if (conn != NULL) + fetch_close(conn); + return (NULL); +} + + +/***************************************************************************** + * Entry points + */ + +/* + * Retrieve and stat a file by HTTP + */ +fetchIO * +fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags) +{ + return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags)); +} + +/* + * Retrieve a file by HTTP + */ +fetchIO * +fetchGetHTTP(struct url *URL, const char *flags) +{ + return (fetchXGetHTTP(URL, NULL, flags)); +} + +/* + * Store a file by HTTP + */ +fetchIO * +fetchPutHTTP(struct url *URL, const char *flags) +{ + fprintf(stderr, "fetchPutHTTP(): not implemented\n"); + return (NULL); +} + +/* + * Get an HTTP document's metadata + */ +int +fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags) +{ + fetchIO *f; + + f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags); + if (f == NULL) + return (-1); + fetchIO_close(f); + return (0); +} + +enum http_states { + ST_NONE, + ST_LT, + ST_LTA, + ST_TAGA, + ST_H, + ST_R, + ST_E, + ST_F, + 
ST_HREF, + ST_HREFQ, + ST_TAG, + ST_TAGAX, + ST_TAGAQ +}; + +struct index_parser { + struct url_list *ue; + struct url *url; + enum http_states state; +}; + +static ssize_t +parse_index(struct index_parser *parser, const char *buf, size_t len) +{ + char *end_attr, p = *buf; + + switch (parser->state) { + case ST_NONE: + /* Plain text, not in markup */ + if (p == '<') + parser->state = ST_LT; + return 1; + case ST_LT: + /* In tag -- "<" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == 'a' || p == 'A') + parser->state = ST_LTA; + else if (!isspace((unsigned char)p)) + parser->state = ST_TAG; + return 1; + case ST_LTA: + /* In tag -- "<a" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (isspace((unsigned char)p)) + parser->state = ST_TAGA; + else + parser->state = ST_TAG; + return 1; + case ST_TAG: + /* In tag, but not "<a" -- disregard */ + if (p == '>') + parser->state = ST_NONE; + return 1; + case ST_TAGA: + /* In a-tag -- "<a " already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (p == 'h' || p == 'H') + parser->state = ST_H; + else if (!isspace((unsigned char)p)) + parser->state = ST_TAGAX; + return 1; + case ST_TAGAX: + /* In unknown keyword in a-tag */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (isspace((unsigned char)p)) + parser->state = ST_TAGA; + return 1; + case ST_TAGAQ: + /* In a-tag, unknown argument for keys. 
*/ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGA; + return 1; + case ST_H: + /* In a-tag -- "<a h" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (p == 'r' || p == 'R') + parser->state = ST_R; + else if (isspace((unsigned char)p)) + parser->state = ST_TAGA; + else + parser->state = ST_TAGAX; + return 1; + case ST_R: + /* In a-tag -- "<a hr" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (p == 'e' || p == 'E') + parser->state = ST_E; + else if (isspace((unsigned char)p)) + parser->state = ST_TAGA; + else + parser->state = ST_TAGAX; + return 1; + case ST_E: + /* In a-tag -- "<a hre" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (p == 'f' || p == 'F') + parser->state = ST_F; + else if (isspace((unsigned char)p)) + parser->state = ST_TAGA; + else + parser->state = ST_TAGAX; + return 1; + case ST_F: + /* In a-tag -- "<a href" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (p == '=') + parser->state = ST_HREF; + else if (!isspace((unsigned char)p)) + parser->state = ST_TAGAX; + return 1; + case ST_HREF: + /* In a-tag -- "<a href=" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_HREFQ; + else if (!isspace((unsigned char)p)) + parser->state = ST_TAGA; + return 1; + case ST_HREFQ: + /* In href of the a-tag */ + end_attr = memchr(buf, '"', len); + if (end_attr == NULL) + return 0; + *end_attr = '\0'; + parser->state = ST_TAGA; + if (fetch_add_entry(parser->ue, parser->url, buf, 1)) + return -1; + return end_attr + 1 - buf; + } + /* NOTREACHED */ + abort(); +} + +struct http_index_cache { + struct http_index_cache *next; + struct url *location; + struct url_list ue; +}; + +static struct 
http_index_cache *index_cache; + +/* + * List a directory + */ +int +fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern, const char *flags) +{ + fetchIO *f; + char buf[2 * PATH_MAX]; + size_t buf_len, sum_processed; + ssize_t read_len, processed; + struct index_parser state; + struct http_index_cache *cache = NULL; + int do_cache, ret; + + do_cache = CHECK_FLAG('c'); + + if (do_cache) { + for (cache = index_cache; cache != NULL; cache = cache->next) { + if (strcmp(cache->location->scheme, url->scheme)) + continue; + if (strcmp(cache->location->user, url->user)) + continue; + if (strcmp(cache->location->pwd, url->pwd)) + continue; + if (strcmp(cache->location->host, url->host)) + continue; + if (cache->location->port != url->port) + continue; + if (strcmp(cache->location->doc, url->doc)) + continue; + return fetchAppendURLList(ue, &cache->ue); + } + + cache = malloc(sizeof(*cache)); + fetchInitURLList(&cache->ue); + cache->location = fetchCopyURL(url); + } + + f = fetchGetHTTP(url, flags); + if (f == NULL) { + if (do_cache) { + fetchFreeURLList(&cache->ue); + fetchFreeURL(cache->location); + free(cache); + } + return -1; + } + + state.url = url; + state.state = ST_NONE; + if (do_cache) { + state.ue = &cache->ue; + } else { + state.ue = ue; + } + + buf_len = 0; + + while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) { + buf_len += read_len; + sum_processed = 0; + do { + processed = parse_index(&state, buf + sum_processed, buf_len); + if (processed == -1) + break; + buf_len -= processed; + sum_processed += processed; + } while (processed != 0 && buf_len > 0); + if (processed == -1) { + read_len = -1; + break; + } + memmove(buf, buf + sum_processed, buf_len); + } + + fetchIO_close(f); + + ret = read_len < 0 ? 
-1 : 0; + + if (do_cache) { + if (ret == 0) { + cache->next = index_cache; + index_cache = cache; + } + + if (fetchAppendURLList(ue, &cache->ue)) + ret = -1; + } + + return ret; +} diff --git a/libfetch/http.errors b/libfetch/http.errors new file mode 100644 index 0000000..004aac2 --- /dev/null +++ b/libfetch/http.errors @@ -0,0 +1,46 @@ +# $FreeBSD: http.errors,v 1.5 2001/05/23 18:52:02 des Exp $ +# $NetBSD: http.errors,v 1.3 2009/02/05 16:59:45 joerg Exp $ +# +# This list is taken from RFC 2068. +# +100 OK Continue +101 OK Switching Protocols +200 OK OK +201 OK Created +202 OK Accepted +203 INFO Non-Authoritative Information +204 OK No Content +205 OK Reset Content +206 OK Partial Content +300 MOVED Multiple Choices +301 MOVED Moved Permanently +302 MOVED Moved Temporarily +303 MOVED See Other +304 UNCHANGED Not Modified +305 INFO Use Proxy +307 MOVED Temporary Redirect +400 PROTO Bad Request +401 AUTH Unauthorized +402 AUTH Payment Required +403 AUTH Forbidden +404 UNAVAIL Not Found +405 PROTO Method Not Allowed +406 PROTO Not Acceptable +407 AUTH Proxy Authentication Required +408 TIMEOUT Request Time-out +409 EXISTS Conflict +410 UNAVAIL Gone +411 PROTO Length Required +412 SERVER Precondition Failed +413 PROTO Request Entity Too Large +414 PROTO Request-URI Too Large +415 PROTO Unsupported Media Type +416 UNAVAIL Requested Range Not Satisfiable +417 SERVER Expectation Failed +500 SERVER Internal Server Error +501 PROTO Not Implemented +502 SERVER Bad Gateway +503 TEMP Service Unavailable +504 TIMEOUT Gateway Time-out +505 PROTO HTTP Version not supported +999 PROTO Protocol error |