From: Justin Wind Date: Thu, 4 Sep 2014 22:28:30 +0000 (-0700) Subject: initial overhaul of resolver X-Git-Url: https://git.squeep.com/?a=commitdiff_plain;h=a92970088391362908dfaf949dae799b1525d97e;p=fb-resolver initial overhaul of resolver --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2394bf2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.o +.depend +lru_cache_test +resolver + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c479c1b --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ +CFLAGS += -g -Wall -Wextra +CPPFLAGS += +LDFLAGS += -lpthread + +MAKEDEPEND = $(CC) -MM + +TARGETS = resolver +TESTS = lru_cache_test +SOURCES = lru_cache.c lru_cache_test.c resolver.c +OBJECTS = $(SOURCES:.c=.o) + +.phony: all test clean + +all: $(TARGETS) .depend + +.depend: $(SOURCES) + $(MAKEDEPEND) $(CPPFLAGS) $^ > $@ + +clean: + @rm -rf $(TARGETS) $(TESTS) $(OBJECTS) *.dSYM .depend core + +test: $(TESTS) + +resolver: %: %.o lru_cache.o + +-include .depend diff --git a/lru_cache.c b/lru_cache.c new file mode 100644 index 0000000..65b7e6f --- /dev/null +++ b/lru_cache.c @@ -0,0 +1,186 @@ +/* + LRU Cache + + A simple base struct for maintaining a least-recently-used cache of + entries. + + Entries are pushed onto the front of a deque, overflows are popped + off the back. A hash table indexes the entries for lookup. + + Use a lru_entry_t as the first member of the struct you want to + cache, and swaddle these routines with casts. + +*/ +#include "lru_cache.h" + +#include +#include + +static inline unsigned long hash_(struct lru_cache_entry *, lru_hash_feed_fn *); + + +/* lru_cache_new + Allocate and initialize a new LRU Cache. + + hash_sz - size of the hash table (this ought to be prime) + capacity - retain only this many entries in queue (0 for unlimited) + feed - function which provides a buffer for an entry, to generate hash from + cmp - function for comparing two entries +*/ +struct lru_cache * +lru_cache_new(size_t hash_sz, + size_t capacity, + lru_hash_feed_fn *feed, + lru_entry_cmp_fn *cmp) +{ + struct lru_cache *c; + size_t sz = sizeof *c + (hash_sz - 1) * sizeof *c->hash; + /* hash_sz - 1 because of old-style flexible array */ + + c = malloc(sz); + if (c == NULL) + return NULL; + + *((size_t *)&(c->hash_sz)) = hash_sz; + *((size_t *)&(c->capacity)) = capacity; + + c->feed_fn = feed; + c->cmp_fn = cmp; + + c->num_entries = 0; + c->newest = NULL; + c->oldest = NULL; + + memset(c->hash, 0, hash_sz * sizeof *c->hash); + + return c; +} + + +/* lru_cache_insert + Insert an entry into the front of the queue. + + cache - the cache to use + e - the entry to insert + overflow - will be set to the entry which was removed if cache is at capacity +*/ +void +lru_cache_insert(struct lru_cache *cache, struct lru_cache_entry *e, struct lru_cache_entry **overflow) +{ + *overflow = NULL; + if (cache->capacity && cache->num_entries == cache->capacity) { + *overflow = cache->oldest; + lru_cache_extract(cache, *overflow); + } + + e->hash_slot = hash_(e, cache->feed_fn) % cache->hash_sz; + + e->hash_next = cache->hash[e->hash_slot].entry; + cache->hash[e->hash_slot].entry = e; + cache->hash[e->hash_slot].tally++; + + e->prev = NULL; + e->next = cache->newest; + if (cache->newest) + cache->newest->prev = e; + else + cache->oldest = e; + cache->newest = e; + + cache->num_entries++; +} + + +/* lru_cache_locate + Return the first entry for which cache->cmp_fn equates to the provided + entry to match. +*/ +struct lru_cache_entry * +lru_cache_locate(struct lru_cache *cache, struct lru_cache_entry *match) +{ + struct lru_cache_entry *e; + unsigned long hash; + + hash = hash_(match, cache->feed_fn) % cache->hash_sz; + + for (e = cache->hash[hash].entry; e; e = e->hash_next) + if (cache->cmp_fn(match, e) == 0) + break; + + return e; +} + + +/* lru_cache_extract + Remove an entry from the cache. +*/ +void +lru_cache_extract(struct lru_cache *cache, struct lru_cache_entry *e) +{ + /* remove from hash list */ + if (cache->hash[e->hash_slot].entry == e) { + cache->hash[e->hash_slot].entry = e->hash_next; + } else { + struct lru_cache_entry *hp; + + for (hp = cache->hash[e->hash_slot].entry; hp; hp = hp->hash_next) + if (hp->hash_next == e) { + hp->hash_next = e->hash_next; + break; + } + + if (hp) { + hp->hash_next = e->hash_next; + } + } + cache->hash[e->hash_slot].tally--; + + /* remove from queue */ + if (cache->newest == e) + cache->newest = e->next; + if (cache->oldest == e) + cache->oldest = e->prev; + if (e->prev) + e->prev->next = e->next; + if (e->next) + e->next->prev = e->prev; + + cache->num_entries--; +} + + +/* lru_cache_foreach + Calls fn(entry, index, data) for each entry in cache, from newest to oldest. +*/ +void +lru_cache_foreach(struct lru_cache *cache, void (*fn)(lru_entry_t *, size_t, void *), void *data) +{ + struct lru_cache_entry *e; + size_t i; + + for (i = 0, e = cache->newest; e; e = e->next, i++) + fn(e, i, data); +} + + +/* hash_ + djbx33x + The hash used for lookup table. Is best at lowercase strings, but should + be non-pathological for most inputs. +*/ +static +inline +unsigned long +hash_(struct lru_cache_entry *e, lru_hash_feed_fn *feed_fn) +{ + unsigned long hash = 5381; + char *buf; + size_t len; + + feed_fn(e, &buf, &len); + + while (len--) + hash = ((hash << 5) + hash) ^ *buf++; + + return hash; +} diff --git a/lru_cache.h b/lru_cache.h new file mode 100644 index 0000000..bda2619 --- /dev/null +++ b/lru_cache.h @@ -0,0 +1,50 @@ +#ifndef LRU_CACHE_H +#define LRU_CACHE_H + +#include + +struct lru_cache_entry { + struct lru_cache_entry *next; + struct lru_cache_entry *prev; + struct lru_cache_entry *hash_next; + unsigned long hash_slot; +}; +typedef struct lru_cache_entry lru_entry_t; + + +/* + A function which, given an entry, returns a pointer to a buffer + and a number of bytes to use to generate a hash of the entry. +*/ +typedef void (lru_hash_feed_fn)(lru_entry_t *, char **, size_t *); + +/* + A function which compares two entries for equality. +*/ +typedef int (lru_entry_cmp_fn)(lru_entry_t *, lru_entry_t *); + +struct lru_cache_hashslot_ { + size_t tally; + struct lru_cache_entry *entry; +}; + +struct lru_cache { + const size_t hash_sz; + const size_t capacity; + + lru_hash_feed_fn *feed_fn; + lru_entry_cmp_fn *cmp_fn; + + size_t num_entries; + struct lru_cache_entry *newest; + struct lru_cache_entry *oldest; + struct lru_cache_hashslot_ hash[1]; +}; + +struct lru_cache *lru_cache_new(size_t, size_t, lru_hash_feed_fn *, lru_entry_cmp_fn *); +void lru_cache_insert(struct lru_cache *, lru_entry_t *, lru_entry_t **); +void lru_cache_extract(struct lru_cache *, lru_entry_t *); +lru_entry_t *lru_cache_locate(struct lru_cache *, lru_entry_t *); +void lru_cache_foreach(struct lru_cache *, void (*)(lru_entry_t *, size_t, void *), void *); + +#endif /* LRU_CACHE_H */ diff --git a/lru_cache_test.c b/lru_cache_test.c new file mode 100644 index 0000000..135ec34 --- /dev/null +++ b/lru_cache_test.c @@ -0,0 +1,198 @@ +#include +#include +#include +#include + +/* wrap the whole module */ +#include "lru_cache.c" + +static unsigned int g_verbose_ = 0; + +struct test_kv_cache_entry { + struct lru_cache_entry lru_; + + char key[128]; + char value[128]; +}; +typedef struct test_kv_cache_entry kve_t; + +static +void +test_kv_hash_feed(lru_entry_t *entry, char **bufp, size_t *szp) +{ + struct test_kv_cache_entry *e = (struct test_kv_cache_entry *)entry; + *bufp = e->key; + *szp = sizeof e->key; +} + +static +int +test_kv_cmp(lru_entry_t *a, lru_entry_t *b) +{ + kve_t *kv_a = (kve_t *)a, + *kv_b = (kve_t *)b; + + return strcmp(kv_a->key, kv_b->key); +} + +static +void +kv_dump(struct lru_cache *c) +{ + struct test_kv_cache_entry *kve; + lru_entry_t *e; + size_t i; + + fprintf(stderr, "\nLRU, %zu entries\n", c->num_entries); + + if (g_verbose_ == 0) + return; + + fprintf(stderr, "-- hash --\n"); + for (i = 0; i < c->hash_sz; i++) { + int s; + for (s = 0, e = c->hash[i].entry; e; e = e->hash_next, s++) { + kve = (struct test_kv_cache_entry *)e; + if (e != NULL) { + if (e->hash_slot != i) + fprintf(stderr, "!!! sanity failure, entry has hash slot %zu, but is in slot %zu !!!\n", e->hash_slot, i); + fprintf(stderr, "%*s[%zu]: (%p) '%s':'%s'\n", s, "", i, kve, kve->key, kve->value); + } + } + } + + fprintf(stderr, "-- queue --\n"); + for (e = c->newest; e; e = e->next) { + kve = (struct test_kv_cache_entry *)e; + fprintf(stderr, "(%p) '%s':'%s'\n", kve, kve->key, kve->value); + } + + fprintf(stderr, "\n"); +} + +void +kv_genstats(lru_entry_t *e, size_t i, void *data) +{ + struct test_kv_cache_entry *kve = (struct test_kv_cache_entry *)e; + size_t *hash_depth = (size_t *)data; + (void)i, (void)kve; + + hash_depth[e->hash_slot]++; +} +void +kv_dumpstats(struct lru_cache *c, size_t *stats, size_t sz) +{ + size_t i; + + for (i = 0; i < sz; i++) + fprintf(stderr, "slot %04zu: %zu (expect: %zu)\n", i, stats[i], c->hash[i].tally); +} + +int +main(int argc, char *argv[]) +{ + const size_t hash_sz = 31; + const size_t capacity = 256; + + struct lru_cache *cache; + struct test_kv_cache_entry *e, *r, m; + int i; + + size_t *stats; + + while ( (i = getopt(argc, argv, "vh")) != EOF ) { + switch (i) { + case 'v': + g_verbose_++; + break; + + case 'h': + default: + exit(EXIT_FAILURE); + } + } + + stats = malloc(hash_sz * sizeof *stats); + memset(stats, 0, hash_sz * sizeof *stats); + + cache = lru_cache_new(hash_sz, capacity, test_kv_hash_feed, test_kv_cmp); + assert(cache != NULL); + + kv_dump(cache); + + e = malloc(sizeof *e); + assert(e != NULL); + + strncpy(e->key, "key", sizeof e->key); + strncpy(e->value, "value", sizeof e->value); + + lru_cache_insert(cache, (lru_entry_t *)e, (lru_entry_t **)&r); + assert(r == NULL); + + kv_dump(cache); + + memset(&m, 0, sizeof m); + strncpy(m.key, "key", sizeof e->key); + e = (struct test_kv_cache_entry *)lru_cache_locate(cache, (lru_entry_t *)&m); + assert(e != NULL); + + lru_cache_extract(cache, (lru_entry_t *)e); + free(e); + + kv_dump(cache); + + for (i = 0; i < 500; i++) { + r = NULL; + e = malloc(sizeof *e); + assert(e != NULL); + memset(e, 0, sizeof *e); + snprintf(e->key, sizeof e->key, "test %d key", i); + snprintf(e->value, sizeof e->value, "some %d value", i); + lru_cache_insert(cache, (lru_entry_t *)e, (lru_entry_t **)&r); + if (r) { + if (g_verbose_) + fprintf(stderr, "freeing (%p) '%s':'%s'\n", r, r->key, r->value); + free(r); + } + } + + kv_dump(cache); + + for (i = 0; i < 500; i += 3) { + memset(&m, 0, sizeof m); + snprintf(m.key, sizeof m.key, "test %d key", i); + e = (struct test_kv_cache_entry *)lru_cache_locate(cache, (lru_entry_t *)&m); + if (e == NULL) { + if (g_verbose_) + fprintf(stderr, "key %d was not cached\n", i); + continue; + } + lru_cache_extract(cache, (lru_entry_t *)e); + if (g_verbose_) + fprintf(stderr, "extracted '%s'\n", e->key); + free(e); + } + + kv_dump(cache); + + for (i = 0; i < 10; i += 2) { + e = malloc(sizeof *e); + assert(e != NULL); + memset(e, 0, sizeof *e); + snprintf(e->key, sizeof e->key, "new key %d", i); + snprintf(e->value, sizeof e->value, "new value %d", i); + lru_cache_insert(cache, (lru_entry_t *)e, (lru_entry_t **)&r); + if (r) { + if (g_verbose_) + fprintf(stderr, "freeing (%p) '%s':'%s'\n", r, r->key, r->value); + free(r); + } + } + + kv_dump(cache); + + lru_cache_foreach(cache, kv_genstats, stats); + kv_dumpstats(cache, stats, hash_sz); + + exit(EXIT_SUCCESS); +} diff --git a/resolver.c b/resolver.c new file mode 100644 index 0000000..50907e3 --- /dev/null +++ b/resolver.c @@ -0,0 +1,1263 @@ +#define _REENTRANT + +#if defined(SOLARIS) && !defined(_POSIX_SOURCE) +# define _POSIX_SOURCE +#endif + +#include "copyright.h" +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_GETRLIMIT +# include +#endif /* HAVE_GETRLIMIT */ + +#include "lru_cache.h" + +#if !defined(O_NONBLOCK) || defined(ULTRIX) +# ifdef FNDELAY /* SunOS */ +# define O_NONBLOCK FNDELAY +# else +# ifdef O_NDELAY /* SysV */ +# define O_NONBLOCK O_NDELAY +# endif /* O_NDELAY */ +# endif /* FNDELAY */ +#endif + +#define NUM_THREADS 5 /* default number of threads to start */ +#define CACHE_CAPACITY 8192 /* default number of addresses to cache */ +#define CACHE_HASH_SZ 1021 /* default number of hash slots, prime recommended */ +#define CACHE_AGE_SUCCESS 60 /* default seconds to maintain a cached success */ +#define CACHE_AGE_FAIL 1800 /* seconds to maintain a cached fail */ +#define BUF_SZ 1024 /* size of input and output buffers */ + +#define IDENT_PORT 113 /* port number of the ident service */ +#define IDENT_TIMEOUT 60 /* number of seconds to wait for an ident connection */ + +#define QUIT_COMMAND "QUIT" /* shut down if this input is encountered */ +#ifdef WITH_RESOLVER_STATS +# define DUMP_COMMAND "DUMP" /* report on internal state */ +#endif + + +static +struct options { + unsigned int verbose; + size_t num_threads; + size_t capacity; + size_t hash_sz; + unsigned int age_success; + unsigned int age_fail; + char *log_filename; +} g_opts_ = { + 0, + NUM_THREADS, + CACHE_CAPACITY, + CACHE_HASH_SZ, + CACHE_AGE_SUCCESS, + CACHE_AGE_FAIL, + NULL +}; + + +/* + N.B. The hostname in a cache entry is a flexible array! + If this is ever updated to c99 style, also update how it's allocated + in cache_add_. +*/ +struct cache_entry { + lru_entry_t lru_; /* must be first */ + + struct sockaddr_storage ss; + time_t timestamp; + int succeeded; + char hostname[1]; +}; + +static pthread_rwlock_t g_cache_rwlock_ = PTHREAD_RWLOCK_INITIALIZER; +static struct lru_cache *g_cache_ = NULL; + +static volatile unsigned int g_want_shutdown_ = 0; + +static FILE *g_log_stream_ = NULL; + +#define LOG(...) do { fprintf(g_log_stream_, __VA_ARGS__); fflush(g_log_stream_); } while (0) +#define LOG_V(...) do { if (g_opts_.verbose > 0) { fprintf(g_log_stream_, __VA_ARGS__); fflush(g_log_stream_); } } while (0) +#define LOG_VV(...) do { if (g_opts_.verbose > 1) { fprintf(g_log_stream_, __VA_ARGS__); fflush(g_log_stream_); } } while (0) +#define LOG_VVV(...) do { if (g_opts_.verbose > 2) { fprintf(g_log_stream_, __VA_ARGS__); fflush(g_log_stream_); } } while (0) + + + +#define USAGE_FLAG_SHOWFULL (1<<0) +static +void +usage_(const char *prog, unsigned int flags) +{ + FILE *f = (flags & USAGE_FLAG_SHOWFULL) ? stdout : stderr; + char *x = strrchr(prog, '/'); + + if (x && *(x + 1)) + prog = x + 1; + + if (flags & USAGE_FLAG_SHOWFULL) + fprintf(f, + "%s -- asynchronous caching hostname and identd lookup tool\n\n" + + "\tReads lines from stdin, in the format of an IP address,\n" + "followed by a remote port number in parenthesis, followed by a local\n" + "port number, followed by a newline.\n\n" + + "\tFor each valid line read, it will attempt to resolve the\n" + "hostname of the IP address, and the identd response from the IP for\n" + "the port pairing.\n\n", + prog); + + fprintf(f, "Usage: %s [-h] [-v] [-a ] [-f ] [-j ] [-c ] [-s ] [-o ]\n", prog); + + if (flags & USAGE_FLAG_SHOWFULL) { + fprintf(f, + "Options:\n" + "\t -h -- this screen\n" + "\t -v -- increase verbosity\n" + "\t -o -- write errors and notices to this file (default is to stderr) (implies -v)\n" + "\t -j -- use this many threads (default: %zu)\n" + "\t -c -- cache up to this many addresses (default: %zu)\n" + "\t -s -- use this many bins for hashing (default: %zu)\n" + "\t -a -- cache successful queries for this many seconds (default: %u)\n" + "\t -f -- cache failed queries for this many seconds (default: %u)\n", + g_opts_.num_threads, + g_opts_.capacity, + g_opts_.hash_sz, + g_opts_.age_success, + g_opts_.age_fail + ); + } + fflush(f); +} + + + +#define PTHREAD_OR_DIE__(__fn__,...) do { \ + int r; \ + r = __fn__(__VA_ARGS__); \ + if (r) { \ + LOG("%s: %s\n", #__fn__, strerror(r)); \ + exit(EX_OSERR); \ + } \ +} while (0) + +#define UNLOCK(__l__) PTHREAD_OR_DIE__(pthread_rwlock_unlock, (__l__)) +#define RDLOCK(__l__) PTHREAD_OR_DIE__(pthread_rwlock_rdlock, (__l__)) +#define WRLOCK(__l__) PTHREAD_OR_DIE__(pthread_rwlock_wrlock, (__l__)) + + + +/* + Access AF-specific fields inside a generic sockaddr_storage struct. +*/ +static +void +ss_addr_fields_(const struct sockaddr_storage *ss, + socklen_t *sockaddr_len, void **vaddr, size_t *addr_len, unsigned short **port) +{ + assert(ss != NULL); + + if (ss->ss_family == AF_INET) { + struct sockaddr_in *sa = (struct sockaddr_in *)ss; + struct in_addr *a = &sa->sin_addr; + + if (vaddr) + *vaddr = &a->s_addr; + if (addr_len) + *addr_len = sizeof a->s_addr; + if (sockaddr_len) + *sockaddr_len = sizeof *sa; + if (port) + *port = &sa->sin_port; + } else if (ss->ss_family == AF_INET6) { + struct sockaddr_in6 *sa = (struct sockaddr_in6 *)ss; + struct in6_addr *a = &sa->sin6_addr; + + if (vaddr) + *vaddr = &a->s6_addr; + if (addr_len) + *addr_len = sizeof a->s6_addr; + if (sockaddr_len) + *sockaddr_len = sizeof *sa; + if (port) + *port = &sa->sin6_port; + } else { + LOG_V("unknown AF %d\n", ss->ss_family); + + if (vaddr) + *vaddr = NULL; + if (addr_len) + *addr_len = 0; + if (sockaddr_len) + *sockaddr_len = 0; + if (port) + *port = NULL; + } +} + + + +/* + this is the lru_entry_cmp_fn + Two entries are equal if their addresses match. +*/ +static +int +cache_entry_cmp_(lru_entry_t *a, lru_entry_t *b) +{ + struct cache_entry *ea = (struct cache_entry *)a, + *eb = (struct cache_entry *)b; + void *va, *vb; + size_t addr_len; + + assert(ea != NULL); + assert(eb != NULL); + + /* Keep things simple: different families are not equal. */ + if (ea->ss.ss_family != eb->ss.ss_family) + return -1; + + ss_addr_fields_(&ea->ss, NULL, &va, &addr_len, NULL); + if (va == NULL) + return -1; + + ss_addr_fields_(&eb->ss, NULL, &vb, NULL, NULL); + if (vb == NULL) + return -1; + + return memcmp(va, vb, addr_len); +} + + + +/* + this is the lru_hash_feed_fn + an entry is hashed by its address +*/ +static +void +cache_entry_hash_feed_(lru_entry_t *e, char **buf, size_t *sz) +{ + struct cache_entry *entry = (struct cache_entry *)e; + void *vaddr; + size_t addr_len; + + assert(entry != NULL); + + /* just hash the address */ + ss_addr_fields_(&entry->ss, NULL, &vaddr, &addr_len, NULL); + *buf = vaddr; + *sz = addr_len; +} + + + +/* + Search the cache for an entry. + If it exists, and has not expired, move it to the front of queue. + If it has expired, expunge. +*/ +static +int +cache_find_(struct sockaddr_storage *ss, char *buf, size_t buf_sz) +{ + struct cache_entry *entry, match; + int write_lock = 0; + + assert(ss != NULL); + + memset(&match, 0, sizeof match); + memcpy(&match.ss, ss, sizeof *ss); + + RDLOCK(&g_cache_rwlock_); + +again: + entry = (struct cache_entry *)lru_cache_locate(g_cache_, (lru_entry_t *)&match); + if (entry == NULL) + goto done; + + /* + If an entry exists in the cache, it will need to be updated, so + exchange the read lock for a write lock, and look it up again on + the off chance another thread got rid of it before we could. + */ + if ( ! write_lock) { + UNLOCK(&g_cache_rwlock_); + WRLOCK(&g_cache_rwlock_); + write_lock = 1; + goto again; + } + + /* Drop the entry if it is too old. */ + if (time(NULL) > entry->timestamp + (time_t)(entry->succeeded ? g_opts_.age_success : g_opts_.age_fail)) { + LOG_VV("expired %s\n", entry->hostname); + lru_cache_extract(g_cache_, (lru_entry_t *)entry); + free(entry); + entry = NULL; + goto done; + } + + /* Peek at the front of the queue, and only reinsert if this entry is not there already. */ + if ((lru_entry_t *)entry != g_cache_->newest) { + struct cache_entry *removed; + lru_cache_extract(g_cache_, (lru_entry_t *)entry); + lru_cache_insert(g_cache_, (lru_entry_t *)entry, (lru_entry_t **)&removed); + /* No need to check removed, the queue can never overflow here. */ + } + + if (buf) { + strncpy(buf, entry->hostname, buf_sz); + if (buf[buf_sz - 1] != '\0') + buf[buf_sz - 1] = '\0'; + } + +done: + UNLOCK(&g_cache_rwlock_); + return (entry != NULL); +} + + + +/* + Update the cache to include the given data. + hostname_len is the strlen of the hostname +*/ +static +void +cache_add_(struct sockaddr_storage *ss, const char *hostname, size_t hostname_len, time_t timestamp, int succeeded) +{ + struct cache_entry *entry, *old, match; + + assert(ss != NULL); + assert(hostname != NULL || hostname_len == 0); + + memset(&match, 0, sizeof match); + memcpy(&match.ss, ss, sizeof *ss); + + /* + When allocating an entry, the size of the struct already includes + the extra byte for the nul at the end of the hostname. + */ + entry = malloc(sizeof *entry + hostname_len); + if (entry == NULL) { + LOG("%s: %s\n", "malloc", strerror(errno)); + return; + } + + entry->timestamp = timestamp; + entry->succeeded = succeeded; + memcpy(&entry->ss, ss, sizeof *ss); + memcpy(&entry->hostname, hostname, hostname_len); + entry->hostname[hostname_len] = '\0'; + + WRLOCK(&g_cache_rwlock_); + + /* assure we're not duplicating entries */ + old = (struct cache_entry *)lru_cache_locate(g_cache_, (lru_entry_t *)entry); + if (old != NULL) { + lru_cache_extract(g_cache_, (lru_entry_t *)old); + free(old); + } + + lru_cache_insert(g_cache_, (lru_entry_t *)entry, (lru_entry_t **)&old); + if (old) + free(old); + + UNLOCK(&g_cache_rwlock_); +} + + + +#ifdef WITH_RESOLVER_STATS + +static +void +cache_entry_dump_(lru_entry_t *e, size_t idx, void *data) +{ + char buf[BUF_SZ]; + struct cache_entry *entry = (struct cache_entry *)e; + time_t *now = (time_t *)data; + int r; + + assert(e != NULL); + assert(data != NULL); + + r = getnameinfo((struct sockaddr *)&(entry->ss), sizeof entry->ss, buf, sizeof buf, NULL, 0, NI_NUMERICHOST); + if (r) { + LOG("%s: %s\n", "getnameinfo", gai_strerror(r)); + snprintf(buf, sizeof buf, "(unknown)"); + } + fprintf(g_log_stream_, "%05zu: %s -> %s [%ld]\n", idx, buf, entry->hostname, *now - entry->timestamp); +} + +static +void +cache_statdump_(void) { + time_t now = time(NULL); + size_t i; + + RDLOCK(&g_cache_rwlock_); + flockfile(g_log_stream_); + + fprintf(g_log_stream_, "-- cache newest to oldest --\n"); + lru_cache_foreach(g_cache_, cache_entry_dump_, &now); + fprintf(g_log_stream_, "----------------------------\n"); + + fprintf(g_log_stream_, "----- hash slot depths -----\n"); + for (i = 0; i < g_cache_->hash_sz; i++) { + if (g_cache_->hash[i].tally) { + fprintf(g_log_stream_, "[%zu]: %zu\n", i, g_cache_->hash[i].tally); + } + } + fprintf(g_log_stream_, "----------------------------\n"); + + funlockfile(g_log_stream_); + UNLOCK(&g_cache_rwlock_); +} + +#endif /* WITH_RESOLVER_STATS */ + + + +/* empty everything */ +void cache_free_(void) +{ + struct cache_entry *entry; + + while ((entry = (struct cache_entry *)g_cache_->newest)) { + lru_cache_extract(g_cache_, g_cache_->newest); + free(entry); + } +} + + + +/* connect_nb_ + connect wrapper, with timeout + deadline is the absolute time after which it will give up +*/ +static +int +connect_nb_(int fd, struct sockaddr *sa, socklen_t sa_len, time_t deadline) +{ + fd_set fds; + struct timeval timeout_tv; + time_t now = time(NULL); + socklen_t r_len; + int r; + + LOG_VVV("connecting fd %d\n", fd); + + /* attempt to connect, expect EINPROGRESS or immediate success */ + while ( (r = connect(fd, sa, sa_len)) ) { + if (errno == EINPROGRESS) + break; + if (errno == EINTR) + continue; + + LOG("%s: %s\n", "connect", strerror(errno)); + return -1; + } + + /* wait for it to become writable, which indicates connect completion */ + do { + FD_ZERO(&fds); + FD_SET(fd, &fds); + timeout_tv.tv_sec = deadline - now; + timeout_tv.tv_usec = 0; + + r = select(fd + 1, NULL, &fds, NULL, &timeout_tv); + if (r < 0) { + LOG("%s: %s\n", "select", strerror(errno)); + if (errno != EINTR && errno != EAGAIN) + return -1; + } + } while (r == 0 + && time(&now) < deadline); + if (now >= deadline) { + errno = ETIMEDOUT; + LOG("%s: %s\n", "connect", strerror(errno)); + return -1; + } + + /* connect completed, check for errors */ + r = -1; + r_len = sizeof r; + if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &r, &r_len)) { + LOG("%s: %s\n", "getsockopt", strerror(errno)); + return -1; + } + if (r) { + errno = r; + LOG("%s: %s\n", "connect", strerror(errno)); + return -1; + } + + return 0; +} + + + +/* write_nb_ + write a buffer to a socket, with timeout +*/ +static +ssize_t +write_nb_(int fd, const char *buf, size_t buf_len, time_t deadline) +{ + fd_set fds; + struct timeval timeout_tv; + time_t now = time(NULL); + ssize_t w_len, total; + int r; + + total = 0; + do { + LOG_VVV("writing '%*s' to fd %d\n", (int)buf_len, buf, fd); + + FD_ZERO(&fds); + FD_SET(fd, &fds); + timeout_tv.tv_sec = deadline - now; + timeout_tv.tv_usec = 0; + + r = select(fd + 1, NULL, &fds, NULL, &timeout_tv); + if (r < 0) { + LOG("%s: %s\n", "select", strerror(errno)); + if (errno == EINTR || errno == EAGAIN) + continue; + return -1; + } + if (r == 0) + continue; + + if (FD_ISSET(fd, &fds)) { + w_len = write(fd, buf, buf_len); + if (w_len < 0) { + LOG("%s: %s\n", "write", strerror(errno)); + if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) + continue; + return -1; + } + LOG_VVV("wrote %zd bytes (of %zu) to fd %d\n", w_len, buf_len, fd); + + total += w_len; + buf += w_len; + buf_len -= w_len; + } + } while (buf_len > 0 + && time(&now) < deadline); + if (now >= deadline) { + errno = ETIMEDOUT; + LOG("%s: %s\n", "write", strerror(errno)); + return -1; + } + + LOG_VVV("fd %d wrote %zd bytes\n", fd, total); + + return total; +} + + + +/* read_line_nb_ + reads at least a line, with a timeout + sets *line_end to the first '\n' or '\r' encountered +*/ +static +ssize_t +read_line_nb_(int fd, char *buf, size_t buf_len, char **line_end, time_t deadline) +{ + fd_set fds; + struct timeval timeout_tv; + time_t now = time(NULL); + ssize_t r_len, total; + int r; + + *line_end = NULL; + total = 0; + do { + FD_ZERO(&fds); + FD_SET(fd, &fds); + timeout_tv.tv_sec = deadline - now; + timeout_tv.tv_usec = 0; + + r = select(fd + 1, &fds, NULL, NULL, &timeout_tv); + if (r < 0) { + LOG("%s: %s\n", "select", strerror(errno)); + if (errno == EINTR || errno == EAGAIN) + continue; + return -1; + } + if (r == 0) + continue; + + if (FD_ISSET(fd, &fds)) { + r_len = read(fd, buf, buf_len); + if (r_len < 0) { + LOG("%s: %s\n", "read", strerror(errno)); + if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) + continue; + return -1; + } + if (r_len == 0) { + LOG_VVV("fd %d EOF\n", fd); + break; + } + + LOG_VVV("read %zu bytes from fd %d\n", r_len, fd); + + total += r_len; + while (r_len--) { + if (*buf == '\r' || *buf == '\n') { + LOG_VVV("fd %d line-length %zu\n", fd, total - r_len); + *line_end = buf; + break; + } + buf++; + buf_len--; + } + } + } while (*line_end == NULL + && buf_len > 0 + && time(&now) < deadline); + if (now >= deadline) { + errno = ETIMEDOUT; + LOG("%s: %s\n", "read", strerror(errno)); + return -1; + } + + return total; +} + + + +/* + remove trim characters from start and end of a token +*/ +static +char * +strtok_trim_(char *str, const char *sep, const char *trim, char **endp) +{ + char *tok, *ep; + + tok = strtok_r(str, sep, endp); + if (tok == NULL) + return NULL; + + if (trim) { + while (*tok && strchr(trim, *tok)) { + tok++; + } + + ep = *endp - 1; + while (ep > tok && strchr(trim, *ep)) { + *ep = '\0'; + ep--; + } + } + + return tok; +} + + + +/* + destructively parse response for validity, leaves username in buffer on success + + an ident response is like: + ' port , port : USERID : os : userid' + or + ' port , port : ERROR : error' +*/ +static +int +ident_response_parse_(char *buf, unsigned short port_remote, unsigned short port_local) +{ + char *tok, *last; + int port; + + /* first, ensure response prefix matches request */ + + /* remote port */ + if ((tok = strtok_trim_(buf, ",", " \t", &last)) == NULL) + return -1; + if (sscanf(tok, "%d", &port) != 1) + return -1; + if (port != port_remote) + return -1; + + /* local port */ + if ((tok = strtok_trim_(NULL, ":", " \t", &last)) == NULL) + return -1; + if (sscanf(tok, "%d", &port) != 1) + return -1; + if (port != port_local) + return -1; + + /* reply */ + if ((tok = strtok_trim_(NULL, ":", " \t", &last)) == NULL) + return -1; + + LOG_VVV("ident resonse: %s %s\n", tok, last); + + if (strcasecmp(tok, "USERID") != 0) + return -1; + + /* os type, ignored */ + if ((tok = strtok_trim_(NULL, ":", NULL, &last)) == NULL) + return -1; + + LOG_VVV("ident user: %s\n", last); + + /* anything remaining is user id */ + /* move it to the front of the buffer */ + /* strcpy is safe here because we could not have passed in an unterminated string */ + strcpy(buf, last); + + return 0; +} + + + +/* + Query a host's identity service for user information. + This overwrites ss->sin*_port. +*/ +static +int +ident_query_(char *out_buf, size_t out_buf_sz, struct sockaddr_storage *ss, unsigned short port_remote, unsigned short port_local) +{ + int retval = -1; + char req_buf[16]; + char buf[BUF_SZ]; + size_t len; + char *buf_ptr; + int fd; + int r; + time_t now, deadline; + unsigned short *vport; + socklen_t sockaddr_len; + +#ifdef DEBUG_PRETEND_IDENT + size_t delay = lrand48() % IDENT_TIMEOUT; + if (delay < 45) { + delay = delay / 5; + sleep(delay); + } else { + delay = 0; + } + LOG_VVV("pretending ident query took %zu seconds\n", delay); + goto done; +#endif /* DEBUG_PRETEND_IDENT */ + + deadline = time(&now) + IDENT_TIMEOUT; + + fd = socket(ss->ss_family, SOCK_STREAM, 0); + if (fd == -1) { + LOG("%s: %s\n", "socket", strerror(errno)); + goto done; + } + + /* make non-blocking */ + if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { + LOG("%s: %s\n", "fcntl", strerror(errno)); + goto close_done; + } + + /* get the sockaddr length, and the port address */ + ss_addr_fields_(ss, &sockaddr_len, NULL, NULL, &vport); + if (vport == NULL) + goto close_done; + + *vport = htons(IDENT_PORT); + + r = connect_nb_(fd, (struct sockaddr *)ss, sockaddr_len, deadline); + if (r) { + LOG_V("%s: %s\n", "nb_connect_", strerror(errno)); + goto close_done; + } + + /* craft and send our meager request */ + len = snprintf(req_buf, sizeof req_buf, "%hu,%hu\r\n", port_remote, port_local); + r = write_nb_(fd, req_buf, len, deadline); + if (r < 0) { + LOG_V("%s: %s\n", "nb_write_", strerror(errno)); + goto close_done; + } + + /* read one full line as a response */ + r = read_line_nb_(fd, buf, sizeof buf, &buf_ptr, deadline); + if (r < 0) { + LOG_V("%s: %s\n", "nb_read_", strerror(errno)); + goto close_done; + } + + if (buf_ptr == NULL) { + LOG_VV("incomplete response\n"); + goto close_done; + } + + *buf_ptr = '\0'; + + if (ident_response_parse_(buf, port_remote, port_local)) + goto close_done; + + /* success */ + strncpy(out_buf, buf, out_buf_sz); + retval = 0; + +close_done: + shutdown(fd, 2); /* be ruthless, discard any remaining data */ + close(fd); + +done: + if (retval) { + /* no success, just echo the port */ + snprintf(out_buf, out_buf_sz, "%d", port_remote); + } + + return retval; +} + + + +/* + Render the name of an address into the supplied buffer. + buf_sz ought to be at least NI_HOSTMAX (1025) +*/ +static +void +host_query_(char *buf, size_t buf_sz, struct sockaddr_storage *ss) +{ + int succeeded = 1; + int r; + + if (cache_find_(ss, buf, buf_sz)) { + LOG_VVV("found '%s' in cache\n", buf); + return; + } + + /* not in cache, try to resolve it */ + r = getnameinfo((struct sockaddr *)ss, sizeof *ss, buf, buf_sz, NULL, 0, NI_NAMEREQD); + if (r) { + if (r != EAI_NONAME) + LOG("%s: %s\n", "getnameinfo", gai_strerror(r)); + + succeeded = 0; + + /* not resolvable, render it numerically */ + r = getnameinfo((struct sockaddr *)ss, sizeof *ss, buf, buf_sz, NULL, 0, NI_NUMERICHOST); + if (r) { + LOG("%s: %s\n", "getnameinfo", gai_strerror(r)); + snprintf(buf, buf_sz, "(unknown)"); + } + LOG_VVV("failed to resolve '%s'\n", buf); + } + + cache_add_(ss, buf, strlen(buf), time(NULL), succeeded); +} + + + +/* + The main processing loop for each thread. +*/ +static +void * +resolve_thread_(void *data) +{ + size_t id = *(size_t *)data; + char buf[BUF_SZ]; + char hostname_buf[2048]; /* seems silly-large, but is just the next power-of-two up from NI_MAXHOST (1025) */ + char username_buf[512]; /* identd user id might be this long */ + char *buf_ptr; + struct sockaddr_storage ss; + void *vaddr; + int port_remote, port_local; + sigset_t sigset_maskall; + int r; + + LOG_VV("<%zu> thread started\n", id); + + sigfillset(&sigset_maskall); + pthread_sigmask(SIG_SETMASK, &sigset_maskall, NULL); + + for (/* */; ! g_want_shutdown_; /* */) { + memset(&ss, 0, sizeof ss); + + /* + Attempt to fetch a line from stdin. + If EOF is encountered, or any error other than EAGAIN, + signal global shutdown and bail. + */ + flockfile(stdin); + if (g_want_shutdown_) { + funlockfile(stdin); + break; + } + + LOG_VVV("<%zu> awaiting request\n", id); + + if (fgets(buf, sizeof buf, stdin) == NULL) { + if (feof_unlocked(stdin)) { + g_want_shutdown_ = 1; + } else if (ferror_unlocked(stdin)) { + if (errno == EINTR) { + clearerr_unlocked(stdin); + funlockfile(stdin); + continue; + } else { + LOG("<%zu> %s: %s\n", id, "fgets", strerror(errno)); + g_want_shutdown_ = 1; + } + } else { + LOG("<%zu> %s: %s\n", id, "fgets", "NULL but no error?"); + g_want_shutdown_ = 1; + } + } + + LOG_VVV("<%zu> '%s'\n", id, buf); + + funlockfile(stdin); + if (g_want_shutdown_) + break; + + /* try again if line is empty */ + if (*buf == '\n' || *buf == '\0') + continue; + + /* explicit exit request */ + if (strncmp(QUIT_COMMAND, buf, strlen(QUIT_COMMAND)) == 0) { + g_want_shutdown_ = 1; + fclose(stdin); + break; + } + +#ifdef WITH_RESOLVER_STATS + if (strncmp(DUMP_COMMAND, buf, strlen(DUMP_COMMAND)) == 0) { + cache_statdump_(); + continue; + } +#endif /* WITH_RESOLVER_STATS */ + + /* locate the ports, and parse them first */ + buf_ptr = strchr(buf, '('); + if (buf_ptr == NULL) { + LOG_V("<%zu> malformed request, no paren: %s", id, buf); /* expect \n in buf */ + continue; + } + + r = sscanf(buf_ptr, "(%d)%d", &port_remote, &port_local); + if (r != 2) { + LOG_V("<%zu> malformed request, bad ports: %s (%d)", id, buf_ptr, r); + continue; + } + + if (port_local < 0 + || port_local > (unsigned short)-1 + || port_remote < 0 + || port_remote > (unsigned short)-1) { + LOG_V("<%zu> port out of range: %s", id, buf); + continue; + } + + /* truncate buffer at ports, and parse into an address */ + *buf_ptr = '\0'; + + /* locate the first delimiter to determine address family */ + buf_ptr = strpbrk(buf, ".:"); + if (buf_ptr == NULL) { + LOG_V("<%zu> could not parse address: %s\n", id, buf); + continue; + } + else if (*buf_ptr == '.') { + /* ipv4 */ + ss.ss_family = AF_INET; + vaddr = &((struct sockaddr_in *)&ss)->sin_addr; + } +#ifdef USE_IPV6 + else if (*buf_ptr == ':') { + /* ipv6 */ + ss.ss_family = AF_INET6; + vaddr = &((struct sockaddr_in6 *)&ss)->sin6_addr; + } +#endif /* USE_IPV6 */ + else { + LOG_V("<%zu> could not parse address: %s\n", id, buf); + continue; + } + + r = inet_pton(ss.ss_family, buf, vaddr); + if (r == -1) { + LOG("%s: %s\n", "inet_pton", strerror(errno)); + continue; + } else if (r == 0) { + LOG_V("<%zu> could not parse address: %s\n", id, buf); + continue; + } + + LOG_VVV("> %s %d %d\n", buf, port_remote, port_local); + + ident_query_(username_buf, sizeof username_buf, &ss, port_remote, port_local); + host_query_(hostname_buf, sizeof hostname_buf, &ss); + + flockfile(stdout); + fprintf(stdout, "%s(%d)|%s(%s)\n", buf, port_remote, hostname_buf, username_buf); + fflush(stdout); + funlockfile(stdout); + + LOG_VVV("<%zu> '%s(%d)|%s(%s)'\n", id, buf, port_remote, hostname_buf, username_buf); + } + + LOG_VV("<%zu> thread exiting\n", id); + return NULL; +} + + + +static +void +set_signal_(int sig, void (*func)(int)) +{ +#ifdef _POSIX_VERSION + struct sigaction act, oact; + + act.sa_handler = func; + sigemptyset(&act.sa_mask); + +# ifdef SA_RESTART + act.sa_flags = SA_RESTART; +# else /* SA_RESTART */ + act.sa_flags = 0; +# endif /* SA_RESTART */ + + if (sigaction(sig, &act, &oact)) { + LOG("%s: %s\n", "sigaction", strerror(errno)); + } +#else /* _POSIX_VERSION */ + if (signal(signo, sighandler) == SIG_ERR) { + LOG("%s: %s\n", "signal", strerror(errno)); + } +#endif /* _POSIX_VERSION */ +} + + + +/* +Convert a base-10 number in str to a size_t within range specified. +Just a helper for processing command-line arguments. +*/ +static +int +str_to_sizet_range_(const char *str, size_t *val, size_t min, size_t max) +{ + char *end; + long long num; + + num = strtoll(str, &end, 10); + if (*str == '\0' || *end != '\0') { + fprintf(stderr, "'%s' is not a valid number\n", str); + return -1; + } + if (num < 0 + || (unsigned long long)num < min + || (unsigned long long)num > max) { + fprintf(stderr, "%lld is not between %zu and %zu\n", num, min, max); + return -1; + } + + *val = (size_t)num; + + return 0; +} + +/* same for unsigned int */ +static +int +str_to_uint_range_(const char *str, unsigned int *val, unsigned int min, unsigned int max) +{ + char *end; + long long num; + + num = strtoll(str, &end, 10); + if (*str == '\0' || *end != '\0') { + fprintf(stderr, "'%s' is not a valid number\n", str); + return -1; + } + if (num < 0 + || (unsigned long long)num < min + || (unsigned long long)num > max) { + fprintf(stderr, "%lld is not between %u and %u\n", num, min, max); + return -1; + } + + *val = (unsigned int)num; + + return 0; +} + +int +main(int argc, char **argv) +{ + struct { + pthread_t pthr; + size_t tid; + } *threads = NULL; + size_t i; + int c; + +#ifdef HAVE_GETRLIMIT + struct rlimit lim; + + /* close anything not stdio */ + getrlimit(RLIMIT_NOFILE, &lim); + for (i = 3; i < lim.rlim_cur; i++) { + if (close(i) == 0) + LOG_VV("cleaned up fd %zu\n", i); + } +#endif /* HAVE_GETRLIMIT */ + +#ifdef DEBUG_PRETEND_IDENT + srand48(getpid()); +#endif + + g_log_stream_ = stderr; + + while ( (c = getopt(argc, argv, "a:c:f:j:s:o:vh")) != EOF ) { + switch (c) { + case 'a': + if (str_to_uint_range_(optarg, &g_opts_.age_success, 0, (unsigned int)-1)) + exit(EX_USAGE); + break; + + case 'c': + if (str_to_sizet_range_(optarg, &g_opts_.capacity, 256, (size_t)-1)) + exit(EX_USAGE); + break; + + case 'f': + if (str_to_uint_range_(optarg, &g_opts_.age_fail, 0, (unsigned int)-1)) + exit(EX_USAGE); + break; + + case 'j': + if (str_to_sizet_range_(optarg, &g_opts_.num_threads, 1, (size_t)-1)) + exit(EX_USAGE); + break; + + case 's': + if (str_to_sizet_range_(optarg, &g_opts_.hash_sz, 1, (size_t)-1)) + exit(EX_USAGE); + break; + + case 'v': + g_opts_.verbose += 1; + break; + + case 'o': + g_opts_.log_filename = optarg; + g_opts_.verbose += 1; + break; + + case 'h': + usage_(argv[0], USAGE_FLAG_SHOWFULL); + exit(EX_OK); + + default: + usage_(argv[0], 0); + exit(EX_USAGE); + } + } + + if (argc - optind != 0) { + usage_(argv[0], 0); + exit(EX_USAGE); + } + + if (g_opts_.log_filename) { + g_log_stream_ = fopen(g_opts_.log_filename, "a+"); + if (g_log_stream_ == NULL) { + fprintf(stderr, "could not open '%s': %s\n", g_opts_.log_filename, strerror(errno)); + exit(EX_OSERR); + } + } + +#ifdef LOG_DEBUG + char lf[] = "/tmp/resolver-debug.XXXXXX"; + int lf_fd = mkstemp(lf); + + if (lf_fd == -1) { + fprintf(stderr, "could not create debug logfile '%s': %s\n", lf, strerror(errno)); + exit(EX_OSERR); + } + g_log_stream_ = fdopen(lf_fd, "a+"); + if (g_log_stream_ == NULL) { + fprintf(stderr, "%s: %s\n", "fdopen", strerror(errno)); + exit(EX_OSERR); + } + g_opts_.verbose = 3; +#endif + + g_cache_ = lru_cache_new(g_opts_.hash_sz, g_opts_.capacity, cache_entry_hash_feed_, cache_entry_cmp_); + if (g_cache_ == NULL) { + exit(EX_OSERR); + } + + threads = calloc(g_opts_.num_threads, sizeof *threads); + if (threads == NULL) { + LOG("%s: %s\n", "calloc", strerror(errno)); + exit(EX_OSERR); + } + + set_signal_(SIGPIPE, SIG_IGN); + set_signal_(SIGHUP, SIG_IGN); + + for (i = 0; i < g_opts_.num_threads; i++) { + threads[i].tid = i; + c = pthread_create(&threads[i].pthr, NULL, resolve_thread_, (void *)&threads[i].tid); + if (c) { + LOG("%s: %s\n", "pthread_create", strerror(c)); + exit(EX_OSERR); + } + } + + for (i = 0; i < g_opts_.num_threads; i++) { + void *retval; + c = pthread_join(threads[i].pthr, &retval); + if (c) { + LOG("%s: %s\n", "pthread_join", strerror(c)); + } + } + + cache_free_(); + free(g_cache_); + free(threads); + + LOG("Resolver exited.\n"); + + fclose(g_log_stream_); + + exit(EX_OK); +}