From: Justin Wind Date: Tue, 26 Feb 2013 22:08:03 +0000 (-0800) Subject: General cleanups, and argument validations. X-Git-Url: https://git.squeep.com/?a=commitdiff_plain;h=c09f96c65c1d62b2f21bf592e5927c3e07e2906b;p=reservoir_sample General cleanups, and argument validations. Unified textual style in comments. Fixed wrong option identifier in usage_(). Added _del() to match _new() call in module interfaces. Numeric command-line arguments now do basic error-checking, but still do no sane range-checking yet. Added simple statistical test to 'make check' target. --- diff --git a/Makefile b/Makefile index 405e033..b335a02 100644 --- a/Makefile +++ b/Makefile @@ -39,6 +39,7 @@ check: test echo Checking $${t};\ ./$${t} > /dev/null || echo -- $${t} FAILED --;\ done + $(TEST_DIR)/validate-statistics.sh $(TEST_DIR)/%_test.o: %.c $(CC) $(CFLAGS) $(CPPFLAGS) -DTEST -c -o $@ $< diff --git a/buf.c b/buf.c index 7a94079..4465ff2 100644 --- a/buf.c +++ b/buf.c @@ -7,9 +7,14 @@ #include "notify.h" #include "test_suite.h" +/* If these ever need to be overridden... */ +void *(*buf_malloc_)(size_t) = malloc; +void *(*buf_realloc_)(void *, size_t) = realloc; +void (*buf_free_)(void *) = free; + inline buf_t buf_new(size_t sz) { - buf_t buf = malloc(sz + sizeof *buf); + buf_t buf = buf_malloc_(sz + sizeof *buf); if (buf) { buf->buf_sz = sz; buf->buf_start = buf->buf_used = 0; @@ -18,6 +23,16 @@ buf_t buf_new(size_t sz) { return buf; } +inline +void buf_del(buf_t *pbuf) { + if (pbuf) { + if (*pbuf) { + buf_free_(*pbuf); + *pbuf = NULL; + } + } +} + inline void buf_rebase(buf_t buf) { if (buf->buf_start == 0) @@ -46,7 +61,7 @@ int buf_makeroom(buf_t *pbuf, size_t roomfor) { return 0; new_sz = (*pbuf)->buf_used + roomfor; - tmp_ptr = realloc(*pbuf, new_sz + sizeof **pbuf); + tmp_ptr = buf_realloc_(*pbuf, new_sz + sizeof **pbuf); if (tmp_ptr == NULL) { NOTIFY_ERROR("%s:%s", "realloc", strerror(errno)); return -1; @@ -198,8 +213,7 @@ int test_buf_flense_(void *test_arg, void *suite_arg) { TEST_INFO("flensed: '%.*s'", (int)dst->buf_used, dst->buf + dst->buf_start); memset(dst, 0, dst->buf_sz + sizeof *dst); - free(dst); - dst = NULL; + buf_del(&dst); next_result++; } @@ -216,8 +230,7 @@ int test_buf_flense_(void *test_arg, void *suite_arg) { TEST_INFO("remains: '%.*s'", (int)src->buf_used, src->buf + src->buf_start); memset(src, 0, src->buf_sz + sizeof *src); - free(src); - src = NULL; + buf_del(&src); return retval; } diff --git a/buf.h b/buf.h index a47f3ba..70a1ae5 100644 --- a/buf.h +++ b/buf.h @@ -7,7 +7,7 @@ #include "notify.h" #endif /* NDEBUG */ -/* A simple sliding-window byte buffer. */ +/* A simple sliding-window byte buffer. */ typedef struct buf_ { size_t buf_sz; @@ -18,6 +18,7 @@ typedef struct buf_ { #define BUF_ROOM(__b__) ( (__b__)->buf_sz - ( (__b__)->buf_start + (__b__)->buf_used ) ) #ifndef NDEBUG +/* For debugging, dump a buffer. */ #define D_BUF(__pre__,__b__,...) do {\ if ( (__b__) == NULL )\ NOTIFY_DEBUG(__pre__ "buf:%p", ## __VA_ARGS__, (__b__));\ @@ -37,29 +38,35 @@ typedef struct buf_ { #define D_BUF(...) #endif /* NDEBUG */ -/* buf_new - Allocate and return a new buf_t capable of holding #sz bytes. +/* buf_new + Allocate and return a new buf_t capable of holding #sz bytes. */ buf_t buf_new(size_t sz); -/* buf_rebase - Reclaim any free space from the start of #buf, preserving active content. +/* buf_del + Deallocate the buf_t pointed to by #pbuf. +*/ +void buf_del(buf_t *pbuf); + +/* buf_rebase + Reclaim any free space from the start of #buf, preserving active content. */ void buf_rebase(buf_t buf); -/* buf_makeroom - Assures that the buf pointed to by #pbuf has at space to hold #roomfor more - bytes. +/* buf_makeroom + Assures that the buf pointed to by #pbuf has at space to hold #roomfor + more bytes. */ int buf_makeroom(buf_t *pbuf, size_t roomfor); -/* buf_range_dup_or_append - Starting at the #src_offset byte of #src, appends the following #n bytes to - the buffer pointed to by #dstp, which will be re/allocated if needed. +/* buf_range_dup_or_append + Starting at the #src_offset byte of #src, appends the following #n + bytes to the buffer pointed to by #dstp, which will be re/allocated if + needed. */ int buf_range_dup_or_append(buf_t src, size_t src_offset, size_t n, buf_t *pdst); -/* buf_flense +/* buf_flense Starting after #src_offset characters, scan through the buffer pointed to by #psrc, stopping at the first byte matching #delimiter, whereupon, if #pdst is not NULL, all the bytes previous to #delimiter are appended onto diff --git a/reservoir.c b/reservoir.c index 1878203..c139df4 100644 --- a/reservoir.c +++ b/reservoir.c @@ -12,10 +12,15 @@ #include "randomness.h" #include "test_suite.h" +/* If these ever need to be overridden.. */ +void *(*reservoir_malloc_)(size_t) = malloc; +void *(*reservoir_realloc_)(void *, size_t) = realloc; +void (*reservoir_free_)(void *) = free; + reservoir_t reservoir_new(size_t sz) { reservoir_t reservoir; - reservoir = malloc((sz * sizeof *reservoir->reservoir) + sizeof *reservoir); + reservoir = reservoir_malloc_((sz * sizeof *reservoir->reservoir) + sizeof *reservoir); if (reservoir == NULL) { NOTIFY_ERROR("%s:%s", "malloc", strerror(errno)); return NULL; @@ -28,11 +33,24 @@ reservoir_t reservoir_new(size_t sz) { return reservoir; } +void reservoir_del(reservoir_t *preservoir) { + if (preservoir) { + if (*preservoir) { + while ((*preservoir)->reservoir_used) { + (*preservoir)->reservoir_used -= 1; + buf_del(&((*preservoir)->reservoir[(*preservoir)->reservoir_used])); + } + reservoir_free_(*preservoir); + *preservoir = NULL; + } + } +} + int reservoir_grow(reservoir_t *preservoir, size_t growby) { assert(preservoir != NULL); if (growby) { - void *tmp_ptr = realloc(*preservoir, (((*preservoir)->reservoir_sz + growby) * sizeof *(*preservoir)->reservoir) + sizeof **preservoir); + void *tmp_ptr = reservoir_realloc_(*preservoir, (((*preservoir)->reservoir_sz + growby) * sizeof *(*preservoir)->reservoir) + sizeof **preservoir); if (tmp_ptr == NULL) { NOTIFY_ERROR("%s:%s", "realloc", strerror(errno)); return -1; @@ -84,7 +102,8 @@ void reservoir_remember(reservoir_t reservoir, buf_t buf) { if (old_buf != NULL) { D_BUF("FREEING ", old_buf); memset(old_buf, 0, old_buf->buf_sz + sizeof *old_buf); - free(old_buf); + buf_del(&old_buf); + assert(old_buf == NULL); } D_RESERVOIR(reservoir); diff --git a/reservoir.h b/reservoir.h index c056ac5..e59e00a 100644 --- a/reservoir.h +++ b/reservoir.h @@ -9,7 +9,10 @@ #include "notify.h" #endif /* NDEBUG */ -/* A pool of buf_t */ +/* + A size-limited pool of buf_t entries. When the reservoir is filled with + more entries than there is room for, some spill away and are lost. +*/ typedef struct reservoir_ { size_t reservoir_sz; /* allocated slots */ @@ -19,6 +22,7 @@ typedef struct reservoir_ { } *reservoir_t; #ifndef NDEBUG +/* For debugging, dump an entire reservoir's contents. */ #define D_RESERVOIR(__r__) do {\ size_t i;\ NOTIFY_DEBUG("reservoir:%p sz: %zu used:%zu tally:%zu", (__r__), (__r__)->reservoir_sz, (__r__)->reservoir_used, (__r__)->reservoir_tally);\ @@ -31,30 +35,36 @@ typedef struct reservoir_ { #endif /* NDEBUG */ /* reservoir_new - Allocate and return a new reservoir capable of holding #sz bufs. + Allocate and return a new reservoir capable of holding #sz buf_t + entries. */ reservoir_t reservoir_new(size_t sz); +/* reservoir_del + Deallocate the reservoir pointed to by #preservoir. +*/ +void reservoir_del(reservoir_t *preservoir); + /* reservoir_grow - Increase the storage capacity of the reservoir pointed to by #preservoir - by #growby bufs. + Increase the storage capacity of the reservoir pointed to by + #preservoir by #growby entries. */ int reservoir_grow(reservoir_t *preservoir, size_t growby); /* reservoir_remember - Remember #buf, forgetting another buf at random if the reservoir - pointed to by #preservoir is already full to capacity. + Remember #buf, forgetting (and freeing) another buf_t chosen at random + if the reservoir pointed to by #preservoir is already full to capacity. */ void reservoir_remember(reservoir_t reservoir, buf_t buf); /* reservoir_write - Write the contents of the bufs within #reservoir to #fd, each with a - trailing #delim. + Write the contents of the buf_t entries within #reservoir to #fd, each + with a trailing #delim character. */ int reservoir_write(int fd, reservoir_t reservoir, char delim); /* reservoir_write_meta - Write metadata of #reservoir and #samples to fd. + Write metadata about #reservoir and #samples to fd. */ int reservoir_write_meta(int fd, reservoir_t reservoir, unsigned long samples, char delim); diff --git a/reservoir_sample.c b/reservoir_sample.c index 1c64f6f..50a1235 100644 --- a/reservoir_sample.c +++ b/reservoir_sample.c @@ -51,8 +51,9 @@ static struct options_ { .status_fd = STDERR_FILENO, }; -/* #status_requested_ will be set whenever a view of the current sample - reservoir is desired. +/* #status_requested_ + This will be set whenever a snapshot of the current state of the sample + reservoir is desired. Typically this will occur on a signal. */ static int status_requested_ = 0; @@ -76,9 +77,9 @@ void usage_(const char *prog, unsigned int full) { "\t-n -- returns samples [default: %zu]\n" "\t-d -- use as input and output delimiter [default: '\\%03hho']\n" "\t-r -- read randomness from [default: '%s']\n" - "\t-b -- set input buffer size [default: %zu]\n" + "\t-b -- input buffer size [default: %zu]\n" "\t-i -- grow reservoir by 1 for every input samples (0 inhibits behavior) [default: %lu]\n" - "\t-h -- double the reservoir growth interval every input samples (0 inhibits behavior) [default: %lu]\n " + "\t-j -- double the reservoir growth interval every input samples (0 inhibits behavior) [default: %lu]\n " "\t-s -- USR1 signals will write reservoir contents to rather than stderr (has no effect on normal output to stdout upon input EOF)\n" "\t-v -- increase verbosity\n" "\t-h -- this screen\n", @@ -105,10 +106,13 @@ void request_snapshot_(int signum) { status_requested_ = 1; } -/* accumulate_input_ - Read (up to #read_block_sz bytes at a time) from #fd (until EOF) into an - accumulator buffer. For each #delimiter character found in what was just - read, occasionally remember the preceeding characters. +/* accumulate_input_ + Read (up to #read_block_sz bytes at a time, or until EOF) from #fd into + an accumulator buffer. For each #delimiter character found in what was + just read, occasionally remember the group of preceeding characters. + This behavioral logic lives here, rather than being contained in the + reservoir module, simply because it was easier to reduce the number of + allocations by handling it here. */ static int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long *psamples, reservoir_t *preservoir) { @@ -137,10 +141,15 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long for (;;) { NOTIFY_DEBUG("read loop\n\n"); - /* make sure there's enough room in our input buffer for a full read() */ + /* + Make sure there's enough room in our input buffer to append a + full read() of #read_block_sz onto the end. + */ if (buf_makeroom(&read_buf, read_block_sz)) { - free(read_buf); - free(new_buf); + int e = errno; + buf_del(&read_buf); + buf_del(&new_buf); + errno = e; return -1; } @@ -148,8 +157,12 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long do { len = read(fd, read_buf->buf + read_buf->buf_start + read_buf->buf_used, BUF_ROOM(read_buf)); - /* a signal may have interrupted read(), deal with that before - doing anything else */ + /* + A signal may have interrupted read(), deal with that before + doing anything else. Status output is best handled here anyhow, + due to the naturally quiescent state of things, and also due to + not being about to finish and output the final state. + */ if (status_requested_) { status_requested_ = 0; NOTIFY_DEBUG("dumping reservoir due to signal"); @@ -163,8 +176,8 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long } while (len == -1 && (errno == EINTR || errno == EAGAIN)); if (len < 0) { NOTIFY_ERROR("%s:%s", "read", strerror(errno)); - free(read_buf); - free(new_buf); + buf_del(&read_buf); + buf_del(&new_buf); return -1; } if (len == 0) { @@ -175,6 +188,10 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long NOTIFY_DEBUG("len:%zd", len); D_BUF("read_buf: ", read_buf); + /* + Accumulator has been filled, pare and process. + */ + while (len > 0) { ssize_t len_flensed; @@ -186,7 +203,7 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long grow_count = 0; if (reservoir_grow(preservoir, 1)) { NOTIFY_ERROR("failed to increase reservoir size"); - free(read_buf); + buf_del(&read_buf); return -1; } } @@ -221,7 +238,7 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long NOTIFY_DEBUG("next buffer will be remembered.."); new_buf = buf_new(0); if (new_buf == NULL) { - free(read_buf); + buf_del(&read_buf); return -1; } } else { @@ -231,8 +248,8 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long len_flensed = buf_flense(&read_buf, bytes_scanned, delimiter, new_buf ? &new_buf : NULL); if (len_flensed < 0) { - free(read_buf); - free(new_buf); + buf_del(&read_buf); + buf_del(&new_buf); return -1; } if (len_flensed == 0) { @@ -274,18 +291,36 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long *psamples += 1; } - free(read_buf); - free(new_buf); + buf_del(&read_buf); + buf_del(&new_buf); return 0; } +/* validate_strtoul_ + Wrap strtoul and check for success. +*/ +static +unsigned long validate_strtoul_(const char *str, unsigned int *invalid) { + char *ep; + unsigned long num; + + errno = 0; + num = strtoul(str, &ep, 0); + if (errno + || ! (optarg && *optarg != '\0' && *ep == '\0') ) { + *invalid += 1; + NOTIFY_ERROR("could not parse '%s' as a number: %s", str, strerror(errno)); + } + return num; +} int main(int argc, char *argv[]) { struct sigaction sa; char *status_filename = NULL; reservoir_t reservoir; unsigned long num_samples = 0; + unsigned int options_error = 0; int c; while ( (c = getopt(argc, argv, "hvb:d:D:i:j:s:n:r:")) != EOF ) { @@ -295,8 +330,7 @@ int main(int argc, char *argv[]) { break; case 'b': - options_.read_buf_sz = strtoul(optarg, NULL, 0); - /* XXX: validate */ + options_.read_buf_sz = validate_strtoul_(optarg, &options_error); break; case 'd': @@ -307,11 +341,11 @@ int main(int argc, char *argv[]) { break; case 'i': - options_.reservoir_grow_per = strtoul(optarg, NULL, 0); + options_.reservoir_grow_per = validate_strtoul_(optarg, &options_error); break; case 'j': - options_.reservoir_grow_double_per = strtoul(optarg, NULL, 0); + options_.reservoir_grow_double_per = validate_strtoul_(optarg, &options_error); break; case 's': @@ -319,7 +353,7 @@ int main(int argc, char *argv[]) { break; case 'n': - options_.reservoir_sz = strtoul(optarg, NULL, 0); + options_.reservoir_sz = validate_strtoul_(optarg, &options_error); break; case 'r': @@ -336,6 +370,10 @@ int main(int argc, char *argv[]) { } } + if (options_error) { + exit(EX_USAGE); + } + #if 0 /* zero-or-more arguments required */ /* if that ever changes... */ diff --git a/test/validate-statistics.sh b/test/validate-statistics.sh new file mode 100755 index 0000000..0df8fb1 --- /dev/null +++ b/test/validate-statistics.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# + +# Collects and counts the output of multiple invocations over the same input stream. +# Counts ought to be roughly the same, based on quality of random source. + +invoke_n() { + seq $1 | ./reservoir_sample -r /dev/random 2>/dev/null +} + +invoke_n_i() { + local i=0 + while [ $i -lt $2 ] + do + invoke_n $1 + ((i++)) + local note=`expr $i % 1000` + if [ $note -eq 0 ] + then + echo "... iteration $i of $2" 1>&2 + fi + done +} + +tally_n_i() { + invoke_n_i $1 $2 | sort -n | uniq -c +} + +tally_n_i 20 10000