General cleanups, and argument validations.
authorJustin Wind <justin.wind@gmail.com>
Tue, 26 Feb 2013 22:08:03 +0000 (14:08 -0800)
committerJustin Wind <justin.wind@gmail.com>
Tue, 26 Feb 2013 22:08:03 +0000 (14:08 -0800)
Unified textual style in comments.
Fixed wrong option identifier in usage_().
Added _del() to match _new() call in module interfaces.
Numeric command-line arguments now do basic error-checking, but still do no sane range-checking yet.
Added simple statistical test to 'make check' target.

Makefile
buf.c
buf.h
reservoir.c
reservoir.h
reservoir_sample.c
test/validate-statistics.sh [new file with mode: 0755]

index 405e0337e375fe2003d988e481109329f0286384..b335a02a28e34d6c74422923468fa1fc11640070 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -39,6 +39,7 @@ check:        test
                echo Checking $${t};\
                ./$${t} > /dev/null || echo -- $${t} FAILED --;\
        done
+       $(TEST_DIR)/validate-statistics.sh
 
 $(TEST_DIR)/%_test.o:  %.c
        $(CC) $(CFLAGS) $(CPPFLAGS) -DTEST -c -o $@ $<
diff --git a/buf.c b/buf.c
index 7a94079d0e965ebcd3b174e032c18064d7c51eca..4465ff236e5563f6825034f0690cc4373041efbc 100644 (file)
--- a/buf.c
+++ b/buf.c
@@ -7,9 +7,14 @@
 #include "notify.h"
 #include "test_suite.h"
 
+/* If these ever need to be overridden... */
+void *(*buf_malloc_)(size_t) = malloc;
+void *(*buf_realloc_)(void *, size_t) = realloc;
+void (*buf_free_)(void *) = free;
+
 inline
 buf_t buf_new(size_t sz) {
-       buf_t buf = malloc(sz + sizeof *buf);
+       buf_t buf = buf_malloc_(sz + sizeof *buf);
        if (buf) {
                buf->buf_sz = sz;
                buf->buf_start = buf->buf_used = 0;
@@ -18,6 +23,16 @@ buf_t buf_new(size_t sz) {
        return buf;
 }
 
+inline
+void buf_del(buf_t *pbuf) {
+       if (pbuf) {
+               if (*pbuf) {
+                       buf_free_(*pbuf);
+                       *pbuf = NULL;
+               }
+       }
+}
+
 inline
 void buf_rebase(buf_t buf) {
        if (buf->buf_start == 0)
@@ -46,7 +61,7 @@ int buf_makeroom(buf_t *pbuf, size_t roomfor) {
                return 0;
 
        new_sz = (*pbuf)->buf_used + roomfor;
-       tmp_ptr = realloc(*pbuf, new_sz + sizeof **pbuf);
+       tmp_ptr = buf_realloc_(*pbuf, new_sz + sizeof **pbuf);
        if (tmp_ptr == NULL) {
                NOTIFY_ERROR("%s:%s", "realloc", strerror(errno));
                return -1;
@@ -198,8 +213,7 @@ int test_buf_flense_(void *test_arg, void *suite_arg) {
                TEST_INFO("flensed: '%.*s'", (int)dst->buf_used, dst->buf + dst->buf_start);
 
                memset(dst, 0, dst->buf_sz + sizeof *dst);
-               free(dst);
-               dst = NULL;
+               buf_del(&dst);
 
                next_result++;
        }
@@ -216,8 +230,7 @@ int test_buf_flense_(void *test_arg, void *suite_arg) {
        TEST_INFO("remains: '%.*s'", (int)src->buf_used, src->buf + src->buf_start);
 
        memset(src, 0, src->buf_sz + sizeof *src);
-       free(src);
-       src = NULL;
+       buf_del(&src);
 
        return retval;
 }
diff --git a/buf.h b/buf.h
index a47f3ba154d38ec7958fe0f70905f19fb91c8257..70a1ae524538e04fcca415f6b0f2d411a3b0fa06 100644 (file)
--- a/buf.h
+++ b/buf.h
@@ -7,7 +7,7 @@
 #include "notify.h"
 #endif /* NDEBUG */
 
-/* A simple sliding-window byte buffer. */
+/*     A simple sliding-window byte buffer. */
 
 typedef struct buf_ {
        size_t buf_sz;
@@ -18,6 +18,7 @@ typedef struct buf_ {
 #define BUF_ROOM(__b__) ( (__b__)->buf_sz - ( (__b__)->buf_start + (__b__)->buf_used ) )
 
 #ifndef NDEBUG
+/*     For debugging, dump a buffer. */
 #define D_BUF(__pre__,__b__,...) do {\
        if ( (__b__) == NULL )\
                NOTIFY_DEBUG(__pre__ "buf:%p", ## __VA_ARGS__, (__b__));\
@@ -37,29 +38,35 @@ typedef struct buf_ {
 #define D_BUF(...)
 #endif /* NDEBUG */
 
-/*  buf_new
-       Allocate and return a new buf_t capable of holding #sz bytes.
+/*     buf_new
+               Allocate and return a new buf_t capable of holding #sz bytes.
 */
 buf_t buf_new(size_t sz);
 
-/*  buf_rebase
-       Reclaim any free space from the start of #buf, preserving active content.
+/*     buf_del
+               Deallocate the buf_t pointed to by #pbuf.
+*/
+void buf_del(buf_t *pbuf);
+
+/*     buf_rebase
+               Reclaim any free space from the start of #buf, preserving active content.
 */
 void buf_rebase(buf_t buf);
 
-/* buf_makeroom
-               Assures that the buf pointed to by #pbuf has at space to hold #roomfor more
-       bytes.
+/*     buf_makeroom
+               Assures that the buf pointed to by #pbuf has at space to hold #roomfor
+       more bytes.
 */
 int buf_makeroom(buf_t *pbuf, size_t roomfor);
 
-/*  buf_range_dup_or_append
-               Starting at the #src_offset byte of #src, appends the following #n bytes to
-       the buffer pointed to by #dstp, which will be re/allocated if needed.
+/*     buf_range_dup_or_append
+               Starting at the #src_offset byte of #src, appends the following #n
+       bytes to the buffer pointed to by #dstp, which will be re/allocated if
+       needed.
 */
 int buf_range_dup_or_append(buf_t src, size_t src_offset, size_t n, buf_t *pdst);
 
-/*  buf_flense
+/*     buf_flense
                Starting after #src_offset characters, scan through the buffer pointed
        to by #psrc, stopping at the first byte matching #delimiter, whereupon, if
        #pdst is not NULL, all the bytes previous to #delimiter are appended onto
index 1878203037f0bc95ac16bd5b991b61fded1ec35a..c139df4b447c60dc46a7c65b1bcc5770a8b8d012 100644 (file)
 #include "randomness.h"
 #include "test_suite.h"
 
+/* If these ever need to be overridden.. */
+void *(*reservoir_malloc_)(size_t) = malloc;
+void *(*reservoir_realloc_)(void *, size_t) = realloc;
+void (*reservoir_free_)(void *) = free;
+
 reservoir_t reservoir_new(size_t sz) {
        reservoir_t reservoir;
 
-       reservoir = malloc((sz * sizeof *reservoir->reservoir) + sizeof *reservoir);
+       reservoir = reservoir_malloc_((sz * sizeof *reservoir->reservoir) + sizeof *reservoir);
        if (reservoir == NULL) {
                NOTIFY_ERROR("%s:%s", "malloc", strerror(errno));
                return NULL;
@@ -28,11 +33,24 @@ reservoir_t reservoir_new(size_t sz) {
        return reservoir;
 }
 
+void reservoir_del(reservoir_t *preservoir) {
+       if (preservoir) {
+               if (*preservoir) {
+                       while ((*preservoir)->reservoir_used) {
+                               (*preservoir)->reservoir_used -= 1;
+                               buf_del(&((*preservoir)->reservoir[(*preservoir)->reservoir_used]));
+                       }
+                       reservoir_free_(*preservoir);
+                       *preservoir = NULL;
+               }
+       }
+}
+
 int reservoir_grow(reservoir_t *preservoir, size_t growby) {
        assert(preservoir != NULL);
 
        if (growby) {
-               void *tmp_ptr = realloc(*preservoir, (((*preservoir)->reservoir_sz + growby) * sizeof *(*preservoir)->reservoir) + sizeof **preservoir);
+               void *tmp_ptr = reservoir_realloc_(*preservoir, (((*preservoir)->reservoir_sz + growby) * sizeof *(*preservoir)->reservoir) + sizeof **preservoir);
                if (tmp_ptr == NULL) {
                        NOTIFY_ERROR("%s:%s", "realloc", strerror(errno));
                        return -1;
@@ -84,7 +102,8 @@ void reservoir_remember(reservoir_t reservoir, buf_t buf) {
        if (old_buf != NULL) {
                D_BUF("FREEING ", old_buf);
                memset(old_buf, 0, old_buf->buf_sz + sizeof *old_buf);
-               free(old_buf);
+               buf_del(&old_buf);
+               assert(old_buf == NULL);
        }
 
        D_RESERVOIR(reservoir);
index c056ac567e733ce262f93cd16be22f0b724d97b3..e59e00af54e98b9f0c52230a56e6c6b40e72e2dc 100644 (file)
@@ -9,7 +9,10 @@
 #include "notify.h"
 #endif /* NDEBUG */
 
-/* A pool of buf_t */
+/*
+       A size-limited pool of buf_t entries.  When the reservoir is filled with
+       more entries than there is room for, some spill away and are lost.
+*/
 
 typedef struct reservoir_ {
        size_t reservoir_sz; /* allocated slots */
@@ -19,6 +22,7 @@ typedef struct reservoir_ {
 } *reservoir_t;
 
 #ifndef NDEBUG
+/*     For debugging, dump an entire reservoir's contents. */
 #define D_RESERVOIR(__r__) do {\
        size_t i;\
                NOTIFY_DEBUG("reservoir:%p sz: %zu used:%zu tally:%zu", (__r__), (__r__)->reservoir_sz, (__r__)->reservoir_used, (__r__)->reservoir_tally);\
@@ -31,30 +35,36 @@ typedef struct reservoir_ {
 #endif /* NDEBUG */
 
 /*     reservoir_new
-       Allocate and return a new reservoir capable of holding #sz bufs.
+               Allocate and return a new reservoir capable of holding #sz buf_t
+       entries.
 */
 reservoir_t reservoir_new(size_t sz);
 
+/*     reservoir_del
+               Deallocate the reservoir pointed to by #preservoir.
+*/
+void reservoir_del(reservoir_t *preservoir);
+
 /*     reservoir_grow
-       Increase the storage capacity of the reservoir pointed to by #preservoir
-       by #growby bufs.
+               Increase the storage capacity of the reservoir pointed to by
+       #preservoir by #growby entries.
 */
 int reservoir_grow(reservoir_t *preservoir, size_t growby);
 
 /*     reservoir_remember
-               Remember #buf, forgetting another buf at random if the reservoir
-               pointed to by #preservoir is already full to capacity.
+               Remember #buf, forgetting (and freeing) another buf_t chosen at random
+       if the reservoir pointed to by #preservoir is already full to capacity.
 */
 void reservoir_remember(reservoir_t reservoir, buf_t buf);
 
 /*     reservoir_write
-       Write the contents of the bufs within #reservoir to #fd, each with a
-       trailing #delim.
+               Write the contents of the buf_t entries within #reservoir to #fd, each
+       with a trailing #delim character.
 */
 int reservoir_write(int fd, reservoir_t reservoir, char delim);
 
 /*     reservoir_write_meta
-       Write metadata of #reservoir and #samples to fd.
+               Write metadata about #reservoir and #samples to fd.
 */
 int reservoir_write_meta(int fd, reservoir_t reservoir, unsigned long samples, char delim);
 
index 1c64f6fe5598179395241f48c643df05c4b94a45..50a1235642f9d2528fa7db8da096ec829486339f 100644 (file)
@@ -51,8 +51,9 @@ static struct options_ {
        .status_fd = STDERR_FILENO,
 };
 
-/*     #status_requested_ will be set whenever a view of the current sample
-       reservoir is desired.
+/*     #status_requested_
+               This will be set whenever a snapshot of the current state of the sample
+       reservoir is desired.  Typically this will occur on a signal.
 */
 static int status_requested_ = 0;
 
@@ -76,9 +77,9 @@ void usage_(const char *prog, unsigned int full) {
                           "\t-n <num>   -- returns <num> samples [default: %zu]\n"
                           "\t-d <delim> -- use <delim> as input and output delimiter [default: '\\%03hho']\n"
                           "\t-r <file>  -- read randomness from <file> [default: '%s']\n"
-                          "\t-b <bytes> -- set input buffer size [default: %zu]\n"
+                          "\t-b <bytes> -- input buffer size [default: %zu]\n"
                           "\t-i <num>   -- grow reservoir by 1 for every <num> input samples (0 inhibits behavior) [default: %lu]\n"
-                          "\t-h <num>   -- double the reservoir growth interval every <num> input samples (0 inhibits behavior) [default: %lu]\n "
+                          "\t-j <num>   -- double the reservoir growth interval every <num> input samples (0 inhibits behavior) [default: %lu]\n "
                           "\t-s <file>  -- USR1 signals will write reservoir contents to <file> rather than stderr (has no effect on normal output to stdout upon input EOF)\n"
                           "\t-v         -- increase verbosity\n"
                           "\t-h         -- this screen\n",
@@ -105,10 +106,13 @@ void request_snapshot_(int signum) {
        status_requested_ = 1;
 }
 
-/*  accumulate_input_
-        Read (up to #read_block_sz bytes at a time) from #fd (until EOF) into an
-    accumulator buffer.  For each #delimiter character found in what was just
-    read, occasionally remember the preceeding characters.
+/*     accumulate_input_
+               Read (up to #read_block_sz bytes at a time, or until EOF) from #fd into
+       an accumulator buffer.  For each #delimiter character found in what was
+       just read, occasionally remember the group of preceeding characters.
+               This behavioral logic lives here, rather than being contained in the
+       reservoir module, simply because it was easier to reduce the number of
+       allocations by handling it here.
 */
 static
 int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long *psamples, reservoir_t *preservoir) {
@@ -137,10 +141,15 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long
        for (;;) {
                NOTIFY_DEBUG("read loop\n\n");
 
-               /* make sure there's enough room in our input buffer for a full read() */
+               /*
+                               Make sure there's enough room in our input buffer to append a
+                       full read() of #read_block_sz onto the end.
+               */
                if (buf_makeroom(&read_buf, read_block_sz)) {
-                       free(read_buf);
-                       free(new_buf);
+                       int e = errno;
+                       buf_del(&read_buf);
+                       buf_del(&new_buf);
+                       errno = e;
                        return -1;
                }
 
@@ -148,8 +157,12 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long
                do {
                        len = read(fd, read_buf->buf + read_buf->buf_start + read_buf->buf_used, BUF_ROOM(read_buf));
 
-                       /* a signal may have interrupted read(), deal with that before
-                          doing anything else */
+                       /*
+                                       A signal may have interrupted read(), deal with that before
+                               doing anything else.  Status output is best handled here anyhow,
+                               due to the naturally quiescent state of things, and also due to
+                               not being about to finish and output the final state.
+                       */
                        if (status_requested_) {
                                status_requested_ = 0;
                                NOTIFY_DEBUG("dumping reservoir due to signal");
@@ -163,8 +176,8 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long
                } while (len == -1 && (errno == EINTR || errno == EAGAIN));
                if (len < 0) {
                        NOTIFY_ERROR("%s:%s", "read", strerror(errno));
-                       free(read_buf);
-                       free(new_buf);
+                       buf_del(&read_buf);
+                       buf_del(&new_buf);
                        return -1;
                }
                if (len == 0) {
@@ -175,6 +188,10 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long
                NOTIFY_DEBUG("len:%zd", len);
                D_BUF("read_buf: ", read_buf);
 
+               /*
+                       Accumulator has been filled, pare and process.
+               */
+
                while (len > 0) {
                        ssize_t len_flensed;
 
@@ -186,7 +203,7 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long
                                grow_count = 0;
                                if (reservoir_grow(preservoir, 1)) {
                                        NOTIFY_ERROR("failed to increase reservoir size");
-                                       free(read_buf);
+                                       buf_del(&read_buf);
                                        return -1;
                                }
                        }
@@ -221,7 +238,7 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long
                                        NOTIFY_DEBUG("next buffer will be remembered..");
                                        new_buf = buf_new(0);
                                        if (new_buf == NULL) {
-                                               free(read_buf);
+                                               buf_del(&read_buf);
                                                return -1;
                                        }
                                } else {
@@ -231,8 +248,8 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long
 
                        len_flensed = buf_flense(&read_buf, bytes_scanned, delimiter, new_buf ? &new_buf : NULL);
                        if (len_flensed < 0) {
-                               free(read_buf);
-                               free(new_buf);
+                               buf_del(&read_buf);
+                               buf_del(&new_buf);
                                return -1;
                        }
                        if (len_flensed == 0) {
@@ -274,18 +291,36 @@ int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long
                *psamples += 1;
        }
 
-       free(read_buf);
-       free(new_buf);
+       buf_del(&read_buf);
+       buf_del(&new_buf);
 
        return 0;
 }
 
+/*     validate_strtoul_
+       Wrap strtoul and check for success.
+*/
+static
+unsigned long validate_strtoul_(const char *str, unsigned int *invalid) {
+       char *ep;
+       unsigned long num;
+
+       errno = 0;
+       num = strtoul(str, &ep, 0);
+       if (errno
+       ||  ! (optarg && *optarg != '\0' && *ep == '\0') ) {
+               *invalid += 1;
+               NOTIFY_ERROR("could not parse '%s' as a number: %s", str, strerror(errno));
+       }
+       return num;
+}
 
 int main(int argc, char *argv[]) {
        struct sigaction sa;
        char *status_filename = NULL;
        reservoir_t reservoir;
        unsigned long num_samples = 0;
+       unsigned int options_error = 0;
        int c;
 
        while ( (c = getopt(argc, argv, "hvb:d:D:i:j:s:n:r:")) != EOF ) {
@@ -295,8 +330,7 @@ int main(int argc, char *argv[]) {
                                break;
 
                        case 'b':
-                               options_.read_buf_sz = strtoul(optarg, NULL, 0);
-                               /* XXX: validate */
+                               options_.read_buf_sz = validate_strtoul_(optarg, &options_error);
                                break;
 
                        case 'd':
@@ -307,11 +341,11 @@ int main(int argc, char *argv[]) {
                                break;
 
                        case 'i':
-                               options_.reservoir_grow_per = strtoul(optarg, NULL, 0);
+                               options_.reservoir_grow_per = validate_strtoul_(optarg, &options_error);
                                break;
 
                        case 'j':
-                               options_.reservoir_grow_double_per = strtoul(optarg, NULL, 0);
+                               options_.reservoir_grow_double_per = validate_strtoul_(optarg, &options_error);
                                break;
 
                        case 's':
@@ -319,7 +353,7 @@ int main(int argc, char *argv[]) {
                                break;
 
                        case 'n':
-                               options_.reservoir_sz = strtoul(optarg, NULL, 0);
+                               options_.reservoir_sz = validate_strtoul_(optarg, &options_error);
                                break;
 
                        case 'r':
@@ -336,6 +370,10 @@ int main(int argc, char *argv[]) {
                }
        }
 
+       if (options_error) {
+               exit(EX_USAGE);
+       }
+
 #if 0
        /* zero-or-more arguments required */
        /* if that ever changes... */
diff --git a/test/validate-statistics.sh b/test/validate-statistics.sh
new file mode 100755 (executable)
index 0000000..0df8fb1
--- /dev/null
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+
+# Collects and counts the output of multiple invocations over the same input stream.
+# Counts ought to be roughly the same, based on quality of random source.
+
+invoke_n() {
+       seq $1 | ./reservoir_sample -r /dev/random 2>/dev/null
+}
+
+invoke_n_i() {
+       local i=0
+       while [ $i -lt $2 ]
+       do
+               invoke_n $1
+               ((i++))
+               local note=`expr $i % 1000`
+               if [ $note -eq 0 ]
+                       then
+                       echo "... iteration $i of $2" 1>&2
+               fi
+       done
+}
+
+tally_n_i() {
+       invoke_n_i $1 $2 | sort -n | uniq -c
+}
+
+tally_n_i 20 10000