Initial commit of functionally-complete code.
author Justin Wind <justin.wind@gmail.com>
Mon, 15 Oct 2012 18:57:41 +0000 (11:57 -0700)
committer Justin Wind <justin.wind@gmail.com>
Mon, 15 Oct 2012 18:57:41 +0000 (11:57 -0700)
.gitignore [new file with mode: 0644]
Makefile [new file with mode: 0644]
notify.c [new file with mode: 0644]
notify.h [new file with mode: 0644]
reservoir_sample.c [new file with mode: 0644]
test_suite.c [new file with mode: 0644]
test_suite.h [new file with mode: 0644]
version.h [new file with mode: 0644]
version.sh [new file with mode: 0755]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..c3a0398
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*.o
+.depend
+._*
+.DS_Store
+.DS_Store?
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..2762a29
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,49 @@
+#!make
+
+# Default to clang unless CC was set by the environment or the command line.
+ifeq "$(origin CC)" "default"
+CC = clang
+endif
+
+CFLAGS += -g -Wall -Wextra
+#CPPFLAGS += -DNDEBUG
+LDFLAGS +=
+
+MAKEDEPEND = $(CC) -MM
+
+TARGETS = reservoir_sample
+TEST_DIR = test
+TESTS = $(addprefix $(TEST_DIR)/, reservoir_sample_test)
+SOURCES = reservoir_sample.c notify.c
+OBJECTS = $(SOURCES:.c=.o)
+TEST_OBJECTS = $(TESTS:=.o) test_suite.o
+
+.PHONY:        all test check clean
+
+all:   $(TARGETS) .depend
+
+# No prerequisites: version.h is only generated when it does not exist yet.
+version.h:
+       ./version.sh 0 0 prototype
+
+# Header dependencies of the sources; pulled in by the -include at the bottom.
+.depend:       $(SOURCES) version.h
+       $(MAKEDEPEND) $(CPPFLAGS) $^ > $@
+
+clean:
+       @rm -rf $(TARGETS) $(TESTS) $(TEST_OBJECTS) $(OBJECTS) *.dSYM .depend core
+
+test:  $(TESTS) $(TARGETS)
+
+
+# Run every test binary, report each failure, and exit non-zero if any test
+# failed so that callers (scripts, CI) can rely on the status of 'make check'.
+check: test
+       @rc=0; for t in $(TESTS); do \
+               echo Checking $${t};\
+               ./$${t} > /dev/null || { echo -- $${t} FAILED --; rc=1; };\
+       done; exit $${rc}
+
+# Test objects are the module sources recompiled with -DTEST, which swaps the
+# module's main() for its test table.
+$(TEST_DIR)/%_test.o:  %.c
+       $(CC) $(CFLAGS) $(CPPFLAGS) -DTEST -c -o $@ $<
+
+$(TEST_DIR)/reservoir_sample_test:     %: %.o test_suite.o notify.o
+
+reservoir_sample:      %: %.o notify.o
+
+-include .depend
diff --git a/notify.c b/notify.c
new file mode 100644 (file)
index 0000000..78e7e44
--- /dev/null
+++ b/notify.c
@@ -0,0 +1,36 @@
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "notify.h"
+
+/* common messaging hook */
+
+/* generic notifier
+ *   Writes one message to stderr: the name of level #l (or "[<l>] " when #l
+ *   is not one of the named levels), followed by the printf-style expansion
+ *   of #fmt.  The stream is locked with flockfile() while the two parts are
+ *   written.
+ */
+static
+void notify_default_(unsigned int l, const char *fmt, ...) {
+       FILE *f = stderr;
+       va_list ap;
+       /* indexed by the NOTIFY_LEVEL_* values declared in notify.h */
+       static const char * const level_names[] = {
+               "FATAL",
+               "ERROR",
+               "INFO",
+               "VERBOSE",
+               "DEBUG",
+               "DEBUG_LOCK",
+       };
+       const size_t levels = sizeof level_names / sizeof *level_names;
+
+       va_start(ap, fmt);
+       flockfile(f);
+       if (l < levels) {
+               fprintf(f, "%s", level_names[l]);
+       } else {
+               /* #l is unsigned, so the matching conversion is %u */
+               fprintf(f, "[%u] ", l);
+       }
+       vfprintf(f, fmt, ap);
+       funlockfile(f);
+       va_end(ap);
+}
+
+notify_fn_t *notify __attribute__((format(printf, 2, 3))) = notify_default_;
diff --git a/notify.h b/notify.h
new file mode 100644 (file)
index 0000000..d9411c9
--- /dev/null
+++ b/notify.h
@@ -0,0 +1,29 @@
+#ifndef NOTIFY_H_V4O5JY61
+#define NOTIFY_H_V4O5JY61
+
+/* simple common notify hook */
+
+/* Messages are delivered through the #notify function pointer: a numeric
+   level followed by a printf-style format and its arguments.  NOTIFY() skips
+   the call when the pointer is NULL. */
+typedef void (notify_fn_t)(unsigned int, const char *, ...);
+extern notify_fn_t *notify __attribute__((format(printf, 2, 3)));
+
+/* message levels; the default notifier in notify.c names them in this order */
+#define NOTIFY_LEVEL_FATAL 0
+#define NOTIFY_LEVEL_ERROR 1
+#define NOTIFY_LEVEL_INFO 2
+#define NOTIFY_LEVEL_VERBOSE 3
+#define NOTIFY_LEVEL_DEBUG 4
+#define NOTIFY_LEVEL_LOCK 5
+
+/* Each NOTIFY_<LEVEL>() prefixes the message with "@file:line/function/" and
+   appends a newline, so callers pass the bare message text. */
+#define NOTIFY(__level__,__fmt__,...) do { if (notify) notify((__level__), __fmt__, ## __VA_ARGS__); } while (0)
+#define NOTIFY_FATAL(fmt,...)   NOTIFY(NOTIFY_LEVEL_FATAL,   "@%s:%d/%s/" fmt "\n", __FILE__, __LINE__, __func__, ## __VA_ARGS__)
+#define NOTIFY_ERROR(fmt,...)   NOTIFY(NOTIFY_LEVEL_ERROR,   "@%s:%d/%s/" fmt "\n", __FILE__, __LINE__, __func__, ## __VA_ARGS__)
+#define NOTIFY_INFO(fmt,...)    NOTIFY(NOTIFY_LEVEL_INFO,    "@%s:%d/%s/" fmt "\n", __FILE__, __LINE__, __func__, ## __VA_ARGS__)
+#define NOTIFY_VERBOSE(fmt,...) NOTIFY(NOTIFY_LEVEL_VERBOSE, "@%s:%d/%s/" fmt "\n", __FILE__, __LINE__, __func__, ## __VA_ARGS__)
+/* the debug variants compile to nothing when NDEBUG is defined */
+#ifndef NDEBUG
+#define NOTIFY_DEBUG(fmt,...)   NOTIFY(NOTIFY_LEVEL_DEBUG,   "@%s:%d/%s/" fmt "\n", __FILE__, __LINE__, __func__, ## __VA_ARGS__)
+#define NOTIFY_DEBUG_LOCK(fmt,...) NOTIFY((NOTIFY_LEVEL_LOCK), "@%s:%d/%s/" fmt "\n", __FILE__, __LINE__, __func__, ## __VA_ARGS__)
+#else /* !NDEBUG */
+#define NOTIFY_DEBUG(fmt,...) do { } while (0)
+#define NOTIFY_DEBUG_LOCK(fmt,...) do { } while (0)
+#endif /* !NDEBUG */
+
+#endif /* NOTIFY_H_V4O5JY61 */
diff --git a/reservoir_sample.c b/reservoir_sample.c
new file mode 100644 (file)
index 0000000..492dd64
--- /dev/null
+++ b/reservoir_sample.c
@@ -0,0 +1,707 @@
+/*  reservoir_sample.c
+        This generates a randomized subset of its input, by means of reservoir-
+    sampling, and a Fisher-Yates shuffle.
+*/
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <sys/uio.h>
+#include <time.h>
+#include <sysexits.h>
+#include <assert.h>
+
+#include "version.h"
+#include "notify.h"
+#include "test_suite.h"
+
+static const char * const src_id_ = "v" VERSION_STR " " VERSION_DATE;
+
+/* run-time settings, initialised to their defaults and adjusted by main() */
+static struct options_ {
+       /* incremented once per -v */
+       unsigned int verbosity;
+       /* block size handed to reservoir_read_ (-b) */
+       size_t read_buf_sz;
+       /* number of slots in the reservoir, i.e. samples to return (-n) */
+       size_t reservoir_sz;
+       /* byte that separates input entries (-d) */
+       char delim;
+       /* byte written after each output entry; -d sets it along with delim */
+       char delim_out;
+       /* path passed to rand_init_ as the randomness source */
+       char *rand_file;
+} options_ = {
+       .verbosity = 0,
+       .read_buf_sz = 8192,
+       .reservoir_sz = 1,
+       .delim = '\n',
+       .delim_out = '\n',
+       .rand_file = "/dev/random",
+};
+
+static int rand_fd_ = -1;
+
+/* growable byte buffer: the live data is the #buf_used bytes starting at
+   offset #buf_start within #buf, whose total capacity is #buf_sz */
+typedef struct buf_ {
+       size_t buf_sz;
+       size_t buf_start;
+       size_t buf_used;
+       unsigned char buf[];
+} *buf_t;
+/* number of unused bytes that follow the live data */
+#define BUF_ROOM(__b__) ( (__b__)->buf_sz - ( (__b__)->buf_start + (__b__)->buf_used ) )
+
+/* fixed number of buffer slots; a NULL slot holds nothing */
+typedef struct reservoir_ {
+       size_t reservoir_sz;
+       buf_t reservoir[];
+} *reservoir_t;
+
+/* Debug helpers, compiled out entirely when NDEBUG is defined.
+   D_BUF logs a buffer's bookkeeping and contents behind the format prefix
+   #__pre__ (whose own arguments, if any, follow #__b__), then asserts that
+   start + used does not exceed the buffer size.
+   D_RESERVOIR applies D_BUF to every slot of a reservoir. */
+#ifndef NDEBUG
+#define D_BUF(__pre__,__b__,...) do {\
+       if ( (__b__) == NULL )\
+               NOTIFY_DEBUG(__pre__ "buf:%p", ## __VA_ARGS__, (__b__));\
+       else {\
+               NOTIFY_DEBUG(__pre__ "buf:%p sz:%zu start:%zu used:%zu free:%zu '%.*s'",\
+                            ## __VA_ARGS__,\
+                            (__b__),\
+                            (__b__)->buf_sz,\
+                            (__b__)->buf_start,\
+                            (__b__)->buf_used,\
+                            BUF_ROOM((__b__)),\
+                            (int)(__b__)->buf_used, (__b__)->buf + (__b__)->buf_start);\
+               assert( (__b__)->buf_sz >= (__b__)->buf_start + (__b__)->buf_used );\
+       }\
+} while (0)
+#define D_RESERVOIR(__r__) do {\
+       size_t i;\
+       for (i = 0; i < (__r__)->reservoir_sz; i++) {\
+               D_BUF("reservoir[%zu] ", (__r__)->reservoir[i], i); } } while (0)
+#else
+#define D_RESERVOIR(...)
+#define D_BUF(...)
+#endif
+
+
+/*  usage_
+        Print usage for #prog (reduced to its final path component).  The
+    brief form (#full == 0) is one line on stderr; the full form goes to
+    stdout and adds a summary, the option list with current defaults, and the
+    version string.
+*/
+static
+void usage_(const char *prog, unsigned int full) {
+       const char *slash = strrchr(prog, '/');
+       const char *name = prog;
+
+       /* keep the whole string when it ends in '/' */
+       if (slash != NULL && slash[1] != '\0') {
+               name = slash + 1;
+       }
+
+       if (!full) {
+               fprintf(stderr, "Usage: %s options\n", name);
+               fflush(stderr);
+               return;
+       }
+
+       fprintf(stdout, "%s -- returns a random sampling of input\n\n", name);
+       fprintf(stdout, "Usage: %s options\n", name);
+       fprintf(stdout, "\nOptions:\n"
+                       "\t-n <num> -- returns <num> samples [default: %zu]\n"
+                       "\t-d <delim> -- use <delim> as input delimiter [default: '\\%03hho']\n"
+                       "\t-r <file> -- read randomness from <file> [default: '%s']\n"
+                       "\t-b <bytes> -- read buffer size [default: %zu]\n"
+                       "\t-v -- increase verbosity\n"
+                       "\t-h -- this screen\n",
+               options_.reservoir_sz,
+               options_.delim,
+               options_.rand_file,
+               options_.read_buf_sz);
+       fprintf(stdout, "\n%78s\n", src_id_);
+       fflush(stdout);
+}
+
+
+/*  rand_upto_inclusive_
+        Returns a random value in the range [0, #limit].  The value is read
+    from the randomness file when one is open; if that read fails or comes
+    up short, the failure is reported and mrand48() is used instead.
+        The value is reduced with a plain modulus, so results are slightly
+    biased whenever #limit + 1 does not evenly divide the source range.
+
+    Room for improvement: constrain bits of randomness consumed, based on #limit
+    also maybe read chunks of randomness at a time
+ */
+static
+unsigned long rand_upto_inclusive_(unsigned long limit) {
+       /* wraps to 0 when #limit is the largest unsigned long; every value is
+          then already in range and must not be reduced (modulo by zero) */
+       const unsigned long span = limit + 1;
+       unsigned long randomness;
+
+       if (limit == 0)
+               return 0;
+
+       if (rand_fd_ != -1) {
+               ssize_t len;
+
+               len = read(rand_fd_, &randomness, sizeof randomness);
+               if (len == (ssize_t)sizeof randomness) {
+                       return span ? randomness % span : randomness;
+               }
+               NOTIFY_ERROR("%s(%d, %zu):%zd:%s", "read", rand_fd_, sizeof randomness, len, (len < 0) ? strerror(errno) : ( (len == 0) ? "EOF" : "not enough read consecutively") );
+       }
+
+       randomness = (unsigned long)mrand48();
+
+       return span ? randomness % span : randomness;
+}
+
+
+/*  rand_init_
+        Seed the drand48 family from the clock and process id, then open
+    #rand_file (when not NULL) as the randomness source.
+        Returns 0 on success, -1 if the file could not be opened.
+*/
+static
+int rand_init_(char *rand_file) {
+       srand48(time(NULL) ^ getpid());
+
+       if (rand_file == NULL) {
+               return 0;
+       }
+
+       rand_fd_ = open(rand_file, O_RDONLY);
+       if (rand_fd_ != -1) {
+               return 0;
+       }
+
+       NOTIFY_ERROR("%s('%s'):%s", "open", rand_file, strerror(errno));
+       return -1;
+}
+
+
+/*  buf_new_
+        Allocate an empty buffer with room for #sz bytes, all zeroed.
+        Returns the buffer, or NULL if allocation failed.
+*/
+static
+buf_t buf_new_(size_t sz) {
+       buf_t fresh = malloc(sz + sizeof *fresh);
+
+       if (fresh == NULL) {
+               return NULL;
+       }
+
+       fresh->buf_sz = sz;
+       fresh->buf_start = 0;
+       fresh->buf_used = 0;
+       memset(fresh->buf, 0, sz);
+
+       return fresh;
+}
+
+
+/*  buf_rebase_
+        Slide the live data down to the front of #buf, so that all of the
+    free space follows it.
+*/
+static
+void buf_rebase_(buf_t buf) {
+       if (buf->buf_start != 0) {
+               /* regions may overlap, hence memmove */
+               memmove(buf->buf, buf->buf + buf->buf_start, buf->buf_used);
+               buf->buf_start = 0;
+       }
+}
+
+
+/*  buf_makeroom_
+        Ensure *#pbuf has at least #roomfor free bytes after its live data,
+    creating the buffer if *#pbuf is NULL and growing it if needed.  The
+    buffer is rebased, and may move: callers must reload pointers into it.
+        Returns 0 on success, -1 on allocation failure (an existing buffer
+    is left intact).
+*/
+static
+int buf_makeroom_(buf_t *pbuf, size_t roomfor) {
+       buf_t grown;
+       size_t wanted_sz;
+
+       assert(pbuf != NULL);
+
+       if (*pbuf == NULL) {
+               *pbuf = buf_new_(roomfor);
+               if (*pbuf == NULL) {
+                       return -1;
+               }
+       }
+
+       buf_rebase_(*pbuf);
+
+       if (BUF_ROOM(*pbuf) < roomfor) {
+               wanted_sz = (*pbuf)->buf_used + roomfor;
+               grown = realloc(*pbuf, wanted_sz + sizeof **pbuf);
+               if (grown == NULL) {
+                       NOTIFY_ERROR("%s:%s", "realloc", strerror(errno));
+                       return -1;
+               }
+               grown->buf_sz = wanted_sz;
+               *pbuf = grown;
+       }
+
+       return 0;
+}
+
+
+/*  buf_range_dup_or_append_
+        Append #n bytes of #src's live data, starting #src_skip bytes in, to
+    *#dst.  A NULL *#dst is created first, making this a duplication.
+        Returns 0 on success, -1 if room could not be made in *#dst.
+*/
+static
+int buf_range_dup_or_append_(buf_t src, size_t src_skip, size_t n, buf_t *dst) {
+       const unsigned char *from;
+
+       assert(src != NULL);
+       assert(dst != NULL);
+       assert(src_skip + n <= src->buf_used);
+
+       if (buf_makeroom_(dst, n) != 0) {
+               return -1;
+       }
+
+       from = src->buf + src->buf_start + src_skip;
+       memcpy((*dst)->buf + (*dst)->buf_used, from, n);
+       (*dst)->buf_used += n;
+
+       return 0;
+}
+
+
+/*  buf_flense_
+        Starting after #src_offset characters, scan through #src, stopping at
+    the first character matching #delimiter, whereupon all the characters
+    leading up to #delimiter are copied into *#dst if #dst is not NULL.  #src
+    becomes the characters following #delimiter.
+        #delimiter is matched as an unsigned char, so a delimiter byte of
+    0x80 or above is found even when it arrives sign-extended from a char.
+        Returns the number of characters flensed from #src (delimiter
+    included), 0 if no delimiter was found, or -1 if the copy into *#dst
+    failed.
+
+    Room for improvement:
+        If flensed segment is more than half the buffer, copy remainder of src
+    into dst, then return src, leaving dst in its place.
+*/
+static
+ssize_t buf_flense_(buf_t *src, size_t src_offset, int delimiter, buf_t *dst) {
+       const size_t delimiter_len = 1;
+       const unsigned char *live, *hit;
+       size_t i;
+
+       assert(src != NULL);
+       assert(*src != NULL);
+       assert(src_offset <= (*src)->buf_used);
+
+       NOTIFY_DEBUG("src_offset:%zu", src_offset);
+       D_BUF("src ", *src);
+       D_BUF("dst ", dst ? *dst : NULL);
+
+       live = (*src)->buf + (*src)->buf_start;
+       hit = memchr(live + src_offset, delimiter, (*src)->buf_used - src_offset);
+       if (hit == NULL) {
+               return 0;
+       }
+       i = (size_t)(hit - live);
+
+       /* the whole line is copied, including the part before #src_offset */
+       if (dst != NULL) {
+               if (buf_range_dup_or_append_((*src), 0, i, dst)) {
+                       return -1;
+               }
+       }
+
+       (*src)->buf_start += i + delimiter_len;
+       (*src)->buf_used -= i + delimiter_len;
+
+       D_BUF("src ", *src);
+       D_BUF("dst ", dst ? *dst : NULL);
+
+       return (ssize_t)(i + delimiter_len);
+}
+
+#ifdef TEST
+
+static const char buf_flense_testdata1[] = "a\nbc\ndef\nghij\nklmno\npqr\ns\ntuv\nwxyz0\n1234\n567\n89\n0leftovers";
+static const char buf_flense_testdata2[] = "\n\nfoo\nbar\n\n";
+
+struct buf_flense_expected_result_ {
+       ssize_t r;
+       const char *buf;
+} buf_flense_expected1[] = {
+       { 1 + 1, "a" },
+       { 2 + 1, "bc" },
+       { 3 + 1, "def" },
+       { 4 + 1, "ghij" },
+       { 5 + 1, "klmno" },
+       { 3 + 1, "pqr" },
+       { 1 + 1, "s" },
+       { 3 + 1, "tuv" },
+       { 5 + 1, "wxyz0" },
+       { 4 + 1, "1234" },
+       { 3 + 1, "567" },
+       { 2 + 1, "89" },
+       { 0, "0leftovers" },
+}, buf_flense_expected2[] = {
+       { 0 + 1, "" },
+       { 0 + 1, "" },
+       { 3 + 1, "foo" },
+       { 3 + 1, "bar" },
+       { 0 + 1, "" },
+       { 0, "" },
+};
+
+struct test_buf_flense_data_ {
+       const char *src;
+       const struct buf_flense_expected_result_ *expected;
+} test_buf_flense_data1 = {
+       .src = buf_flense_testdata1,
+       .expected = buf_flense_expected1,
+}, test_buf_flense_data2 = {
+       .src = buf_flense_testdata2,
+       .expected = buf_flense_expected2,
+};
+
+/*  test_buf_flense_
+        Load the source string of #test_arg (a struct test_buf_flense_data_)
+    into a buffer and flense it line by line, comparing each return value and
+    each flensed line against the expectation table, then compare what
+    remains in the buffer against the table's final entry.
+        Returns 0 if everything matched, -1 otherwise.
+*/
+static int test_buf_flense_(void *test_arg, void *suite_arg) {
+       (void)suite_arg;
+       struct test_buf_flense_data_ *test_data = (struct test_buf_flense_data_ *)test_arg;
+       /* size_t matches both strlen() and the %zu conversion below */
+       const size_t testdata_len = strlen(test_data->src);
+       const struct buf_flense_expected_result_ *next_result = test_data->expected;
+       int retval = 0;
+       buf_t src;
+
+       TEST_INFO("initializing src buffer with %zu characters", testdata_len);
+       src = buf_new_(testdata_len);
+       if (src == NULL) {
+               TEST_ERROR("%s:%s", "buf_new_", "failed");
+               return -1;
+       }
+       memcpy(src->buf, test_data->src, testdata_len);
+       src->buf_used += testdata_len;
+
+       D_BUF("src ", src);
+
+       for (;;) {
+               ssize_t r;
+               buf_t dst = NULL;
+
+               r = buf_flense_(&src, 0, '\n', &dst);
+               if (r != next_result->r) {
+                       TEST_ERROR("result '%zd' does not match expected result '%zd'", r, next_result->r);
+                       retval = -1;
+               }
+               if (r <= 0 || next_result->r == 0) {
+                       /* Stop when flensing is done (0), failed (< 0), or
+                          produced a line the table does not expect: dst may
+                          be NULL, and the table must not be walked past its
+                          final entry. */
+                       if (r == 0) {
+                               TEST_INFO("finished flensing");
+                       }
+                       free(dst);
+                       break;
+               }
+
+               if (strlen(next_result->buf) > dst->buf_used) {
+                       TEST_ERROR("flensed buffer smaller than expected result '%s'", next_result->buf);
+                       D_BUF("dst ", dst);
+                       retval = -1;
+               } else if (memcmp(next_result->buf, dst->buf + dst->buf_start, strlen(next_result->buf))) {
+                       TEST_ERROR("flensed buffer does not match expected result '%s'", next_result->buf);
+                       D_BUF("dst ", dst);
+                       retval = -1;
+               }
+
+               TEST_INFO("flensed: '%.*s'", (int)dst->buf_used, dst->buf + dst->buf_start);
+
+               memset(dst, 0, dst->buf_sz + sizeof *dst);
+               free(dst);
+               dst = NULL;
+
+               next_result++;
+       }
+       if (strlen(next_result->buf) > src->buf_used) {
+               TEST_ERROR("remaining buffer smaller than expected result '%s'", next_result->buf);
+               D_BUF("src ", src);
+               retval = -1;
+       } else if (memcmp(next_result->buf, src->buf + src->buf_start, strlen(next_result->buf))) {
+               TEST_ERROR("remaining buffer does not match expected result '%s'", next_result->buf);
+               D_BUF("src ", src);
+               retval = -1;
+       }
+
+       TEST_INFO("remains: '%.*s'", (int)src->buf_used, src->buf + src->buf_start);
+
+       memset(src, 0, src->buf_sz + sizeof *src);
+       free(src);
+       src = NULL;
+
+       return retval;
+}
+#endif /* TEST */
+
+
+/*  reservoir_remember_
+        Remember #buf, forgetting a previously remembered buffer if
+    #num_lines (the count of lines seen before this one) has already reached
+    the reservoir size.  Remembers them in random order.
+        Takes ownership of #buf: it is either stored in the reservoir or,
+    when the reservoir has no slots, wiped and freed.  A displaced buffer is
+    likewise wiped and freed.
+*/
+static
+void reservoir_remember_(struct reservoir_ *reservoir, size_t num_lines, buf_t buf) {
+       unsigned long randomness;
+       buf_t old_buf;
+
+       assert(reservoir != NULL);
+
+       D_BUF("reserving ", buf);
+
+       if (reservoir->reservoir_sz > 0) {
+               if (num_lines < reservoir->reservoir_sz) {
+                       /* still filling: move a random earlier entry into the
+                          next free slot and put the newcomer in its place */
+                       randomness = rand_upto_inclusive_(num_lines);
+                       NOTIFY_DEBUG("moving index %lu to end (%zu)", randomness, num_lines);
+                       reservoir->reservoir[num_lines] = reservoir->reservoir[randomness];
+                       old_buf = NULL;
+               } else {
+                       /* full: the newcomer evicts a random entry */
+                       randomness = rand_upto_inclusive_(reservoir->reservoir_sz - 1);
+                       old_buf = reservoir->reservoir[randomness];
+               }
+               NOTIFY_DEBUG("replacing reservoir index %lu", randomness);
+               reservoir->reservoir[randomness] = buf;
+       } else {
+               old_buf = buf;
+       }
+
+       if (old_buf != NULL) {
+               D_BUF("FREEING ", old_buf);
+               memset(old_buf, 0, old_buf->buf_sz + sizeof *old_buf);
+               free(old_buf);
+       }
+
+       D_RESERVOIR(reservoir);
+}
+
+
+/*  reservoir_read_
+        Read (up to #read_block_sz bytes at a time) from #fd (until EOF) into an
+    accumulator buffer.  For each #delimiter character found in what was just
+    read, occasionally remember the preceding characters.
+        Every complete line, kept or not, adds one to *#num_lines; a line is
+    kept while the reservoir is still filling, and afterwards with a
+    reservoir_sz-in-(num_lines + 1) chance.  That choice is made once per
+    line, however many reads the line spans.  Unterminated data left at EOF
+    counts as a final line.
+        Returns 0 on success, -1 on a bad #fd, read error, or allocation
+    failure.
+*/
+static
+int reservoir_read_(int fd, size_t read_block_sz, int delimiter, size_t *num_lines, struct reservoir_ *reservoir) {
+       buf_t read_buf, new_buf = NULL;
+       size_t line_scanned = 0; /* how much of the buffer has already been searched for delimiter */
+       int line_decided = 0; /* has keep-or-discard been chosen for the line being accumulated? */
+       ssize_t len;
+
+       assert(read_block_sz > 0);
+       assert(num_lines != NULL);
+       assert(reservoir != NULL);
+
+       if (fd < 0) {
+               return -1;
+       }
+
+       read_buf = buf_new_(read_block_sz);
+       if (read_buf == NULL) {
+               return -1;
+       }
+
+       for (;;) {
+               NOTIFY_DEBUG("read loop\n\n");
+
+               /* rebases the buffer and guarantees room for a full block */
+               if (buf_makeroom_(&read_buf, read_block_sz)) {
+                       goto fail;
+               }
+
+               /* new data lands directly after whatever is still buffered */
+               NOTIFY_DEBUG("read(%d, %p + %zu + %zu, %zu)", fd, (void *)read_buf->buf, read_buf->buf_start, read_buf->buf_used, BUF_ROOM(read_buf));
+               len = read(fd, read_buf->buf + read_buf->buf_start + read_buf->buf_used, BUF_ROOM(read_buf));
+               if (len < 0) {
+                       NOTIFY_ERROR("%s:%s", "read", strerror(errno));
+                       goto fail;
+               }
+               if (len == 0) {
+                       break;
+               }
+               read_buf->buf_used += len;
+
+               NOTIFY_DEBUG("len:%zd", len);
+               D_BUF("read_buf: ", read_buf);
+
+               /* consume every complete line currently in the buffer */
+               while (read_buf->buf_used > 0) {
+                       ssize_t len_flensed;
+
+                       /* determine if we want to save the next line */
+                       if (!line_decided) {
+                               /* save if we've read in fewer lines than the reservoir holds */
+                               /* or else there's a reservoir_sz-in-(num_lines + 1) chance of saving the next line */
+
+                               if (*num_lines < reservoir->reservoir_sz) {
+                                       NOTIFY_DEBUG("still filling reservoir.. %zu filled out of %zu..", *num_lines, reservoir->reservoir_sz);
+                               } else {
+                                       NOTIFY_DEBUG("will save if random [0-%zu] is less than %zu...", *num_lines, reservoir->reservoir_sz);
+                               }
+
+                               if (*num_lines < reservoir->reservoir_sz
+                               ||  rand_upto_inclusive_(*num_lines) < reservoir->reservoir_sz) {
+                                       NOTIFY_DEBUG("next buffer will be reserved..");
+                                       new_buf = buf_new_(0);
+                                       if (new_buf == NULL) {
+                                               goto fail;
+                                       }
+                               } else {
+                                       NOTIFY_DEBUG("not saving next buffer..");
+                               }
+                               line_decided = 1;
+                       }
+
+                       len_flensed = buf_flense_(&read_buf, line_scanned, delimiter, new_buf ? &new_buf : NULL);
+                       if (len_flensed < 0) {
+                               goto fail;
+                       }
+                       if (len_flensed == 0) {
+                               /* no delimiter found yet, stop parsing and read more */
+                               NOTIFY_DEBUG("no delim found after %zu", read_buf->buf_used);
+                               line_scanned = read_buf->buf_used;
+                               buf_rebase_(read_buf);
+                               break;
+                       }
+
+                       /* a complete line was consumed: start afresh on the next */
+                       line_scanned = 0;
+                       line_decided = 0;
+
+                       D_BUF("read_buf: ", read_buf);
+
+                       if (new_buf != NULL) {
+                               D_BUF("parsed complete line: ", new_buf);
+                               reservoir_remember_(reservoir, *num_lines, new_buf);
+                               new_buf = NULL;
+                       }
+
+                       /* count every line, or the selection odds go wrong */
+                       *num_lines += 1;
+               }
+       }
+       /* leftovers */
+       NOTIFY_DEBUG("loop done\n\n");
+       if (read_buf->buf_used) {
+               D_BUF("leftovers: ", read_buf);
+
+               if (new_buf != NULL
+               ||  *num_lines < reservoir->reservoir_sz) {
+                       /* the reservoir takes over the read buffer itself */
+                       reservoir_remember_(reservoir, *num_lines, read_buf);
+                       read_buf = NULL;
+               }
+
+               *num_lines += 1;
+       }
+
+       free(read_buf);
+       free(new_buf);
+
+       return 0;
+
+fail:
+       free(read_buf);
+       free(new_buf);
+
+       return -1;
+}
+
+
+/*  reservoir_write_
+        Write every slot of #reservoir to #fd, each followed by #delim.  An
+    empty slot contributes only the delimiter.  Short writes are resumed and
+    interrupted writes are retried, so each entry is written in full.
+        Returns 0 on success, -1 on a write error.
+
+    Room for improvement: unroll reservoir into (up to IOV_MAX) more iov slots.
+*/
+static
+int reservoir_write_(int fd, struct reservoir_ *reservoir, char delim) {
+       size_t i;
+
+       assert(reservoir != NULL);
+       D_RESERVOIR(reservoir);
+
+       for (i = 0; i < reservoir->reservoir_sz; i++) {
+               struct iovec iov[2];
+               const size_t iov_cnt = sizeof iov / sizeof *iov;
+               size_t iov_next = 0; /* first iov entry not yet fully written */
+
+               if (reservoir->reservoir[i]) {
+                       iov[0].iov_base = reservoir->reservoir[i]->buf + reservoir->reservoir[i]->buf_start;
+                       iov[0].iov_len = reservoir->reservoir[i]->buf_used;
+               } else {
+                       iov[0].iov_base = NULL;
+                       iov[0].iov_len = 0;
+               }
+               iov[1].iov_base = &delim;
+               iov[1].iov_len = sizeof delim;
+
+               while (iov_next < iov_cnt) {
+                       size_t written;
+                       ssize_t len;
+
+                       len = writev(fd, iov + iov_next, (int)(iov_cnt - iov_next));
+                       if (len < 0) {
+                               if (errno == EINTR) {
+                                       continue;
+                               }
+                               NOTIFY_ERROR("%s:%s", "writev", strerror(errno));
+                               return -1;
+                       }
+
+                       /* skip the entries that went out whole... */
+                       written = (size_t)len;
+                       while (iov_next < iov_cnt && written >= iov[iov_next].iov_len) {
+                               written -= iov[iov_next].iov_len;
+                               iov_next++;
+                       }
+                       /* ...and trim the one that was cut short */
+                       if (iov_next < iov_cnt) {
+                               iov[iov_next].iov_base = (char *)iov[iov_next].iov_base + written;
+                               iov[iov_next].iov_len -= written;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+#ifndef TEST
+
+/*  parse_size_
+        Convert the decimal string #str into *#out.
+        Returns 0 on success, or -1 (leaving *#out untouched) if #str is
+    empty, negative, followed by trailing characters, or too large.
+*/
+static
+int parse_size_(const char *str, size_t *out) {
+       unsigned long value;
+       char *end;
+
+       /* strtoul() would quietly turn "-1" into a huge value */
+       if (strchr(str, '-') != NULL) {
+               return -1;
+       }
+
+       errno = 0;
+       value = strtoul(str, &end, 10);
+       if (end == str || *end != '\0' || errno == ERANGE) {
+               return -1;
+       }
+       if ((unsigned long)(size_t)value != value) {
+               return -1;
+       }
+
+       *out = (size_t)value;
+       return 0;
+}
+
+/*  main
+        Parse the options, sample the lines of each named file ("-" or no
+    names at all meaning standard input) into one reservoir, and write the
+    reservoir to standard output.
+        Exits with a sysexits(3) code.
+*/
+int main(int argc, char *argv[]) {
+       struct reservoir_ *reservoir;
+       size_t num_lines = 0;
+       int fd;
+       int c;
+
+       while ( (c = getopt(argc, argv, "hvb:n:d:r:")) != -1 ) {
+               switch (c) {
+                       case 'v':
+                               options_.verbosity++;
+                               break;
+
+                       case 'b':
+                               /* reservoir_read_ requires a non-zero block size */
+                               if (parse_size_(optarg, &options_.read_buf_sz)
+                               ||  options_.read_buf_sz == 0) {
+                                       NOTIFY_ERROR("invalid read buffer size '%s'", optarg);
+                                       usage_(argv[0], 0);
+                                       exit(EX_USAGE);
+                               }
+                               break;
+
+                       case 'd':
+                               options_.delim = *optarg;
+                               options_.delim_out = *optarg;
+                               break;
+
+                       case 'n':
+                               if (parse_size_(optarg, &options_.reservoir_sz)) {
+                                       NOTIFY_ERROR("invalid sample count '%s'", optarg);
+                                       usage_(argv[0], 0);
+                                       exit(EX_USAGE);
+                               }
+                               break;
+
+                       case 'r':
+                               options_.rand_file = optarg;
+                               break;
+
+                       case 'h':
+                               usage_(argv[0], 1);
+                               exit(EX_OK);
+
+                       default:
+                               usage_(argv[0], 0);
+                               exit(EX_USAGE);
+               }
+       }
+
+       if (rand_init_(options_.rand_file)) {
+               NOTIFY_ERROR("failed to initialize randomness source");
+               exit(EX_NOINPUT);
+       }
+
+       /* header plus one slot per sample; refuse counts whose byte size
+          would overflow size_t */
+       if (options_.reservoir_sz > ((size_t)-1 - sizeof *reservoir) / sizeof *reservoir->reservoir) {
+               NOTIFY_ERROR("sample count %zu is too large", options_.reservoir_sz);
+               exit(EX_USAGE);
+       }
+       reservoir = calloc(1, (options_.reservoir_sz * sizeof *reservoir->reservoir) + sizeof *reservoir);
+       if (reservoir == NULL) {
+               NOTIFY_ERROR("%s:%s", "calloc", strerror(errno));
+               exit(EX_OSERR);
+       }
+       reservoir->reservoir_sz = options_.reservoir_sz;
+
+       if (optind == argc) {
+               /* no inputs named: sample standard input */
+               fd = STDIN_FILENO;
+               if (reservoir_read_(fd, options_.read_buf_sz, options_.delim, &num_lines, reservoir)) {
+                       exit(EX_SOFTWARE);
+               }
+       }
+
+       for ( ; optind < argc; optind++) {
+               if (strcmp("-", argv[optind]) == 0) {
+                       fd = STDIN_FILENO;
+                       if (reservoir_read_(fd, options_.read_buf_sz, options_.delim, &num_lines, reservoir)) {
+                               exit(EX_SOFTWARE);
+                       }
+                       continue;
+               }
+
+               fd = open(argv[optind], O_RDONLY);
+               if (fd < 0) {
+                       NOTIFY_ERROR("%s('%s'):%s", "open", argv[optind], strerror(errno));
+                       exit(EX_NOINPUT);
+               }
+               if (reservoir_read_(fd, options_.read_buf_sz, options_.delim, &num_lines, reservoir)) {
+                       exit(EX_SOFTWARE);
+               }
+               if (close(fd)) {
+                       NOTIFY_ERROR("%s:%s", "close", strerror(errno));
+                       exit(EX_OSERR);
+               }
+       }
+
+       if (options_.verbosity) {
+               fprintf(stderr, "%zu sample%s, out of %zu total choices\n", options_.reservoir_sz, options_.reservoir_sz > 1 ? "s" : "", num_lines);
+       }
+
+       if (reservoir_write_(STDOUT_FILENO, reservoir, options_.delim_out)) {
+               exit(EX_SOFTWARE);
+       }
+
+       exit(EX_OK);
+}
+
+#else /* TEST */
+
+void *test_suite_data;
+
+/* suite set-up: initialise randomness without a randomness file */
+int test_suite_pre(void *suite_data) {
+       (void)suite_data;
+
+       return rand_init_(NULL) ? -1 : 0;
+}
+/* suite tear-down: nothing to release, always succeeds */
+int test_suite_post(void *suite_data) {
+       (void)suite_data;
+       return 0;
+}
+
+test_t test_suite[] = {
+       { "test_buf_flense_ 1", test_buf_flense_, NULL, NULL, &test_buf_flense_data1 },
+       { "test_buf_flense_ 2", test_buf_flense_, NULL, NULL, &test_buf_flense_data2 },
+       { NULL, NULL, NULL, NULL, NULL },
+};
+
+
+#endif /* TEST */
diff --git a/test_suite.c b/test_suite.c
new file mode 100644 (file)
index 0000000..9d4ad41
--- /dev/null
+++ b/test_suite.c
@@ -0,0 +1,99 @@
+#include <stdlib.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+#define TEST
+#include "test_suite.h"
+
+static struct options_ {
+       unsigned int verbosity;
+} options_ = {
+       .verbosity = 0,
+};
+
+/* printf-style message to stdout, emitted only when -v was given */
+void test_verbose(const char *fmt, ...) {
+       if (options_.verbosity >= 1) {
+               va_list args;
+
+               va_start(args, fmt);
+               vprintf(fmt, args);
+               va_end(args);
+       }
+}
+
+/* printf-style message to stdout */
+void test_info(const char *fmt, ...) {
+       va_list args;
+
+       va_start(args, fmt);
+       vprintf(fmt, args);
+       va_end(args);
+}
+
+/* printf-style message to stderr */
+void test_error(const char *fmt, ...) {
+       FILE *out = stderr;
+       va_list args;
+
+       va_start(args, fmt);
+       vfprintf(out, fmt, args);
+       va_end(args);
+}
+
+/* Run the test function of #t with its own data and the suite data, report
+   the outcome on stdout, and return the test's result (0 means success).
+   Only test_fn is called here; the entry's pre and post hooks are not. */
+static
+int run_test_(test_t *t) {
+       const int result = t->test_fn(t->test_data, test_suite_data);
+       const char *verdict = result ? "FAILED" : "OK";
+
+       test_info("-- test '%s': %s --\n", t->test_name, verdict);
+
+       return result;
+}
+
+/*  main
+        Test runner.  -v raises verbosity, -l lists the available tests and
+    exits.  Any remaining arguments name the tests to run, in order; with
+    none, every test in test_suite[] is run.  The suite's pre hook runs
+    first and its post hook last.
+        Exits with EXIT_FAILURE at the first failing hook or test, or on an
+    unknown option or test name; EXIT_SUCCESS otherwise.
+*/
+int main(int argc, char **argv) {
+       test_t *t;
+       int c;
+
+       while ( (c = getopt(argc, argv, "vl")) != EOF ) {
+               switch (c) {
+                       case 'v':
+                               options_.verbosity++;
+                               break;
+                       case 'l':
+                               test_info("available test suites:\n");
+                               for (t = test_suite; t->test_name; t++) {
+                                       test_info("\t%s\n", t->test_name);
+                               }
+                               exit(EXIT_SUCCESS);
+                       default:
+                               exit(EXIT_FAILURE);
+               }
+       }
+
+       if (test_suite_pre(test_suite_data)) {
+               /* newline-terminated, like the post-hook message below */
+               test_error("%s: %s\n", "test_suite_pre", "FAILED");
+               exit(EXIT_FAILURE);
+       }
+
+       if (argc - optind) {
+               while (argc - optind) {
+                       /* look the requested name up in the table */
+                       for (t = test_suite; t->test_name && strcmp(argv[optind], t->test_name); t++) {}
+                       if (t->test_name == NULL) {
+                               test_error("unknown suite '%s'\n", argv[optind]);
+                               exit(EXIT_FAILURE);
+                       }
+                       if (run_test_(t))
+                               exit(EXIT_FAILURE);
+                       optind++;
+               }
+       } else {
+               for (t = test_suite; t->test_name; t++) {
+                       if (run_test_(t))
+                               exit(EXIT_FAILURE);
+               }
+       }
+
+       if (test_suite_post(test_suite_data)) {
+               test_error("%s: %s\n", "test_suite_post", "FAILED");
+               exit(EXIT_FAILURE);
+       }
+
+       exit(EXIT_SUCCESS);
+}
\ No newline at end of file
diff --git a/test_suite.h b/test_suite.h
new file mode 100644 (file)
index 0000000..09fd24d
--- /dev/null
+++ b/test_suite.h
@@ -0,0 +1,35 @@
+#ifndef TEST_SUITE_H_6XEHCUS5
+#define TEST_SUITE_H_6XEHCUS5
+
+#ifdef TEST
+
+/* one entry of a module's test table; a NULL test_name ends the table */
+typedef struct test {
+       /* name shown in listings and matched against command-line arguments */
+       const char * const test_name;
+       /* the test itself; returns 0 on success */
+       int (* const test_fn)(void *test_data, void *suite_data);
+       /* per-test hooks -- NOTE(review): the runner in test_suite.c does not
+          appear to call these; confirm before relying on them */
+       int (* const test_pre_fn)(void *test_data, void *suite_data);
+       int (* const test_post_fn)(void *test_data, void *suite_data);
+       /* passed to test_fn as its first argument */
+       void *test_data;
+} test_t;
+
+/* define all of these within module to test */
+extern test_t test_suite[];
+extern int test_suite_pre(void *suite_data);
+extern int test_suite_post(void *suite_data);
+extern void *test_suite_data;
+
+/* use these in tests */
+void test_verbose(const char *fmt, ...) __attribute__((format(printf, 1, 2))); /* stdout only with -v flag */
+void test_info(const char *fmt, ...) __attribute__((format(printf, 1, 2)));    /* stdout */
+void test_error(const char *fmt, ...) __attribute__((format(printf, 1, 2)));   /* stderr */
+
+#define TEST_VERBOSE(fmt,...) do { test_verbose("%s@%s:%d/%s/" fmt "\n",  "verbose", __FILE__, __LINE__, __func__, ##__VA_ARGS__); } while (0)
+#define TEST_INFO(fmt,...)    do { test_info("%s@%s:%d/%s/" fmt "\n",  "info", __FILE__, __LINE__, __func__, ##__VA_ARGS__); } while (0)
+#define TEST_ERROR(fmt,...)   do { test_error("%s@%s:%d/%s/" fmt "\n",  "error", __FILE__, __LINE__, __func__, ##__VA_ARGS__); } while (0)
+
+#else /* TEST */
+#define TEST_VERBOSE(...)
+#define TEST_INFO(...)
+#define TEST_ERROR(...)
+#endif /* TEST */
+
+#endif /* TEST_SUITE_H_6XEHCUS5 */
diff --git a/version.h b/version.h
new file mode 100644 (file)
index 0000000..4a032a2
--- /dev/null
+++ b/version.h
@@ -0,0 +1,10 @@
+#ifndef VERSION_H
+#define VERSION_H
+
+#define VERSION_MAJOR 0
+#define VERSION_MINOR 0
+#define VERSION_EXTRA_STR "prototype"
+#define VERSION_STR "0.0-prototype"
+#define VERSION_DATE  "2012-09-27"
+
+#endif
diff --git a/version.sh b/version.sh
new file mode 100755 (executable)
index 0000000..dc50674
--- /dev/null
+++ b/version.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+# generate new version.h
+#   usage: version.sh <major> <minor> [<extra>]
+#   Writes version.h into the current directory, stamped with today's date
+#   in UTC.
+
+if [ $# -lt 2 ]
+       then
+       # usage errors go to stderr
+       echo "usage: $(basename "$0") <major> <minor> [<extra>]" >&2
+       exit 1
+fi
+
+# give up if printf does not accept an argument as an unsigned number
+maj=$(printf "%u" "$1") || exit 1
+min=$(printf "%u" "$2") || exit 1
+shift 2
+# everything after <minor> is joined into one string
+ext="$*"
+
+str="${maj}.${min}"
+if [ -n "${ext}" ]
+       then
+       str="${str}-${ext}"
+fi
+now=$(TZ=UTC date "+%Y-%m-%d") || exit 1
+
+cat>"version.h"<<EOF
+#ifndef VERSION_H
+#define VERSION_H
+
+#define VERSION_MAJOR ${maj}
+#define VERSION_MINOR ${min}
+#define VERSION_EXTRA_STR "${ext}"
+#define VERSION_STR "${str}"
+#define VERSION_DATE  "${now}"
+
+#endif
+EOF
+