Unified textual style in comments.
Fixed wrong option identifier in usage_().
Added _del() to match _new() call in module interfaces.
Numeric command-line arguments now do basic error-checking, but still do no sane range-checking yet.
Added simple statistical test to 'make check' target.
echo Checking $${t};\
./$${t} > /dev/null || echo -- $${t} FAILED --;\
done
+ $(TEST_DIR)/validate-statistics.sh
$(TEST_DIR)/%_test.o: %.c
$(CC) $(CFLAGS) $(CPPFLAGS) -DTEST -c -o $@ $<
#include "notify.h"
#include "test_suite.h"
+/* If these ever need to be overridden... */
+void *(*buf_malloc_)(size_t) = malloc;
+void *(*buf_realloc_)(void *, size_t) = realloc;
+void (*buf_free_)(void *) = free;
+
inline
buf_t buf_new(size_t sz) {
- buf_t buf = malloc(sz + sizeof *buf);
+ buf_t buf = buf_malloc_(sz + sizeof *buf);
if (buf) {
buf->buf_sz = sz;
buf->buf_start = buf->buf_used = 0;
return buf;
}
+inline
+void buf_del(buf_t *pbuf) {
+ if (pbuf) {
+ if (*pbuf) {
+ buf_free_(*pbuf);
+ *pbuf = NULL;
+ }
+ }
+}
+
inline
void buf_rebase(buf_t buf) {
if (buf->buf_start == 0)
return 0;
new_sz = (*pbuf)->buf_used + roomfor;
- tmp_ptr = realloc(*pbuf, new_sz + sizeof **pbuf);
+ tmp_ptr = buf_realloc_(*pbuf, new_sz + sizeof **pbuf);
if (tmp_ptr == NULL) {
NOTIFY_ERROR("%s:%s", "realloc", strerror(errno));
return -1;
TEST_INFO("flensed: '%.*s'", (int)dst->buf_used, dst->buf + dst->buf_start);
memset(dst, 0, dst->buf_sz + sizeof *dst);
- free(dst);
- dst = NULL;
+ buf_del(&dst);
next_result++;
}
TEST_INFO("remains: '%.*s'", (int)src->buf_used, src->buf + src->buf_start);
memset(src, 0, src->buf_sz + sizeof *src);
- free(src);
- src = NULL;
+ buf_del(&src);
return retval;
}
#include "notify.h"
#endif /* NDEBUG */
-/* A simple sliding-window byte buffer. */
+/* A simple sliding-window byte buffer. */
typedef struct buf_ {
size_t buf_sz;
#define BUF_ROOM(__b__) ( (__b__)->buf_sz - ( (__b__)->buf_start + (__b__)->buf_used ) )
#ifndef NDEBUG
+/* For debugging, dump a buffer. */
#define D_BUF(__pre__,__b__,...) do {\
if ( (__b__) == NULL )\
NOTIFY_DEBUG(__pre__ "buf:%p", ## __VA_ARGS__, (__b__));\
#define D_BUF(...)
#endif /* NDEBUG */
-/* buf_new
- Allocate and return a new buf_t capable of holding #sz bytes.
+/* buf_new
+ Allocate and return a new buf_t capable of holding #sz bytes.
*/
buf_t buf_new(size_t sz);
-/* buf_rebase
- Reclaim any free space from the start of #buf, preserving active content.
+/* buf_del
+ Deallocate the buf_t pointed to by #pbuf.
+*/
+void buf_del(buf_t *pbuf);
+
+/* buf_rebase
+ Reclaim any free space from the start of #buf, preserving active content.
*/
void buf_rebase(buf_t buf);
-/* buf_makeroom
- Assures that the buf pointed to by #pbuf has at space to hold #roomfor more
- bytes.
+/* buf_makeroom
+ Assures that the buf pointed to by #pbuf has at space to hold #roomfor
+ more bytes.
*/
int buf_makeroom(buf_t *pbuf, size_t roomfor);
-/* buf_range_dup_or_append
- Starting at the #src_offset byte of #src, appends the following #n bytes to
- the buffer pointed to by #dstp, which will be re/allocated if needed.
+/* buf_range_dup_or_append
+ Starting at the #src_offset byte of #src, appends the following #n
+ bytes to the buffer pointed to by #dstp, which will be re/allocated if
+ needed.
*/
int buf_range_dup_or_append(buf_t src, size_t src_offset, size_t n, buf_t *pdst);
-/* buf_flense
+/* buf_flense
Starting after #src_offset characters, scan through the buffer pointed
to by #psrc, stopping at the first byte matching #delimiter, whereupon, if
#pdst is not NULL, all the bytes previous to #delimiter are appended onto
#include "randomness.h"
#include "test_suite.h"
+/* If these ever need to be overridden.. */
+void *(*reservoir_malloc_)(size_t) = malloc;
+void *(*reservoir_realloc_)(void *, size_t) = realloc;
+void (*reservoir_free_)(void *) = free;
+
reservoir_t reservoir_new(size_t sz) {
reservoir_t reservoir;
- reservoir = malloc((sz * sizeof *reservoir->reservoir) + sizeof *reservoir);
+ reservoir = reservoir_malloc_((sz * sizeof *reservoir->reservoir) + sizeof *reservoir);
if (reservoir == NULL) {
NOTIFY_ERROR("%s:%s", "malloc", strerror(errno));
return NULL;
return reservoir;
}
+void reservoir_del(reservoir_t *preservoir) {
+ if (preservoir) {
+ if (*preservoir) {
+ while ((*preservoir)->reservoir_used) {
+ (*preservoir)->reservoir_used -= 1;
+ buf_del(&((*preservoir)->reservoir[(*preservoir)->reservoir_used]));
+ }
+ reservoir_free_(*preservoir);
+ *preservoir = NULL;
+ }
+ }
+}
+
int reservoir_grow(reservoir_t *preservoir, size_t growby) {
assert(preservoir != NULL);
if (growby) {
- void *tmp_ptr = realloc(*preservoir, (((*preservoir)->reservoir_sz + growby) * sizeof *(*preservoir)->reservoir) + sizeof **preservoir);
+ void *tmp_ptr = reservoir_realloc_(*preservoir, (((*preservoir)->reservoir_sz + growby) * sizeof *(*preservoir)->reservoir) + sizeof **preservoir);
if (tmp_ptr == NULL) {
NOTIFY_ERROR("%s:%s", "realloc", strerror(errno));
return -1;
if (old_buf != NULL) {
D_BUF("FREEING ", old_buf);
memset(old_buf, 0, old_buf->buf_sz + sizeof *old_buf);
- free(old_buf);
+ buf_del(&old_buf);
+ assert(old_buf == NULL);
}
D_RESERVOIR(reservoir);
#include "notify.h"
#endif /* NDEBUG */
-/* A pool of buf_t */
+/*
+ A size-limited pool of buf_t entries. When the reservoir is filled with
+ more entries than there is room for, some spill away and are lost.
+*/
typedef struct reservoir_ {
size_t reservoir_sz; /* allocated slots */
} *reservoir_t;
#ifndef NDEBUG
+/* For debugging, dump an entire reservoir's contents. */
#define D_RESERVOIR(__r__) do {\
size_t i;\
NOTIFY_DEBUG("reservoir:%p sz: %zu used:%zu tally:%zu", (__r__), (__r__)->reservoir_sz, (__r__)->reservoir_used, (__r__)->reservoir_tally);\
#endif /* NDEBUG */
/* reservoir_new
- Allocate and return a new reservoir capable of holding #sz bufs.
+ Allocate and return a new reservoir capable of holding #sz buf_t
+ entries.
*/
reservoir_t reservoir_new(size_t sz);
+/* reservoir_del
+ Deallocate the reservoir pointed to by #preservoir.
+*/
+void reservoir_del(reservoir_t *preservoir);
+
/* reservoir_grow
- Increase the storage capacity of the reservoir pointed to by #preservoir
- by #growby bufs.
+ Increase the storage capacity of the reservoir pointed to by
+ #preservoir by #growby entries.
*/
int reservoir_grow(reservoir_t *preservoir, size_t growby);
/* reservoir_remember
- Remember #buf, forgetting another buf at random if the reservoir
- pointed to by #preservoir is already full to capacity.
+ Remember #buf, forgetting (and freeing) another buf_t chosen at random
+ if the reservoir pointed to by #preservoir is already full to capacity.
*/
void reservoir_remember(reservoir_t reservoir, buf_t buf);
/* reservoir_write
- Write the contents of the bufs within #reservoir to #fd, each with a
- trailing #delim.
+ Write the contents of the buf_t entries within #reservoir to #fd, each
+ with a trailing #delim character.
*/
int reservoir_write(int fd, reservoir_t reservoir, char delim);
/* reservoir_write_meta
- Write metadata of #reservoir and #samples to fd.
+ Write metadata about #reservoir and #samples to fd.
*/
int reservoir_write_meta(int fd, reservoir_t reservoir, unsigned long samples, char delim);
.status_fd = STDERR_FILENO,
};
-/* #status_requested_ will be set whenever a view of the current sample
- reservoir is desired.
+/* #status_requested_
+ This will be set whenever a snapshot of the current state of the sample
+ reservoir is desired. Typically this will occur on a signal.
*/
static int status_requested_ = 0;
"\t-n <num> -- returns <num> samples [default: %zu]\n"
"\t-d <delim> -- use <delim> as input and output delimiter [default: '\\%03hho']\n"
"\t-r <file> -- read randomness from <file> [default: '%s']\n"
- "\t-b <bytes> -- set input buffer size [default: %zu]\n"
+ "\t-b <bytes> -- input buffer size [default: %zu]\n"
"\t-i <num> -- grow reservoir by 1 for every <num> input samples (0 inhibits behavior) [default: %lu]\n"
- "\t-h <num> -- double the reservoir growth interval every <num> input samples (0 inhibits behavior) [default: %lu]\n "
+ "\t-j <num> -- double the reservoir growth interval every <num> input samples (0 inhibits behavior) [default: %lu]\n "
"\t-s <file> -- USR1 signals will write reservoir contents to <file> rather than stderr (has no effect on normal output to stdout upon input EOF)\n"
"\t-v -- increase verbosity\n"
"\t-h -- this screen\n",
status_requested_ = 1;
}
-/* accumulate_input_
- Read (up to #read_block_sz bytes at a time) from #fd (until EOF) into an
- accumulator buffer. For each #delimiter character found in what was just
- read, occasionally remember the preceeding characters.
+/* accumulate_input_
+ Read (up to #read_block_sz bytes at a time, or until EOF) from #fd into
+ an accumulator buffer. For each #delimiter character found in what was
+ just read, occasionally remember the group of preceeding characters.
+ This behavioral logic lives here, rather than being contained in the
+ reservoir module, simply because it was easier to reduce the number of
+ allocations by handling it here.
*/
static
int accumulate_input_(int fd, size_t read_block_sz, int delimiter, unsigned long *psamples, reservoir_t *preservoir) {
for (;;) {
NOTIFY_DEBUG("read loop\n\n");
- /* make sure there's enough room in our input buffer for a full read() */
+ /*
+ Make sure there's enough room in our input buffer to append a
+ full read() of #read_block_sz onto the end.
+ */
if (buf_makeroom(&read_buf, read_block_sz)) {
- free(read_buf);
- free(new_buf);
+ int e = errno;
+ buf_del(&read_buf);
+ buf_del(&new_buf);
+ errno = e;
return -1;
}
do {
len = read(fd, read_buf->buf + read_buf->buf_start + read_buf->buf_used, BUF_ROOM(read_buf));
- /* a signal may have interrupted read(), deal with that before
- doing anything else */
+ /*
+ A signal may have interrupted read(), deal with that before
+ doing anything else. Status output is best handled here anyhow,
+ due to the naturally quiescent state of things, and also due to
+ not being about to finish and output the final state.
+ */
if (status_requested_) {
status_requested_ = 0;
NOTIFY_DEBUG("dumping reservoir due to signal");
} while (len == -1 && (errno == EINTR || errno == EAGAIN));
if (len < 0) {
NOTIFY_ERROR("%s:%s", "read", strerror(errno));
- free(read_buf);
- free(new_buf);
+ buf_del(&read_buf);
+ buf_del(&new_buf);
return -1;
}
if (len == 0) {
NOTIFY_DEBUG("len:%zd", len);
D_BUF("read_buf: ", read_buf);
+ /*
+ Accumulator has been filled, pare and process.
+ */
+
while (len > 0) {
ssize_t len_flensed;
grow_count = 0;
if (reservoir_grow(preservoir, 1)) {
NOTIFY_ERROR("failed to increase reservoir size");
- free(read_buf);
+ buf_del(&read_buf);
return -1;
}
}
NOTIFY_DEBUG("next buffer will be remembered..");
new_buf = buf_new(0);
if (new_buf == NULL) {
- free(read_buf);
+ buf_del(&read_buf);
return -1;
}
} else {
len_flensed = buf_flense(&read_buf, bytes_scanned, delimiter, new_buf ? &new_buf : NULL);
if (len_flensed < 0) {
- free(read_buf);
- free(new_buf);
+ buf_del(&read_buf);
+ buf_del(&new_buf);
return -1;
}
if (len_flensed == 0) {
*psamples += 1;
}
- free(read_buf);
- free(new_buf);
+ buf_del(&read_buf);
+ buf_del(&new_buf);
return 0;
}
+/* validate_strtoul_
+ Wrap strtoul and check for success.
+*/
+static
+unsigned long validate_strtoul_(const char *str, unsigned int *invalid) {
+ char *ep;
+ unsigned long num;
+
+ errno = 0;
+ num = strtoul(str, &ep, 0);
+ if (errno
+ || ! (optarg && *optarg != '\0' && *ep == '\0') ) {
+ *invalid += 1;
+ NOTIFY_ERROR("could not parse '%s' as a number: %s", str, strerror(errno));
+ }
+ return num;
+}
int main(int argc, char *argv[]) {
struct sigaction sa;
char *status_filename = NULL;
reservoir_t reservoir;
unsigned long num_samples = 0;
+ unsigned int options_error = 0;
int c;
while ( (c = getopt(argc, argv, "hvb:d:D:i:j:s:n:r:")) != EOF ) {
break;
case 'b':
- options_.read_buf_sz = strtoul(optarg, NULL, 0);
- /* XXX: validate */
+ options_.read_buf_sz = validate_strtoul_(optarg, &options_error);
break;
case 'd':
break;
case 'i':
- options_.reservoir_grow_per = strtoul(optarg, NULL, 0);
+ options_.reservoir_grow_per = validate_strtoul_(optarg, &options_error);
break;
case 'j':
- options_.reservoir_grow_double_per = strtoul(optarg, NULL, 0);
+ options_.reservoir_grow_double_per = validate_strtoul_(optarg, &options_error);
break;
case 's':
break;
case 'n':
- options_.reservoir_sz = strtoul(optarg, NULL, 0);
+ options_.reservoir_sz = validate_strtoul_(optarg, &options_error);
break;
case 'r':
}
}
+ if (options_error) {
+ exit(EX_USAGE);
+ }
+
#if 0
/* zero-or-more arguments required */
/* if that ever changes... */
--- /dev/null
+#!/bin/bash
+#
+
+# Collects and counts the output of multiple invocations over the same input stream.
+# Counts ought to be roughly the same, based on quality of random source.
+
+invoke_n() {
+ seq $1 | ./reservoir_sample -r /dev/random 2>/dev/null
+}
+
+invoke_n_i() {
+ local i=0
+ while [ $i -lt $2 ]
+ do
+ invoke_n $1
+ ((i++))
+ local note=`expr $i % 1000`
+ if [ $note -eq 0 ]
+ then
+ echo "... iteration $i of $2" 1>&2
+ fi
+ done
+}
+
+tally_n_i() {
+ invoke_n_i $1 $2 | sort -n | uniq -c
+}
+
+tally_n_i 20 10000