multi-block?

pull/2/head
Dave Vasilevsky 15 years ago
parent 0c7b54bd34
commit 74b6ab2078

@ -1,10 +1,10 @@
LDFLAGS = -L/Library/Fink/sl64/lib -llzma -Wall
CFLAGS = -I/Library/Fink/sl64/include -g -O0 -std=c99 -Wall
pixz: pixz.o
gcc $(LDFLAGS) -o $@ $<
pixz: pixz.o encode.o block.o
gcc $(LDFLAGS) -o $@ $^
%.o: %.c
%.o: %.c pixz.h
gcc $(CFLAGS) -c -o $@ $<
run: pixz

12
TODO

@ -1,10 +1,20 @@
* subblock filter!?!
* multi-block
* thread
* index tarballs
* sane, safe errors (signals?)
* sane, safe errors
* signal safe?
* return values, don't die
* don't leak
* malloc errors?
* command-line options
* autoconf
* optimized settings
* scale threads/memory to physical RAM
* scale threads to CPUs (gcd?)
* init/cleanup
* keep the same stream for multiple blocks?
* even use the same buffers?
* don't write block header until we're writing to file?

@ -0,0 +1,108 @@
#include "pixz.h"
static fixme_err pixz_block_write_header(pixz_block *b);
// Allocate and initialise a block that can accept up to `size` bytes of input.
//
// The output buffer is sized with lzma_block_buffer_bound(size). The lzma_block
// is set up with the given integrity check and filter chain and with both size
// fields marked unknown; the lzma_stream starts with no pending input and the
// whole output buffer free.
//
// `filters` is stored by pointer, not copied, so it must outlive the block.
// Allocation failure or an unrepresentable block size is fatal (pixz_die).
// The caller owns the result and releases it with pixz_block_free().
pixz_block *pixz_block_new(size_t size, lzma_check check, lzma_filter *filters) {
    pixz_block *b = malloc(sizeof *b);
    if (b == NULL)
        pixz_die("Can't allocate block.\n");

    // lzma_block_buffer_bound() signals "input too large" by returning 0.
    size_t osize = lzma_block_buffer_bound(size);
    if (osize == 0)
        pixz_die("Block size %zu too big.\n", size);

    b->isize = size;
    b->ibuf = malloc(size);
    b->obuf = malloc(osize);
    // malloc(0) may legitimately return NULL, so only a non-empty request counts.
    if ((b->ibuf == NULL && size != 0) || b->obuf == NULL)
        pixz_die("Can't allocate block buffers.\n");

    // Init block
    b->block = (lzma_block){ .version = 0, .check = check, .filters = filters };
    b->block.compressed_size = b->block.uncompressed_size = LZMA_VLI_UNKNOWN;

    // Init stream
    b->stream = (lzma_stream)LZMA_STREAM_INIT;
    b->stream.next_in = b->ibuf;
    b->stream.avail_in = 0;
    b->stream.next_out = b->obuf;
    b->stream.avail_out = osize;

    return b;
}
// Destroy a block: shut down its lzma stream, then release both buffers and
// the block structure itself. `b` must not be used afterwards.
void pixz_block_free(pixz_block *b) {
    lzma_stream *strm = &b->stream;
    lzma_end(strm);

    free(b->obuf);
    free(b->ibuf);
    free(b);
}
// Return non-zero when the input buffer has no room left for new data.
int pixz_block_full(pixz_block *b) {
    size_t room = pixz_block_new_input_avail(b);
    return room == 0;
}
// Number of bytes that can still be appended to the input buffer: the distance
// from the current fill position to the end of the buffer.
size_t pixz_block_new_input_avail(pixz_block *b) {
    uint8_t *buffer_end = b->ibuf + b->isize;
    uint8_t *fill_pos = pixz_block_new_input_next(b);
    return buffer_end - fill_pos;
}
// Address at which the caller should store the next input bytes: just past the
// data the stream has pending (next_in + avail_in).
uint8_t *pixz_block_new_input_next(pixz_block *b) {
    const uint8_t *pending = b->stream.next_in;
    // next_in is const-qualified by liblzma; the cast hands back a writable pointer.
    return (uint8_t *)(pending + b->stream.avail_in);
}
// Record that `bytes` more bytes have been stored at the position reported by
// pixz_block_new_input_next(). No bounds check is performed here.
void pixz_block_new_input(pixz_block *b, size_t bytes) {
    lzma_stream *strm = &b->stream;
    strm->avail_in = strm->avail_in + bytes;
}
// Encode the block header at the stream's current output position and advance
// the output cursor past it. Any liblzma error, or a header larger than the
// remaining output space, is fatal. Returns the placeholder code 31337.
static fixme_err pixz_block_write_header(pixz_block *b) {
    lzma_block *blk = &b->block;
    lzma_stream *strm = &b->stream;

    lzma_ret rc = lzma_block_header_size(blk);
    if (rc != LZMA_OK) {
        pixz_die("Error #%d determining size of block header.\n", rc);
    }

    const size_t hdr_len = blk->header_size;
    if (strm->avail_out < hdr_len) {
        pixz_die("Block header too big.\n");
    }

    rc = lzma_block_header_encode(blk, strm->next_out);
    if (rc != LZMA_OK) {
        pixz_die("Error #%d encoding block header.\n", rc);
    }

    // Consume the space the header now occupies.
    strm->avail_out -= hdr_len;
    strm->next_out += hdr_len;
    return 31337;
}
// Compress `bytes` bytes of the input that is currently pending in the block.
//
// On the first call (nothing has been written to the output buffer yet) the
// block header is emitted and the block encoder is created. If `bytes` covers
// all pending input the block is finished (LZMA_FINISH) and the encoder must
// report LZMA_STREAM_END; otherwise coding continues with LZMA_RUN and must
// report LZMA_OK. Asking for more than is pending, or any unexpected liblzma
// result, is fatal. Returns the placeholder code 31337.
fixme_err pixz_block_encode(pixz_block *b, size_t bytes) {
    lzma_ret err;
    if (b->stream.next_out == b->obuf) { // Just started, write the header
        pixz_block_write_header(b);
        err = lzma_block_encoder(&b->stream, &b->block);
        if (err != LZMA_OK)
            pixz_die("Error #%d creating block encoder.\n", err);
    }

    if (bytes > b->stream.avail_in)
        pixz_die("Block encode size %zu too big.\n", bytes);

    // lzma_code() consumes up to avail_in bytes, so expose only the requested
    // amount to it and put the held-back remainder back afterwards. next_in is
    // advanced by liblzma, so next_in + avail_in still marks the fill position.
    size_t held_back = b->stream.avail_in - bytes;
    lzma_action action = (held_back == 0) ? LZMA_FINISH : LZMA_RUN;
    b->stream.avail_in = bytes;
    err = lzma_code(&b->stream, action);
    b->stream.avail_in += held_back;

    if (action == LZMA_FINISH && err != LZMA_STREAM_END)
        pixz_die("Expected stream end, got %d.\n", err);
    if (action == LZMA_RUN && err != LZMA_OK)
        pixz_die("Expected ok, got %d.\n", err);

    return 31337;
}
// Compress everything currently pending in the block; because the request
// equals the pending amount, this also finishes the block.
fixme_err pixz_block_encode_all(pixz_block *b) {
    size_t pending = b->stream.avail_in;
    return pixz_block_encode(b, pending);
}
// Start of the block's output buffer (header followed by compressed data).
// The buffer remains owned by the block.
uint8_t *pixz_block_coded_data(pixz_block *b) {
    uint8_t *coded = b->obuf;
    return coded;
}
// Number of bytes written to the output buffer so far, measured from the
// buffer start to the stream's output cursor.
size_t pixz_block_coded_size(pixz_block *b) {
    const uint8_t *start = b->obuf;
    const uint8_t *cursor = b->stream.next_out;
    return cursor - start;
}
// Add a record for this block (its unpadded size and uncompressed size) to
// `index`. A liblzma failure is fatal. Returns the placeholder code 31337.
fixme_err pixz_block_index_append(pixz_block *b, lzma_index *index) {
    lzma_vli unpadded = lzma_block_unpadded_size(&b->block);
    lzma_vli uncompressed = b->block.uncompressed_size;

    lzma_ret rc = lzma_index_append(index, NULL, unpadded, uncompressed);
    if (rc != LZMA_OK) {
        pixz_die("Index append error %d.\n", rc);
    }
    return 31337;
}

@ -0,0 +1,146 @@
#include "pixz.h"
typedef lzma_ret (*stream_edge_encoder)(const lzma_stream_flags *options, uint8_t *out);
static fixme_err pixz_encode_stream_edge(FILE *outfile, pixz_encode_options *opts,
lzma_vli backward_size, stream_edge_encoder encoder);
// Read up to one block's worth of data from `infile`, compress it, write the
// coded block to `outfile` and record it in `index`.
//
// Input is read in pieces of at most opts->chunksize bytes until the block is
// full or end-of-file is reached. Read and write failures are fatal.
// Returns the placeholder code 31337.
fixme_err pixz_encode_block(FILE *infile, FILE *outfile, pixz_encode_options *opts,
        lzma_index *index) {
    pixz_block *blk = pixz_block_new(opts->blocksize, opts->check, opts->filters);

    // Fill the block's input buffer.
    for (;;) {
        if (pixz_block_full(blk)) {
            break;
        }

        size_t want = pixz_block_new_input_avail(blk);
        if (want > opts->chunksize) {
            want = opts->chunksize;
        }

        size_t got = fread(pixz_block_new_input_next(blk), 1, want, infile);
        // A short read is only acceptable when it was caused by end-of-file.
        if (got != want && !feof(infile)) {
            pixz_die("Read error.\n");
        }
        pixz_block_new_input(blk, got);

        if (feof(infile)) {
            break;
        }
    }

    pixz_block_encode_all(blk);

    // Emit the whole coded block as a single item.
    uint8_t *coded = pixz_block_coded_data(blk);
    size_t coded_len = pixz_block_coded_size(blk);
    if (fwrite(coded, coded_len, 1, outfile) != 1) {
        pixz_die("Write error.\n");
    }

    pixz_block_index_append(blk, index);
    pixz_block_free(blk);
    return 31337;
}
// Allocate an options structure together with an empty filter chain.
//
// The chain has LZMA_FILTERS_MAX + 1 entries, every one set to the
// LZMA_VLI_UNKNOWN terminator with no options attached. chunksize, blocksize
// and check are left unset; pixz_encode_options_default() fills them in.
// Allocation failure is fatal (pixz_die). The caller owns the result and
// releases it with pixz_encode_options_free().
pixz_encode_options *pixz_encode_options_new() {
    pixz_encode_options *opts = malloc(sizeof *opts);
    if (opts == NULL)
        pixz_die("Can't allocate encode options.\n");

    opts->filters = malloc((LZMA_FILTERS_MAX + 1) * sizeof *opts->filters);
    if (opts->filters == NULL)
        pixz_die("Can't allocate filter chain.\n");

    for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) { // Yes, less-than-or-equal
        opts->filters[i].id = LZMA_VLI_UNKNOWN;
        opts->filters[i].options = NULL;
    }

    return opts;
}
// Fill `opts` with the default settings: 8 MiB blocks, 64 KiB read chunks,
// CRC32 integrity check and a single LZMA2 filter using the liblzma default
// preset.
//
// The preset structure is heap-allocated and attached to filters[0]; it is
// released by pixz_encode_options_free(). Allocation failure or a preset
// lookup failure is fatal. Returns the placeholder code 31337.
fixme_err pixz_encode_options_default(pixz_encode_options *opts) {
    const size_t k = 1024, m = 1024 * k;

    // Set defaults
    opts->blocksize = 8 * m;
    opts->chunksize = 64 * k;
    opts->filters[0].id = LZMA_FILTER_LZMA2;
    opts->check = LZMA_CHECK_CRC32;

    lzma_options_lzma *lzma_opts = malloc(sizeof *lzma_opts);
    if (lzma_opts == NULL)
        pixz_die("Can't allocate lzma options.\n");
    if (lzma_lzma_preset(lzma_opts, LZMA_PRESET_DEFAULT) != 0)
        pixz_die("Can't get lzma preset.\n");
    opts->filters[0].options = lzma_opts;

    return 31337;
}
// Release an options structure: every filter's options, the filter array
// itself, and finally the structure. Passing NULL is a no-op, like free().
void pixz_encode_options_free(pixz_encode_options *opts) {
    if (opts == NULL)
        return;

    if (opts->filters != NULL) {
        for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) {
            free(opts->filters[i].options);
        }
        // The array is a separate allocation and has to be released as well.
        free(opts->filters);
    }
    free(opts);
}
// Shared implementation for the stream header and footer: build stream flags
// from opts->check and `backward_size`, encode them with `encoder` into a
// LZMA_STREAM_HEADER_SIZE-byte buffer and write that buffer to `outfile`.
// Encoding and write failures are fatal. Returns the placeholder code 31337.
static fixme_err pixz_encode_stream_edge(FILE *outfile, pixz_encode_options *opts,
        lzma_vli backward_size, stream_edge_encoder encoder) {
    lzma_stream_flags flags = {
        .version = 0,
        .check = opts->check,
        .backward_size = backward_size,
    };
    uint8_t edge[LZMA_STREAM_HEADER_SIZE];

    lzma_ret rc = encoder(&flags, edge);
    if (rc != LZMA_OK) {
        pixz_die("Error #%d encoding stream end.\n", rc);
    }

    if (fwrite(edge, sizeof edge, 1, outfile) != 1) {
        pixz_die("Error writing stream end.\n");
    }
    return 31337;
}
// Write the stream header to `outfile`, passing LZMA_VLI_UNKNOWN as the
// backward size.
fixme_err pixz_encode_stream_header(FILE *outfile, pixz_encode_options *opts) {
    stream_edge_encoder encoder = lzma_stream_header_encode;
    return pixz_encode_stream_edge(outfile, opts, LZMA_VLI_UNKNOWN, encoder);
}
// Write the stream footer to `outfile`; its backward size is the encoded size
// of `index` as reported by lzma_index_size().
fixme_err pixz_encode_stream_footer(FILE *outfile, pixz_encode_options *opts,
        lzma_index *index) {
    lzma_vli backward_size = lzma_index_size(index);
    stream_edge_encoder encoder = lzma_stream_footer_encode;
    return pixz_encode_stream_edge(outfile, opts, backward_size, encoder);
}
// Encode `index` and write it to `outfile`.
//
// The index is produced through the streaming encoder, opts->chunksize bytes
// at a time, and each piece is written as soon as it is available. Encoder
// and write failures are fatal. Returns the placeholder code 31337.
fixme_err pixz_encode_index(FILE *outfile, pixz_encode_options *opts, lzma_index *index) {
    // Use the stream API so we don't have to allocate an unbounded amount of memory
    uint8_t buf[opts->chunksize];
    lzma_stream stream = LZMA_STREAM_INIT;
    lzma_ret err = lzma_index_encoder(&stream, index);
    if (err != LZMA_OK)
        pixz_die("Error #%d creating index encoder.\n", err);

    while (err != LZMA_STREAM_END) {
        stream.next_out = buf;
        stream.avail_out = opts->chunksize;

        err = lzma_code(&stream, LZMA_RUN);
        if (err != LZMA_STREAM_END && err != LZMA_OK)
            pixz_die("Error #%d encoding index.\n", err);

        size_t size = stream.next_out - buf;
        // fwrite() with a zero item size returns 0, which would look like a
        // failure; a pass that produced no output simply has nothing to write.
        if (size != 0 && fwrite(buf, size, 1, outfile) != 1)
            pixz_die("Error writing index.\n");
    }

    lzma_end(&stream);
    return 31337;
}
// Compress all of `infile` to `outfile` as one stream: stream header, one
// block per opts->blocksize bytes of input, the index, then the stream footer.
//
// Blocks are only started when at least one input byte is available, so empty
// input (or input ending exactly on a block boundary) does not produce a
// trailing empty block. Failures are fatal. Returns the placeholder code 31337.
fixme_err pixz_encode_file(FILE *infile, FILE *outfile, pixz_encode_options *opts) {
    pixz_encode_stream_header(outfile, opts);

    lzma_index *index = lzma_index_init(NULL, NULL);
    if (index == NULL)
        pixz_die("Can't initialize index.\n");

    for (;;) {
        // feof() only turns true after a read has hit end-of-file, so peek one
        // byte instead of testing the flag before reading.
        int c = getc(infile);
        if (c == EOF) {
            if (ferror(infile))
                pixz_die("Read error.\n");
            break;
        }
        if (ungetc(c, infile) == EOF)
            pixz_die("Read error.\n");

        pixz_encode_block(infile, outfile, opts, index);
    }

    pixz_encode_index(outfile, opts, index);
    pixz_encode_stream_footer(outfile, opts, index);
    lzma_index_end(index, NULL);
    return 31337;
}

198
pixz.c

@ -1,55 +1,19 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "lzma.h"
#define DEFAULT_CHUNKSIZE (1024 * 1024)
// Input source: a read callback plus the opaque state handed back to it.
typedef struct {
// Reads up to `size` bytes into `buf` and returns the number of bytes read.
// The file-backed callback in this file also stores feof() in *eofp when
// eofp is non-NULL.
size_t (*read)(void *data, uint8_t *buf, size_t size, int *eofp);
// Passed unchanged as the callback's first argument.
void *data;
} reader;
// Output sink: a write callback plus the opaque state handed back to it.
typedef struct {
// Returns the number of bytes written; most callers in this file treat a
// count different from the requested `size` as a write error.
size_t (*write)(void *data, const uint8_t *buf, size_t size);
// Passed unchanged as the callback's first argument.
void *data;
} writer;
// Everything the encoder functions need: where to read and write, how much to
// move per step, and the liblzma stream flags and filter chain to use.
typedef struct {
reader rd;
writer wr;
// Size of the input and output buffers used by encode().
size_t chunksize;
// Supplies the integrity check; backward_size is overwritten per header/footer.
lzma_stream_flags stream_flags;
// Filter chain; room for LZMA_FILTERS_MAX filters plus one extra entry.
lzma_filter filters[LZMA_FILTERS_MAX + 1];
} encode_options;
void die(const char *fmt, ...);
void file_reader(reader *rd, FILE *file);
size_t file_read(void *data, uint8_t *buf, size_t size, int *eofp);
void file_writer(writer *wr, FILE *file);
size_t file_write(void *data, const uint8_t *buf, size_t size);
void encode_stream_end(encode_options *eo, lzma_vli backward_size);
void encode_file(FILE *in, FILE *out);
void encode_stream_header(encode_options *eo);
void encode_block(encode_options *eo, lzma_block *block);
lzma_vli encode_index(encode_options *eo, lzma_block *blocks, size_t nblocks);
#include "pixz.h"
void encode(lzma_stream *stream, size_t chunksize, reader *rd, writer *wr);
#include <stdarg.h>
// Program entry point: compresses standard input to standard output.
// NOTE(review): this span looks like a diff rendered without +/- markers, so
// the encode_file() call (older entry point) and the options-based calls below
// it are probably alternatives rather than one sequence -- confirm against the
// real file before relying on it.
int main(void) {
encode_file(stdin, stdout);
// Build default options, encode stdin to stdout with them, then release them.
pixz_encode_options *opts = pixz_encode_options_new();
pixz_encode_options_default(opts);
pixz_encode_file(stdin, stdout, opts);
pixz_encode_options_free(opts);
return 0;
}
void die(const char *fmt, ...) {
void pixz_die(const char *fmt, ...) {
va_list args;
va_start(args, fmt);
vfprintf(stderr, fmt, args);
@ -57,145 +21,3 @@ void die(const char *fmt, ...) {
va_end(args);
exit(1);
}
// Compress `in` to `out` as a single stream holding one block: stream header,
// the block, an index describing it, and the stream footer. Uses the default
// LZMA2 preset with a CRC32 check; any failure is fatal.
void encode_file(FILE *in, FILE *out) {
    lzma_options_lzma lzma2_opts;
    if (lzma_lzma_preset(&lzma2_opts, LZMA_PRESET_DEFAULT) != 0) {
        die("Error determining filter options.\n");
    }

    encode_options opts = {
        .chunksize = DEFAULT_CHUNKSIZE,
        .stream_flags = { .version = 0, .check = LZMA_CHECK_CRC32 },
        .filters = {
            { .id = LZMA_FILTER_LZMA2, .options = &lzma2_opts },
            { .id = LZMA_VLI_UNKNOWN }
        }
    };
    file_reader(&opts.rd, in);
    file_writer(&opts.wr, out);

    // An unknown backward size selects the stream header.
    encode_stream_end(&opts, LZMA_VLI_UNKNOWN);

    lzma_block blk;
    encode_block(&opts, &blk);

    // The index size becomes the footer's backward size.
    lzma_vli index_size = encode_index(&opts, &blk, 1);
    encode_stream_end(&opts, index_size);
}
// Point `rd` at a stdio stream: reads go through file_read() with `file` as
// the callback state.
void file_reader(reader *rd, FILE *file) {
    *rd = (reader){ .read = file_read, .data = file };
}
// reader callback for stdio streams. `data` is the FILE* to read from.
// Reads up to `size` bytes into `buf` and returns the number of bytes read.
// When `eofp` is non-NULL it receives the stream's feof() state after the read.
size_t file_read(void *data, uint8_t *buf, size_t size, int *eofp) {
    FILE *stream = data;
    const size_t got = fread(buf, sizeof *buf, size, stream);
    if (eofp != NULL) {
        *eofp = feof(stream);
    }
    return got;
}
// Point `wr` at a stdio stream: writes go through file_write() with `file` as
// the callback state.
void file_writer(writer *wr, FILE *file) {
    *wr = (writer){ .write = file_write, .data = file };
}
// writer callback for stdio streams. `data` is the FILE* to write to.
// Writes `size` bytes from `buf` and returns the number of bytes actually
// written, as reported by fwrite().
size_t file_write(void *data, const uint8_t *buf, size_t size) {
    FILE *stream = data;
    const size_t put = fwrite(buf, sizeof *buf, size, stream);
    return put;
}
// Encode one block: fill in `block` from the options, write its header, then
// compress everything the reader supplies and write the result.
//
// `block` is initialised here (version, check, filters, unknown sizes) and is
// left for the caller to pass on to encode_index(). Any liblzma or write
// failure is fatal.
void encode_block(encode_options *eo, lzma_block *block) {
    block->version = 0;
    block->check = eo->stream_flags.check;
    block->filters = eo->filters;
    block->compressed_size = block->uncompressed_size = LZMA_VLI_UNKNOWN;

    // Write the header
    lzma_ret err = lzma_block_header_size(block);
    if (LZMA_OK != err)
        die("Error #%d determining size of block header.\n", err);
    uint8_t header[block->header_size];
    err = lzma_block_header_encode(block, header);
    if (LZMA_OK != err)
        die("Error #%d encoding block header.\n", err);
    size_t wr = eo->wr.write(eo->wr.data, header, block->header_size);
    // Compare against what was asked for: the block header size, which is
    // unrelated to the fixed stream header size.
    if (wr != block->header_size)
        die("Error writing block header.\n");

    // Write the data
    lzma_stream stream = LZMA_STREAM_INIT;
    err = lzma_block_encoder(&stream, block);
    if (err)
        die("Error #%d creating block encoder.\n", err);
    encode(&stream, eo->chunksize, &eo->rd, &eo->wr);

    // Release the encoder state owned by the local stream.
    lzma_end(&stream);
}
// Drive an already-initialised liblzma encoder to completion.
//
// Input is pulled from `rd` in pieces of up to `chunksize` bytes whenever the
// stream has consumed everything it was given; `rd` may be NULL for encoders
// that need no input. Every pass's output is handed to `wr`. The loop ends when
// lzma_code() reports LZMA_STREAM_END. Read, write and coding errors are fatal.
void encode(lzma_stream *stream, size_t chunksize, reader *rd, writer *wr) {
    uint8_t inbuf[chunksize], outbuf[chunksize];
    lzma_ret err = LZMA_OK;

    // Kept across iterations and given a defined start value: it is used as-is
    // when there is no reader or when input is still pending from the previous
    // pass, and once end-of-input has been seen it stays LZMA_FINISH.
    lzma_action action = LZMA_RUN;

    stream->avail_in = 0;
    stream->next_in = inbuf;
    do {
        stream->next_out = outbuf;
        stream->avail_out = chunksize;

        if (rd && stream->avail_in == 0) {
            int eof = 0;
            stream->avail_in = rd->read(rd->data, inbuf, chunksize, &eof);
            // A short read is only acceptable at end-of-input.
            if (!eof && stream->avail_in != chunksize)
                die("Read error during encoding.\n");
            stream->next_in = inbuf;
            action = eof ? LZMA_FINISH : LZMA_RUN;
        }

        err = lzma_code(stream, action);
        if (LZMA_OK != err && LZMA_STREAM_END != err)
            die("Error #%d while encoding.\n", err);

        size_t write_bytes = chunksize - stream->avail_out;
        if (wr->write(wr->data, outbuf, write_bytes) != write_bytes)
            die("Write error during encoding.\n");
    } while (LZMA_STREAM_END != err);
}
// Build an index with one record per entry of `blocks`, encode it through the
// writer in `eo`, and return the index size reported by lzma_index_size().
// Any liblzma failure is fatal.
lzma_vli encode_index(encode_options *eo, lzma_block *blocks, size_t nblocks) {
    lzma_ret err;
    lzma_index *index = lzma_index_init(NULL, NULL);
    if (index == NULL)
        die("Error initializing index.\n");

    // size_t counter: matches the type of nblocks, no signed/unsigned mix.
    for (size_t i = 0; i < nblocks; ++i) {
        err = lzma_index_append(index, NULL,
            lzma_block_unpadded_size(&blocks[i]),
            blocks[i].uncompressed_size);
        if (LZMA_OK != err)
            die("Error #%d appending record to index.\n", err);
    }

    lzma_stream stream = LZMA_STREAM_INIT;
    err = lzma_index_encoder(&stream, index);
    if (LZMA_OK != err)
        die("Error #%d creating index encoder.\n", err);
    // No reader: the index encoder takes its data from `index`.
    encode(&stream, eo->chunksize, NULL, &eo->wr);
    // Release the encoder state owned by the local stream.
    lzma_end(&stream);

    lzma_vli size = lzma_index_size(index);
    lzma_index_end(index, NULL);
    return size;
}
// Write a stream header or footer through the writer in `eo`.
// `backward_size` is stored into eo->stream_flags; LZMA_VLI_UNKNOWN selects the
// header encoder, any other value the footer encoder. Encoding and write
// failures are fatal.
void encode_stream_end(encode_options *eo, lzma_vli backward_size) {
    uint8_t edge[LZMA_STREAM_HEADER_SIZE];
    eo->stream_flags.backward_size = backward_size;

    lzma_ret rc;
    if (backward_size == LZMA_VLI_UNKNOWN) {
        rc = lzma_stream_header_encode(&eo->stream_flags, edge);
    } else {
        rc = lzma_stream_footer_encode(&eo->stream_flags, edge);
    }
    if (rc != LZMA_OK) {
        die("Error #%d encoding stream end.\n", rc);
    }

    size_t sent = eo->wr.write(eo->wr.data, edge, LZMA_STREAM_HEADER_SIZE);
    if (sent != LZMA_STREAM_HEADER_SIZE) {
        die("Error writing stream end.\n");
    }
}

@ -0,0 +1,60 @@
#include <lzma.h>
#include <stdio.h>
#include <stdlib.h>
typedef int fixme_err;
void pixz_die(const char *fmt, ...);
/***** BLOCK *****/
// One unit of compression work: an input buffer, an output buffer and the
// liblzma state used to turn one into the other.
typedef struct {
// ibuf: uncompressed input; obuf: block header followed by compressed output.
uint8_t *ibuf, *obuf;
// Capacity of ibuf in bytes.
size_t isize;
// Block description (check, filters, sizes) used for the header and the index.
lzma_block block;
// Coder state; its next_in/avail_in and next_out/avail_out cursors track how
// much of ibuf is pending and how much of obuf has been produced.
lzma_stream stream;
} pixz_block;
// Create a block accepting up to `size` input bytes; `filters` is kept by pointer.
pixz_block *pixz_block_new(size_t size, lzma_check check, lzma_filter *filters);
// Release a block and its buffers.
void pixz_block_free(pixz_block *b);
// Non-zero when no more input fits into the block.
int pixz_block_full(pixz_block *b);
// Bytes of input space still free.
size_t pixz_block_new_input_avail(pixz_block *b);
// Where to store the next input bytes.
uint8_t *pixz_block_new_input_next(pixz_block *b);
// Declare that `bytes` bytes were stored at pixz_block_new_input_next().
void pixz_block_new_input(pixz_block *b, size_t bytes);
// Start of the coded output (buffer stays owned by the block).
uint8_t *pixz_block_coded_data(pixz_block *b);
// Number of coded output bytes produced so far.
size_t pixz_block_coded_size(pixz_block *b);
// Run the encoder; the block is finished when `bytes` equals all pending input.
fixme_err pixz_block_encode(pixz_block *b, size_t bytes);
// Encode all pending input and finish the block.
fixme_err pixz_block_encode_all(pixz_block *b);
// Append this block's sizes to `index`.
fixme_err pixz_block_index_append(pixz_block *b, lzma_index *index);
/***** ENCODE *****/
// Settings shared by the file-level encode functions.
typedef struct {
size_t chunksize; // read quantum
size_t blocksize; // encode quantum
// Integrity check used for blocks and for the stream header/footer flags.
lzma_check check;
// Heap-allocated filter chain; unused entries hold LZMA_VLI_UNKNOWN.
lzma_filter *filters;
} pixz_encode_options;
// Allocate options with an empty filter chain.
pixz_encode_options *pixz_encode_options_new();
// Fill in default sizes, CRC32 check and an LZMA2 filter with the default preset.
fixme_err pixz_encode_options_default(pixz_encode_options *opts);
// Release options created by pixz_encode_options_new().
void pixz_encode_options_free(pixz_encode_options *opts);
// Read up to one block of input, compress it, write it and record it in `index`.
fixme_err pixz_encode_block(FILE *infile, FILE *outfile, pixz_encode_options *opts,
lzma_index *index);
// Write the stream header.
fixme_err pixz_encode_stream_header(FILE *outfile, pixz_encode_options *opts);
// Write the stream footer, whose backward size comes from `index`.
fixme_err pixz_encode_stream_footer(FILE *outfile, pixz_encode_options *opts,
lzma_index *index);
// Encode `index` and write it out in chunks.
fixme_err pixz_encode_index(FILE *outfile, pixz_encode_options *opts, lzma_index *index);
// Compress all of `infile` to `outfile`: header, blocks, index, footer.
fixme_err pixz_encode_file(FILE *infile, FILE *outfile, pixz_encode_options *opts);
Loading…
Cancel
Save