factor out indexing

pull/2/head
Dave Vasilevsky 15 years ago
parent 69dc231f72
commit 64ee987eb8

@ -16,11 +16,10 @@ pixz: pixz.o encode.o block.o util.o
pixzlist: pixzlist.o
$(LD) $@ $^ -llzma
pixztar: tar.o util.o
pixztar: tar.o util.o index.o
$(LD) $@ $^ -larchive
run: pixz
time ./$< < test.in > test.out
@md5sum test.in

@ -0,0 +1,72 @@
#include "pixz.h"
#include <string.h>
static bool pixz_index_is_prefix(const char *name);
static void pixz_index_add_record(pixz_index *i, size_t offset, const char *name);
pixz_index *pixz_index_new(void) {
pixz_index *i = malloc(sizeof(pixz_index));
i->first = NULL;
i->last = NULL;
i->have_last_offset = false;
return i;
}
/**
 * Release an index together with every record and record name it owns.
 *
 * @param i index to free; NULL is accepted and ignored.
 */
void pixz_index_free(pixz_index *i) {
    if (!i)
        return;
    pixz_index_record *rec = i->first;
    while (rec) {
        // Read the successor before freeing: rec must not be touched
        // once it has been passed to free().
        pixz_index_record *next = rec->next;
        free(rec->name);
        free(rec);
        rec = next;
    }
    free(i);
}
/**
 * Append one record to the tail of the index's linked list.
 *
 * @param i      index to append to.
 * @param offset offset stored in the new record.
 * @param name   entry name, duplicated with strdup(); when NULL the record
 *               is stored with a NULL name.
 *
 * Exits the process with a message on stderr if memory cannot be
 * allocated. A failed strdup() must not be stored as a NULL name, because
 * pixz_index_dump() stops listing at the first record whose name is NULL.
 */
static void pixz_index_add_record(pixz_index *i, size_t offset, const char *name) {
    pixz_index_record *rec = malloc(sizeof *rec);
    char *copy = name ? strdup(name) : NULL;
    if (!rec || (name && !copy)) {
        fprintf(stderr, "Out of memory while building index\n");
        exit(EXIT_FAILURE);
    }

    rec->next = NULL;
    rec->name = copy;
    rec->offset = offset;

    // Link at the tail; the first record also becomes the head.
    if (!i->first)
        i->first = rec;
    if (i->last)
        i->last->next = rec;
    i->last = rec;
}
/**
 * Feed one archive entry into the index.
 *
 * Entries whose base name starts with "._" do not get a record of their
 * own. Instead, the offset of the first such entry in a consecutive run is
 * remembered and used as the offset of the next ordinary entry.
 *
 * @param i      index being built.
 * @param offset offset of this entry.
 * @param name   path name of this entry.
 */
void pixz_index_add(pixz_index *i, size_t offset, const char *name) {
    if (!pixz_index_is_prefix(name)) {
        // Ordinary entry: start it at the pending offset, if there is one.
        size_t start = offset;
        if (i->have_last_offset)
            start = i->last_offset;
        pixz_index_add_record(i, start, name);
        i->have_last_offset = false;
        return;
    }

    // "._" entry: keep only the offset of the first one in the run.
    if (!i->have_last_offset) {
        i->last_offset = offset;
        i->have_last_offset = true;
    }
}
/**
 * Report whether the last path component of name begins with "._".
 *
 * Unfortunately, this is the only known way to identify copyfile data.
 * The component is located by hand because basename(3) is not thread-safe.
 *
 * @param name path to inspect; must not be NULL.
 * @return true if the text after the final '/' (or the whole string when
 *         there is no '/') starts with "._".
 */
static bool pixz_index_is_prefix(const char *name) {
    const char *slash = strrchr(name, '/');
    const char *base = slash ? slash + 1 : name;
    // Short-circuit keeps base[1] from being read past an empty component.
    return base[0] == '.' && base[1] == '_';
}
/**
 * Close the index by appending a record that has no name.
 *
 * pixz_index_dump() stops listing at the first nameless record and prints
 * that record's offset as the total.
 *
 * @param i      index to terminate.
 * @param offset offset stored in the terminating record.
 */
void pixz_index_finish(pixz_index *i, size_t offset) {
    const char *no_name = NULL;
    pixz_index_add_record(i, offset, no_name);
}
/**
 * Print the index as text: one "offset name" line per named record,
 * followed by a "Total:" line taken from the first nameless record.
 *
 * @param i   index to print.
 * @param out stream to write to.
 *
 * If the list contains no nameless record, the "Total:" line is omitted.
 */
void pixz_index_dump(pixz_index *i, FILE *out) {
    pixz_index_record *rec;
    for (rec = i->first; rec && rec->name; rec = rec->next) {
        fprintf(out, "%12zu %s\n", rec->offset, rec->name);
    }
    // rec is NULL here when the loop ran off the end of the list without
    // meeting a nameless record; there is then no total to report.
    if (rec)
        fprintf(out, "Total: %zu\n", rec->offset);
}

@ -2,6 +2,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
typedef int fixme_err;
@ -58,3 +59,29 @@ fixme_err pixz_encode_stream_footer(FILE *outfile, pixz_encode_options *opts,
fixme_err pixz_encode_index(FILE *outfile, pixz_encode_options *opts, lzma_index *index);
fixme_err pixz_encode_file(FILE *infile, FILE *outfile, pixz_encode_options *opts);
/***** INDEX *****/
// One node of the index's singly linked list of records.
typedef struct pixz_index_record pixz_index_record;
struct pixz_index_record {
// Offset recorded for this entry.
size_t offset;
// Entry name, or NULL for a record that carries no name.
char *name;
// Following record in the list, or NULL for the last one.
pixz_index_record *next;
};
// An index: a linked list of records plus one pending offset.
typedef struct {
// Head of the record list, NULL while the list is empty.
pixz_index_record *first;
// Tail of the record list, where new records are appended.
pixz_index_record *last;
// Pending offset; only meaningful while have_last_offset is true.
size_t last_offset;
// Whether last_offset currently holds a pending offset.
bool have_last_offset;
} pixz_index;
pixz_index *pixz_index_new(void);
void pixz_index_free(pixz_index *i);
void pixz_index_add(pixz_index *i, size_t offset, const char *name);
void pixz_index_finish(pixz_index *i, size_t offset);
void pixz_index_dump(pixz_index *i, FILE *out);

122
tar.c

@ -4,100 +4,70 @@
#include <archive_entry.h>
#include <sys/errno.h>
#include <string.h>
#include <libgen.h>
// Tar uses records of 512 bytes
#define CHUNKSIZE 512
// One node of the tar index's singly linked list of records.
typedef struct pixz_tar_index_record pixz_tar_index_record;
struct pixz_tar_index_record {
// Offset recorded for this entry.
size_t offset;
// Entry name.
char *name;
// Following record in the list, or NULL for the last one.
pixz_tar_index_record *next;
};
// A tar index: head and tail of a linked list of records.
typedef struct {
// Head of the record list, NULL while the list is empty.
pixz_tar_index_record *first;
// Tail of the record list, where new records are appended.
pixz_tar_index_record *last;
} pixz_tar_index;
static pixz_tar_index *pixz_tar_index_new(void);
static void pixz_tar_index_add(pixz_tar_index *i, size_t offset, const char *name);
static void pixz_tar_index_dump(pixz_tar_index *i, FILE *out);
static void pixz_tar_index_free(pixz_tar_index *i);
static int pixz_tar_index_is_metadata(struct archive_entry *entry);
typedef struct {
FILE *file;
uint8_t buf[CHUNKSIZE];
} pixz_tar_input;
} pixz_tar;
static int pixz_tar_input_open(struct archive *a, void *refp);
static int pixz_tar_input_close(struct archive *a, void *refp);
static ssize_t pixz_tar_input_read(struct archive *a, void *refp, const void **buf);
static int pixz_tar_open(struct archive *a, void *refp);
static int pixz_tar_close(struct archive *a, void *refp);
static ssize_t pixz_tar_read(struct archive *a, void *refp, const void **buf);
int main(void) {
pixz_tar_index *index = pixz_tar_index_new();
pixz_index *index = pixz_index_new();
struct archive *a = archive_read_new();
archive_read_support_compression_none(a);
archive_read_support_format_tar(a);
pixz_tar_input input = { .file = stdin };
if (archive_read_open(a, &input, pixz_tar_input_open, pixz_tar_input_read,
pixz_tar_input_close) != ARCHIVE_OK)
FILE *infile = stdin;
pixz_tar input = { .file = infile };
if (archive_read_open(a, &input, pixz_tar_open, pixz_tar_read,
pixz_tar_close) != ARCHIVE_OK)
pixz_die("Can't open archive\n");
int want_offset = 0;
size_t offset = 0;
while (1) {
struct archive_entry *entry;
int aerr = archive_read_next_header(a, &entry);
if (aerr == ARCHIVE_EOF)
if (aerr == ARCHIVE_EOF) {
pixz_index_finish(index, ftello(stdin));
break;
else if (aerr != ARCHIVE_OK)
} else if (aerr != ARCHIVE_OK)
pixz_die("Error reading header\n");
if (!pixz_tar_index_is_metadata(entry)) {
const char *name = archive_entry_pathname(entry);
pixz_tar_index_add(index, offset, name);
want_offset = 1;
}
const char *name = archive_entry_pathname(entry);
size_t offset = archive_read_header_position(a);
pixz_index_add(index, offset, name);
if (archive_read_data_skip(a) != ARCHIVE_OK)
pixz_die("Error skipping data\n");
if (want_offset) {
offset = ftell(input.file);
want_offset = 0;
}
pixz_die("Error skipping data\n");
}
if (archive_read_finish(a) != ARCHIVE_OK)
pixz_die("Error finishing read\n");
pixz_tar_index_dump(index, stdout);
pixz_tar_index_free(index);
pixz_index_dump(index, stdout);
pixz_index_free(index);
return 0;
}
static int pixz_tar_input_open(struct archive *a, void *refp) {
static int pixz_tar_open(struct archive *a, void *refp) {
return ARCHIVE_OK;
}
static int pixz_tar_input_close(struct archive *a, void *refp) {
fclose(((pixz_tar_input*)refp)->file);
static int pixz_tar_close(struct archive *a, void *refp) {
fclose(((pixz_tar*)refp)->file);
return ARCHIVE_OK;
}
static ssize_t pixz_tar_input_read(struct archive *a, void *refp, const void **buf) {
pixz_tar_input *input = (pixz_tar_input*)refp;
static ssize_t pixz_tar_read(struct archive *a, void *refp, const void **buf) {
pixz_tar *input = (pixz_tar*)refp;
size_t rd = fread(input->buf, 1, CHUNKSIZE, input->file);
if (ferror(input->file)) {
archive_set_error(a, errno, "Read error");
@ -106,49 +76,3 @@ static ssize_t pixz_tar_input_read(struct archive *a, void *refp, const void **b
*buf = input->buf;
return rd;
}
/**
 * Allocate an empty tar index.
 *
 * @return the new index, to be released with pixz_tar_index_free(), or
 *         NULL if memory could not be allocated.
 */
static pixz_tar_index *pixz_tar_index_new(void) {
    pixz_tar_index *i = malloc(sizeof *i);
    if (!i)
        return NULL;  // do not write through a failed allocation
    i->first = NULL;
    i->last = NULL;
    return i;
}
/**
 * Append one record to the tail of the tar index.
 *
 * @param i      index to append to.
 * @param offset offset stored in the new record.
 * @param name   entry name, duplicated with strdup(); must not be NULL.
 *
 * Exits the process with a message on stderr if memory cannot be
 * allocated, instead of writing through a NULL record or storing a NULL
 * name that pixz_tar_index_dump() would later pass to "%s".
 */
static void pixz_tar_index_add(pixz_tar_index *i, size_t offset, const char *name) {
    pixz_tar_index_record *rec = malloc(sizeof *rec);
    char *copy = strdup(name);
    if (!rec || !copy) {
        fprintf(stderr, "Out of memory while building index\n");
        exit(EXIT_FAILURE);
    }

    rec->next = NULL;
    rec->name = copy;
    rec->offset = offset;

    // Link at the tail; the first record also becomes the head.
    if (!i->first)
        i->first = rec;
    if (i->last)
        i->last->next = rec;
    i->last = rec;
}
/**
 * Print every record of the tar index as an "offset name" line.
 *
 * @param i   index to print.
 * @param out stream to write to.
 */
static void pixz_tar_index_dump(pixz_tar_index *i, FILE *out) {
    pixz_tar_index_record *cur = i->first;
    while (cur) {
        fprintf(out, "%12zu %s\n", cur->offset, cur->name);
        cur = cur->next;
    }
}
/**
 * Release a tar index together with every record and record name it owns.
 *
 * @param i index to free; NULL is accepted and ignored.
 */
static void pixz_tar_index_free(pixz_tar_index *i) {
    if (!i)
        return;
    pixz_tar_index_record *rec = i->first;
    while (rec) {
        // Read the successor before freeing: rec must not be touched
        // once it has been passed to free().
        pixz_tar_index_record *next = rec->next;
        free(rec->name);
        free(rec);
        rec = next;
    }
    free(i);
}
/**
 * Report whether an archive entry's last path component begins with "._".
 *
 * FIXME: better copyfile detection?
 *
 * @param entry archive entry whose path name is inspected.
 * @return non-zero if the text after the final '/' (or the whole path when
 *         there is no '/') starts with "._", zero otherwise.
 */
static int pixz_tar_index_is_metadata(struct archive_entry *entry) {
    const char *path = archive_entry_pathname(entry);
    const char *slash = strrchr(path, '/');
    const char *base = slash ? slash + 1 : path;
    // Short-circuit keeps base[1] from being read past an empty component.
    return base[0] == '.' && base[1] == '_';
}

Loading…
Cancel
Save