associated content from (like images for an html stream
will be loaded from this). Maybe NULL. May be ignored.
+ recognize_state: NULL, or a state pointer passed back from the call
+ to fz_document_recognize_content_fn. Ownership does not pass in. The
+ caller remains responsible for freeing state.
+
Pointer to opened document. Throws exception in case of error.
*/
-typedef fz_document *(fz_document_open_fn)(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_stream *accel, fz_archive *dir);
+typedef fz_document *(fz_document_open_fn)(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_stream *accel, fz_archive *dir, void *recognize_state);
/**
Recognize a document type from
*/
typedef int (fz_document_recognize_fn)(fz_context *ctx, const fz_document_handler *handler, const char *magic);
+typedef void (fz_document_recognize_state_free_fn)(fz_context *ctx, void *state);
+
/**
Recognize a document type from stream contents.
dir: directory context from which stream is loaded.
+ recognize_state: pointer to retrieve opaque state that may be used
+ by the open routine, or NULL.
+
+ free_recognize_state: pointer to retrieve a function pointer to
+ free the opaque state, or NULL.
+
+ Note: recognize_state and free_recognize_state should either both
+ be NULL or both be non-NULL!
+
Returns a number between 0 (not recognized) and 100
(fully recognized) based on how certain the recognizer
is that this is of the required type.
*/
-typedef int (fz_document_recognize_content_fn)(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir);
+typedef int (fz_document_recognize_content_fn)(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **recognize_state, fz_document_recognize_state_free_fn **free_recognize_state);
/**
Finalise a document handler.
}
static fz_document *
-cbz_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir)
+cbz_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
{
cbz_document *doc = fz_new_derived_document(ctx, cbz_document);
};
static int
-cbz_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir)
+cbz_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **freestate)
{
fz_archive *arch = NULL;
int ret = 0;
}
static fz_document *
-img_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir)
+img_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
{
img_document *doc = fz_new_derived_document(ctx, img_document);
}
static int
-img_recognize_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir)
+img_recognize_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
{
unsigned char data[8];
size_t n;
if (stream == NULL)
return 0;
+ if (state)
+ *state = NULL;
+ if (free_state)
+ *free_state = NULL;
+
n = fz_read(ctx, stream, data, 8);
if (n != 8)
}
const fz_document_handler *
-do_recognize_document_stream_and_dir_content(fz_context *ctx, fz_stream **streamp, fz_archive *dir, const char *magic)
+do_recognize_document_stream_and_dir_content(fz_context *ctx, fz_stream **streamp, fz_archive *dir, const char *magic, void **handler_state, fz_document_recognize_state_free_fn **handler_free_state)
{
fz_document_handler_context *dc;
int i, best_score, best_i;
+ void *best_state = NULL;
+ fz_document_recognize_state_free_fn *best_free_state = NULL;
const char *ext;
int drop_stream = 0;
fz_stream *stream = *streamp;
+ if (handler_state)
+ *handler_state = NULL;
+ if (handler_free_state)
+ *handler_free_state = NULL;
+
dc = ctx->handler;
if (dc->count == 0)
fz_throw(ctx, FZ_ERROR_ARGUMENT, "No document handlers registered");
{
for (i = 0; i < dc->count; i++)
{
+ void *state = NULL;
+ fz_document_recognize_state_free_fn *free_state = NULL;
int score = 0;
if (dc->handler[i]->recognize_content)
fz_seek(ctx, stream, 0, SEEK_SET);
fz_try(ctx)
{
- score = dc->handler[i]->recognize_content(ctx, dc->handler[i], stream, dir);
+ score = dc->handler[i]->recognize_content(ctx, dc->handler[i], stream, dir, &state, &free_state);
}
fz_catch(ctx)
{
{
best_score = score;
best_i = i;
+ if (best_free_state)
+ best_free_state(ctx, best_state);
+ best_free_state = free_state;
+ best_state = state;
}
+ else if (free_state)
+ free_state(ctx, state);
}
if (stream)
fz_seek(ctx, stream, 0, SEEK_SET);
}
fz_catch(ctx)
{
+ if (best_free_state)
+ best_free_state(ctx, best_state);
if (drop_stream)
fz_drop_stream(ctx, stream);
fz_rethrow(ctx);
/* Only if we found a handler, do we make our modified stream available to the
* caller. */
*streamp = stream;
+
+ if (handler_state && handler_free_state)
+ {
+ *handler_state = best_state;
+ *handler_free_state = best_free_state;
+ }
+ else if (best_free_state)
+ best_free_state(ctx, best_state);
+
return dc->handler[best_i];
}
fz_stream *stm = stream;
const fz_document_handler *res;
- res = do_recognize_document_stream_and_dir_content(ctx, &stm, dir, magic);
+ res = do_recognize_document_stream_and_dir_content(ctx, &stm, dir, magic, NULL, NULL);
if (stm != stream)
fz_drop_stream(ctx, stm);
return res;
}
-const fz_document_handler *fz_recognize_document_content(fz_context *ctx, const char *filename)
+static const fz_document_handler *do_recognize_document_content(fz_context *ctx, const char *filename, void **handler_state, fz_document_recognize_state_free_fn **handler_free_state)
{
fz_stream *stream = NULL;
const fz_document_handler *handler = NULL;
fz_archive *zip = NULL;
+ fz_stream *stm;
if (fz_is_directory(ctx, filename))
zip = fz_open_directory(ctx, filename);
else
stream = fz_open_file(ctx, filename);
+ stm = stream;
fz_try(ctx)
- handler = fz_recognize_document_stream_and_dir_content(ctx, stream, zip, filename);
+ handler = do_recognize_document_stream_and_dir_content(ctx, &stm, zip, filename, handler_state, handler_free_state);
fz_always(ctx)
{
+ if (stm != stream)
+ fz_drop_stream(ctx, stm);
fz_drop_stream(ctx, stream);
fz_drop_archive(ctx, zip);
}
return handler;
}
+/*
+ Recognize the document handler for the file at 'filename' by
+ delegating to do_recognize_document_content.
+
+ Both recognize-state out-pointers are passed as NULL, so only the
+ handler is returned to the caller.
+*/
+const fz_document_handler *fz_recognize_document_content(fz_context *ctx, const char *filename)
+{
+ return do_recognize_document_content(ctx, filename, NULL, NULL);
+}
+
const fz_document_handler *
fz_recognize_document(fz_context *ctx, const char *magic)
{
const fz_document_handler *handler;
fz_stream *wrapped_stream = stream;
fz_document *ret;
+ void *state = NULL;
+ fz_document_recognize_state_free_fn *free_state = NULL;
if (stream == NULL && dir == NULL)
fz_throw(ctx, FZ_ERROR_ARGUMENT, "no document to open");
/* If this finds a handler, then this might wrap stream. If it does, we reuse the wrapped one in
* the open call (hence avoiding us having to 'file-back' a stream twice), but we must free it. */
- handler = do_recognize_document_stream_and_dir_content(ctx, &wrapped_stream, dir, magic);
+ handler = do_recognize_document_stream_and_dir_content(ctx, &wrapped_stream, dir, magic, &state, &free_state);
if (!handler)
fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "cannot find document handler for file type: '%s'", magic);
fz_try(ctx)
- ret = handler->open(ctx, handler, wrapped_stream, accel, dir);
+ ret = handler->open(ctx, handler, wrapped_stream, accel, dir, state);
fz_always(ctx)
{
if (wrapped_stream != stream)
fz_drop_stream(ctx, wrapped_stream);
+ if (free_state && state)
+ free_state(ctx, state);
}
fz_catch(ctx)
fz_rethrow(ctx);
fz_open_accelerated_document(fz_context *ctx, const char *filename, const char *accel)
{
const fz_document_handler *handler;
- fz_stream *file;
+ fz_stream *file = NULL;
fz_stream *afile = NULL;
fz_document *doc = NULL;
fz_archive *dir = NULL;
char dirname[PATH_MAX];
-
- fz_var(afile);
+ void *state = NULL;
+ fz_document_recognize_state_free_fn *free_state = NULL;
if (filename == NULL)
fz_throw(ctx, FZ_ERROR_ARGUMENT, "no document to open");
- handler = fz_recognize_document_content(ctx, filename);
- if (!handler)
- fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "cannot find document handler for file: %s", filename);
-
if (fz_is_directory(ctx, filename))
{
/* Cannot accelerate directories, currently. */
return doc;
}
- file = fz_open_file(ctx, filename);
+ handler = do_recognize_document_content(ctx, filename, &state, &free_state);
+ if (!handler)
+ fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "cannot find document handler for file: %s", filename);
+
+ fz_var(afile);
+ fz_var(file);
fz_try(ctx)
{
+ file = fz_open_file(ctx, filename);
+
if (accel)
afile = fz_open_file(ctx, accel);
if (handler->wants_dir)
fz_dirname(dirname, filename, sizeof dirname);
dir = fz_open_directory(ctx, dirname);
}
- doc = handler->open(ctx, handler, file, afile, dir);
+ doc = handler->open(ctx, handler, file, afile, dir, state);
}
fz_always(ctx)
{
+ if (free_state)
+ free_state(ctx, state);
fz_drop_archive(ctx, dir);
fz_drop_stream(ctx, afile);
fz_drop_stream(ctx, file);
#endif
static fz_document *
-gz_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *ostm, fz_stream *accel, fz_archive *dir)
+gz_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *ostm, fz_stream *accel, fz_archive *dir, void *state)
{
fz_stream *stm = fz_open_flated(ctx, ostm, 16 + MAX_WBITS);
fz_buffer *buf = NULL;
};
static int
-gz_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir)
+gz_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
{
int ret = 0;
uint8_t data[10];
+ if (state)
+ *state = NULL;
+ if (free_state)
+ *free_state = NULL;
+
if (stream == NULL)
return 0;
}
static fz_document *
-epub_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir)
+epub_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
{
fz_stream *file2 = NULL;
fz_document *doc;
}
static int
-epub_recognize_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir)
+epub_recognize_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
{
fz_archive *arch = NULL;
int ret = 0;
fz_var(arch);
fz_var(ret);
+ if (state)
+ *state = NULL;
+ if (free_state)
+ *free_state = NULL;
+
fz_try(ctx)
{
if (stream == NULL)
return c == 32 || c == 9 || c == 10 || c == 13 || c == 12;
}
-int htdoc_recognize_html_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir)
+int htdoc_recognize_html_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **hstate, fz_document_recognize_state_free_fn **free_state)
{
uint8_t buffer[4096];
size_t i, n, m;
int state = state_top;
int type = 0;
+ if (hstate)
+ *hstate = NULL;
+ if (free_state)
+ *free_state = NULL;
+
if (stream == NULL)
return 0;
};
static fz_document *
-htdoc_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir)
+htdoc_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
{
return fz_htdoc_open_document_with_stream_and_dir(ctx, file, dir, &fz_htdoc_html5);
}
};
static fz_document *
-xhtdoc_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir)
+xhtdoc_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
{
return fz_htdoc_open_document_with_stream_and_dir(ctx, file, dir, &fz_htdoc_xhtml);
}
};
static fz_document *
-fb2doc_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir)
+fb2doc_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
{
return fz_htdoc_open_document_with_stream_and_dir(ctx, file, dir, &fz_htdoc_fb2);
}
}
static fz_document *
-mobi_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir)
+mobi_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
{
return mobi_open_document_with_buffer(ctx, fz_read_all(ctx, file, 0));
}
};
static fz_document *
-office_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip)
+office_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip, void *state)
{
return fz_htdoc_open_document_with_stream_and_dir(ctx, file, zip, &fz_htdoc_office);
}
/* We are only ever 75% sure here, to allow a 'better' handler, such as sodochandler
* to override us by returning 100. */
static int
-office_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *zip)
+office_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *zip, void **state, fz_document_recognize_state_free_fn **free_state)
{
fz_archive *arch = NULL;
int ret = 0;
fz_xml *xml = NULL;
+ if (state)
+ *state = NULL;
+ if (free_state)
+ *free_state = NULL;
+
fz_var(arch);
fz_var(ret);
fz_var(xml);
};
static fz_document *
-txt_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip)
+txt_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip, void *state)
{
return fz_htdoc_open_document_with_stream_and_dir(ctx, file, zip, &fz_htdoc_txt);
}
};
static int
-pdf_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir)
+pdf_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
{
const char *match = "%PDF-";
int pos = 0;
int n = 4096+5;
int c;
+ if (state)
+ *state = NULL;
+ if (free_state)
+ *free_state = NULL;
+
if (stream == NULL)
return 0;
}
static fz_document *
-open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip)
+open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip, void *state)
{
if (file == NULL)
return NULL;
}
static fz_document *
-svg_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip)
+svg_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip, void *state)
{
fz_buffer *buf = fz_read_all(ctx, file, 0);
fz_document *doc = NULL;
};
static int
-svg_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stm, fz_archive *dir)
+svg_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stm, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
{
// A standalone SVG document is an XML document with an <svg> root element.
//
int c;
+ if (state)
+ *state = NULL;
+ if (free_state)
+ *free_state = NULL;
+
if (stm == NULL)
return 0;
};
static int
-xps_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir)
+xps_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
{
fz_archive *arch = NULL;
int ret = 0;
fz_xml *xml = NULL;
fz_xml *pos;
+ if (state)
+ *state = NULL;
+ if (free_state)
+ *free_state = NULL;
+
fz_var(arch);
fz_var(ret);
fz_var(xml);
}
static fz_document *
-xps_open(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir)
+xps_open(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
{
if (file)
return xps_open_document_with_stream(ctx, file);