#include <string.h>
-typedef struct
-{
- pdf_document *doc;
- fz_context *ctx;
-} globals;
-
static int
string_in_names_list(fz_context *ctx, pdf_obj *p, pdf_obj *names_list)
{
static void retainpage(fz_context *ctx, pdf_document *doc, pdf_obj *parent, pdf_obj *kids, int page)
{
- pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, page-1);
+ pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, page);
pdf_flatten_inheritable_page_items(ctx, pageref);
return nc;
}
-static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
+static void pdf_rearrange_pages_imp(fz_context *ctx, pdf_document *doc, int count, int *new_page_list)
{
- pdf_obj *oldroot, *root, *pages, *kids, *countobj, *olddests;
- pdf_document *doc = glo->doc;
- int argidx = 0;
+ pdf_obj *oldroot, *pages, *kids, *olddests;
+ pdf_obj *root = NULL;
pdf_obj *names_list = NULL;
pdf_obj *outlines;
pdf_obj *ocproperties;
- pdf_obj *allfields;
- int pagecount;
- int i;
- int *page_object_nums;
+ pdf_obj *allfields = NULL;
+ int pagecount, i;
+ int *page_object_nums = NULL;
+ fz_page *page, *next;
/* Keep only pages/type and (reduced) dest entries to avoid
* references to unretained pages */
outlines = pdf_dict_get(ctx, oldroot, PDF_NAME(Outlines));
ocproperties = pdf_dict_get(ctx, oldroot, PDF_NAME(OCProperties));
- root = pdf_new_dict(ctx, doc, 3);
- pdf_dict_put(ctx, root, PDF_NAME(Type), pdf_dict_get(ctx, oldroot, PDF_NAME(Type)));
- pdf_dict_put(ctx, root, PDF_NAME(Pages), pdf_dict_get(ctx, oldroot, PDF_NAME(Pages)));
- if (outlines)
- pdf_dict_put(ctx, root, PDF_NAME(Outlines), outlines);
- if (ocproperties)
- pdf_dict_put(ctx, root, PDF_NAME(OCProperties), ocproperties);
-
- pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);
-
- /* Create a new kids array with only the pages we want to keep */
- kids = pdf_new_array(ctx, doc, 1);
+ fz_var(root);
+ fz_var(names_list);
+ fz_var(allfields);
+ fz_var(page_object_nums);
+ fz_var(kids);
- /* Retain pages specified */
- while (argc - argidx)
+ fz_try(ctx)
{
- int page, spage, epage;
- const char *pagelist = argv[argidx];
-
- pagecount = pdf_count_pages(ctx, doc);
-
- while ((pagelist = fz_parse_page_range(ctx, pagelist, &spage, &epage, pagecount)))
+ /* Adjust the fz layer of cached pages (by nuking it!) */
+ fz_lock(ctx, FZ_LOCK_ALLOC);
{
- if (spage < epage)
- for (page = spage; page <= epage; ++page)
- retainpage(ctx, doc, pages, kids, page);
- else
- for (page = spage; page >= epage; --page)
- retainpage(ctx, doc, pages, kids, page);
+ for (page = doc->super.open; page != NULL; page = next)
+ {
+ next = page->next;
+ page->prev = NULL;
+ page->next = NULL;
+ }
+ doc->super.open = NULL;
}
+ fz_unlock(ctx, FZ_LOCK_ALLOC);
- argidx++;
- }
+ root = pdf_new_dict(ctx, doc, 3);
+ pdf_dict_put(ctx, root, PDF_NAME(Type), pdf_dict_get(ctx, oldroot, PDF_NAME(Type)));
+ pdf_dict_put(ctx, root, PDF_NAME(Pages), pdf_dict_get(ctx, oldroot, PDF_NAME(Pages)));
+ if (outlines)
+ pdf_dict_put(ctx, root, PDF_NAME(Outlines), outlines);
+ if (ocproperties)
+ pdf_dict_put(ctx, root, PDF_NAME(OCProperties), ocproperties);
- /* Update page count and kids array */
- countobj = pdf_new_int(ctx, pdf_array_len(ctx, kids));
- pdf_dict_put_drop(ctx, pages, PDF_NAME(Count), countobj);
- pdf_dict_put_drop(ctx, pages, PDF_NAME(Kids), kids);
+ pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);
- pagecount = pdf_count_pages(ctx, doc);
- page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums));
- for (i = 0; i < pagecount; i++)
- {
- pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
- page_object_nums[i] = pdf_to_num(ctx, pageref);
- }
+ /* Create a new kids array with only the pages we want to keep */
+ kids = pdf_new_array(ctx, doc, 1);
- /* If we had an old Dests tree (now reformed as an olddests
- * dictionary), keep any entries in there that point to
- * valid pages. This may mean we keep more than we need, but
- * it's safe at least. */
- if (olddests)
- {
- pdf_obj *names = pdf_new_dict(ctx, doc, 1);
- pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
- int len = pdf_dict_len(ctx, olddests);
+ /* Retain pages specified */
+ for (i = 0; i < count; ++i)
+ retainpage(ctx, doc, pages, kids, new_page_list[i]);
- names_list = pdf_new_array(ctx, doc, 32);
+ /* Update page count */
+ pdf_dict_put_int(ctx, pages, PDF_NAME(Count), pdf_array_len(ctx, kids));
+ pdf_dict_put(ctx, pages, PDF_NAME(Kids), kids);
- for (i = 0; i < len; i++)
+ pagecount = pdf_count_pages(ctx, doc);
+ page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums));
+ for (i = 0; i < pagecount; i++)
{
- pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
- pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
- pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME(D));
+ pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
+ page_object_nums[i] = pdf_to_num(ctx, pageref);
+ }
- dest = pdf_array_get(ctx, dest ? dest : val, 0);
- if (dest_is_valid_page(ctx, dest, page_object_nums, pagecount))
+ /* If we had an old Dests tree (now reformed as an olddests
+ * dictionary), keep any entries in there that point to
+ * valid pages. This may mean we keep more than we need, but
+ * it's safe at least. */
+ if (olddests)
+ {
+ pdf_obj *names, *dests;
+ int len = pdf_dict_len(ctx, olddests);
+
+ names = pdf_dict_put_dict(ctx, root, PDF_NAME(Names), 1);
+ dests = pdf_dict_put_dict(ctx, names, PDF_NAME(Dests), 1);
+ names_list = pdf_dict_put_array(ctx, dests, PDF_NAME(Names), 32);
+
+ for (i = 0; i < len; i++)
{
- pdf_obj *key_str = pdf_new_string(ctx, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
- pdf_array_push_drop(ctx, names_list, key_str);
- pdf_array_push(ctx, names_list, val);
+ pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
+ pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
+ pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME(D));
+
+ dest = pdf_array_get(ctx, dest ? dest : val, 0);
+ if (dest_is_valid_page(ctx, dest, page_object_nums, pagecount))
+ {
+ pdf_array_push_string(ctx, names_list, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
+ pdf_array_push(ctx, names_list, val);
+ }
}
+
+ pdf_drop_obj(ctx, olddests);
}
- pdf_dict_put(ctx, dests, PDF_NAME(Names), names_list);
- pdf_dict_put(ctx, names, PDF_NAME(Dests), dests);
- pdf_dict_put(ctx, root, PDF_NAME(Names), names);
+ /* Edit each pages /Annot list to remove any links that point to nowhere. */
+ for (i = 0; i < pagecount; i++)
+ {
+ pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
- pdf_drop_obj(ctx, names);
- pdf_drop_obj(ctx, dests);
- pdf_drop_obj(ctx, olddests);
- }
+ pdf_obj *annots = pdf_dict_get(ctx, pageref, PDF_NAME(Annots));
- /* Edit each pages /Annot list to remove any links that point to nowhere. */
- for (i = 0; i < pagecount; i++)
- {
- pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
+ int len = pdf_array_len(ctx, annots);
+ int j;
- pdf_obj *annots = pdf_dict_get(ctx, pageref, PDF_NAME(Annots));
+ for (j = 0; j < len; j++)
+ {
+ pdf_obj *o = pdf_array_get(ctx, annots, j);
- int len = pdf_array_len(ctx, annots);
- int j;
+ if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME(Subtype)), PDF_NAME(Link)))
+ continue;
- for (j = 0; j < len; j++)
+ if (!dest_is_valid(ctx, o, pagecount, page_object_nums, names_list))
+ {
+ /* Remove this annotation */
+ pdf_array_delete(ctx, annots, j);
+ len--;
+ j--;
+ }
+ }
+ }
+
+ /* Locate all fields on retained pages */
+ allfields = pdf_new_array(ctx, doc, 1);
+ for (i = 0; i < pagecount; i++)
{
- pdf_obj *o = pdf_array_get(ctx, annots, j);
+ pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
- if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME(Subtype)), PDF_NAME(Link)))
- continue;
+ pdf_obj *annots = pdf_dict_get(ctx, pageref, PDF_NAME(Annots));
- if (!dest_is_valid(ctx, o, pagecount, page_object_nums, names_list))
+ int len = pdf_array_len(ctx, annots);
+ int j;
+
+ for (j = 0; j < len; j++)
{
- /* Remove this annotation */
- pdf_array_delete(ctx, annots, j);
- len--;
- j--;
+ pdf_obj *f = pdf_array_get(ctx, annots, j);
+
+ if (pdf_dict_get(ctx, f, PDF_NAME(Subtype)) == PDF_NAME(Widget))
+ pdf_array_push(ctx, allfields, f);
}
}
- }
- /* Locate all fields on retained pages */
- allfields = pdf_new_array(ctx, doc, 1);
- for (i = 0; i < pagecount; i++)
- {
- pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
+ /* From non-terminal widget fields, strip out annot references not
+ * belonging to any retained page. */
+ for (i = 0; i < pdf_array_len(ctx, allfields); i++)
+ {
+ pdf_obj *f = pdf_array_get(ctx, allfields, i);
- pdf_obj *annots = pdf_dict_get(ctx, pageref, PDF_NAME(Annots));
+ while (pdf_dict_get(ctx, f, PDF_NAME(Parent)))
+ f = pdf_dict_get(ctx, f, PDF_NAME(Parent));
- int len = pdf_array_len(ctx, annots);
- int j;
+ strip_stale_annot_refs(ctx, f, pagecount, page_object_nums);
+ }
- for (j = 0; j < len; j++)
+ /* For terminal fields, if action destination is not valid,
+ * remove the action */
+ for (i = 0; i < pdf_array_len(ctx, allfields); i++)
{
- pdf_obj *f = pdf_array_get(ctx, annots, j);
+ pdf_obj *f = pdf_array_get(ctx, allfields, i);
- if (pdf_dict_get(ctx, f, PDF_NAME(Subtype)) == PDF_NAME(Widget))
- pdf_array_push(ctx, allfields, f);
+ if (!dest_is_valid(ctx, f, pagecount, page_object_nums, names_list))
+ pdf_dict_del(ctx, f, PDF_NAME(A));
}
- }
- /* From non-terminal widget fields, strip out annot references not
- * belonging to any retained page. */
- for (i = 0; i < pdf_array_len(ctx, allfields); i++)
+ if (strip_outlines(ctx, doc, outlines, pagecount, page_object_nums, names_list) == 0)
+ {
+ pdf_dict_del(ctx, root, PDF_NAME(Outlines));
+ }
+ }
+ fz_always(ctx)
{
- pdf_obj *f = pdf_array_get(ctx, allfields, i);
-
- while (pdf_dict_get(ctx, f, PDF_NAME(Parent)))
- f = pdf_dict_get(ctx, f, PDF_NAME(Parent));
-
- strip_stale_annot_refs(ctx, f, pagecount, page_object_nums);
+ fz_free(ctx, page_object_nums);
+ pdf_drop_obj(ctx, allfields);
+ pdf_drop_obj(ctx, root);
+ pdf_drop_obj(ctx, kids);
}
-
- /* For terminal fields, if action destination is not valid,
- * remove the action */
- for (i = 0; i < pdf_array_len(ctx, allfields); i++)
+ fz_catch(ctx)
{
- pdf_obj *f = pdf_array_get(ctx, allfields, i);
-
- if (!dest_is_valid(ctx, f, pagecount, page_object_nums, names_list))
- pdf_dict_del(ctx, f, PDF_NAME(A));
+ fz_rethrow(ctx);
}
+}
- if (strip_outlines(ctx, doc, outlines, pagecount, page_object_nums, names_list) == 0)
+void pdf_rearrange_pages(fz_context *ctx, pdf_document *doc, int count, int *new_page_list)
+{
+ pdf_begin_operation(ctx, doc, "Rearrange pages");
+ fz_try(ctx)
{
- pdf_dict_del(ctx, root, PDF_NAME(Outlines));
+ pdf_rearrange_pages_imp(ctx, doc, count, new_page_list);
+ pdf_end_operation(ctx, doc);
+ }
+ fz_catch(ctx)
+ {
+ pdf_abandon_operation(ctx, doc);
+ fz_rethrow(ctx);
}
-
- fz_free(ctx, page_object_nums);
- pdf_drop_obj(ctx, allfields);
- pdf_drop_obj(ctx, names_list);
- pdf_drop_obj(ctx, root);
}
void pdf_clean_file(fz_context *ctx, char *infile, char *outfile, char *password, pdf_write_options *opts, int argc, char *argv[])
{
- globals glo = { 0 };
+ pdf_document *pdf = NULL;
+ int *pages = NULL;
+ int cap, len, page;
- glo.ctx = ctx;
+ fz_var(pdf);
+ fz_var(pages);
fz_try(ctx)
{
- glo.doc = pdf_open_document(ctx, infile);
- if (pdf_needs_password(ctx, glo.doc))
- if (!pdf_authenticate_password(ctx, glo.doc, password))
- fz_throw(glo.ctx, FZ_ERROR_GENERIC, "cannot authenticate password: %s", infile);
+ pdf = pdf_open_document(ctx, infile);
+ if (pdf_needs_password(ctx, pdf))
+ if (!pdf_authenticate_password(ctx, pdf, password))
+ fz_throw(ctx, FZ_ERROR_GENERIC, "cannot authenticate password: %s", infile);
/* Only retain the specified subset of the pages */
if (argc)
- retainpages(ctx, &glo, argc, argv);
+ {
+ int pagecount = pdf_count_pages(ctx, pdf);
+ int argidx = 0;
+
+ len = cap = 0;
+
+ while (argc - argidx)
+ {
+ int spage, epage;
+ const char *pagelist = argv[argidx];
+
+ while ((pagelist = fz_parse_page_range(ctx, pagelist, &spage, &epage, pagecount)))
+ {
+ if (len + (epage - spage + 1) >= cap)
+ {
+ int n = cap ? cap * 2 : 8;
+ pages = fz_realloc_array(ctx, pages, n, int);
+ cap = n;
+ }
+
+ if (spage < epage)
+ for (page = spage; page <= epage; ++page)
+ pages[len++] = page - 1;
+ else
+ for (page = spage; page >= epage; --page)
+ pages[len++] = page - 1;
+ }
+
+ argidx++;
+ }
+
+ pdf_rearrange_pages(ctx, pdf, len, pages);
+ }
- pdf_save_document(ctx, glo.doc, outfile, opts);
+ pdf_save_document(ctx, pdf, outfile, opts);
}
fz_always(ctx)
{
- pdf_drop_document(ctx, glo.doc);
+ fz_free(ctx, pages);
+ pdf_drop_document(ctx, pdf);
}
fz_catch(ctx)
{