有时需要从 PDF 中提取图片,此时可以使用 MuPDF 库。如果使用 VS2013 进行编译,就需要注意 MuPDF 库的版本:经过多次尝试,最终能够成功编译的版本是 1.15.0。代码示例如下:
// TestPdf2png.cpp : Defines the entry point for the console application.
#include "stdafx.h"
/* * pdfextract -- the ultimate way to extract images and fonts from pdfs */
#include "mupdf/fitz.h" #include "mupdf/pdf.h"
#include <stdlib.h> #include <stdio.h>
/* File-wide state read by the helper functions below. */

/* Currently open PDF document; NULL until _tmain opens one. */
static pdf_document *doc = NULL;

/* MuPDF context used for every library call in this file. */
static fz_context *ctx = NULL;

/* When non-zero, extracted images are converted to RGB before saving. */
static int dorgb = 0;
/*
 * Print the command-line synopsis on stderr and terminate the process
 * with exit status 1. Never returns.
 */
static void usage(void)
{
	fputs("usage: mutool extract [options] file.pdf [object numbers]\n", stderr);
	fputs("\t-p\tpassword\n", stderr);
	fputs("\t-r\tconvert images to rgb\n", stderr);
	exit(1);
}
/*
 * Return non-zero when the Subtype entry of the dictionary obj is the
 * name Image, zero otherwise.
 */
static int isimage(pdf_obj *obj)
{
	return pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), PDF_NAME(Image));
}
/*
 * Return non-zero when the Type entry of the dictionary obj is the
 * name FontDescriptor, zero otherwise.
 */
static int isfontdesc(pdf_obj *obj)
{
	return pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Type)), PDF_NAME(FontDescriptor));
}
/*
 * Save a pixmap to "<file>.png" (three or fewer colour components) or
 * "<file>.pam" (more than three).
 *
 * ctx    - MuPDF context used for all calls.
 * pix    - pixmap to save; a NULL pixmap is silently ignored.
 * file   - output path without extension.
 * dorgb  - when non-zero and the pixmap has a colorspace other than
 *          device RGB, a converted RGB copy is saved instead.
 *
 * Errors raised by MuPDF are propagated to the caller. The temporary RGB
 * copy is released on both the success and the error path.
 */
static void writepixmap(fz_context *ctx, fz_pixmap *pix, char *file, int dorgb)
{
	char buf[1024];
	fz_pixmap *rgb = NULL;
	fz_pixmap *src;

	if (!pix)
		return;

	/*
	 * Convert before entering the try block: if the conversion itself
	 * throws, rgb is still NULL and there is nothing to release.
	 */
	if (dorgb && pix->colorspace && pix->colorspace != fz_device_rgb(ctx))
		rgb = fz_convert_pixmap(ctx, pix, fz_device_rgb(ctx), NULL, NULL, NULL /* FIXME */, 1);

	src = rgb ? rgb : pix;

	fz_try(ctx)
	{
		if (src->n - src->alpha <= 3)
		{
			fz_snprintf(buf, sizeof(buf), "%s.png", file);
			printf("extracting image %s\n", buf);
			fz_save_pixmap_as_png(ctx, src, buf);
		}
		else
		{
			fz_snprintf(buf, sizeof(buf), "%s.pam", file);
			printf("extracting image %s\n", buf);
			fz_save_pixmap_as_pam(ctx, src, buf);
		}
	}
	fz_always(ctx)
	{
		/* Runs even when saving throws, so the RGB copy cannot leak. */
		fz_drop_pixmap(ctx, rgb);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
/*
 * Write len bytes starting at data, unmodified, to the file "<file>.jpg".
 *
 * The output is closed on success and always dropped; any MuPDF error
 * raised while writing is rethrown to the caller.
 */
static void writejpeg(fz_context *ctx, const unsigned char *data, size_t len, const char *file)
{
	char path[1024];
	fz_output *sink;

	fz_snprintf(path, sizeof(path), "%s.jpg", file);
	sink = fz_new_output_with_path(ctx, path, 0);

	fz_try(ctx)
	{
		printf("extracting image %s\n", path);
		fz_write_data(ctx, sink, data, len);
		fz_close_output(ctx, sink);
	}
	fz_always(ctx)
	{
		fz_drop_output(ctx, sink);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
/*
 * Extract the image referenced by ref into a file named "img-NNNN" plus an
 * extension, where NNNN is the object number of ref.
 *
 * The compressed data is written out as-is via writejpeg() only when the
 * image's compressed buffer is of JPEG type and nothing forces a decode
 * (colour key, decode array, mask, or - when dorgb is set - a colorspace
 * that is neither RGB nor gray). In every other case the image is decoded
 * to a pixmap and saved through writepixmap().
 *
 * The loaded image and pixmap are always dropped; errors are rethrown.
 */
static void saveimage(pdf_obj *ref)
{
	fz_image *image = NULL;
	fz_pixmap *pix = NULL;
	char name[32];
	fz_compressed_buffer *cbuf;
	int type;

	fz_var(image);
	fz_var(pix);

	fz_try(ctx)
	{
		image = pdf_load_image(ctx, doc, ref);
		cbuf = fz_compressed_image_buffer(ctx, image);
		fz_snprintf(name, sizeof(name), "img-%04d", pdf_to_num(ctx, ref));

		if (cbuf == NULL)
			type = FZ_IMAGE_UNKNOWN;
		else
			type = cbuf->params.type;

		/* Any of these properties means the raw stream cannot be copied verbatim. */
		if (image->use_colorkey || image->use_decode || image->mask)
			type = FZ_IMAGE_UNKNOWN;

		if (dorgb)
		{
			enum fz_colorspace_type ctype = fz_colorspace_type(ctx, image->colorspace);
			if (!(ctype == FZ_COLORSPACE_RGB || ctype == FZ_COLORSPACE_GRAY))
				type = FZ_IMAGE_UNKNOWN;
		}

		if (type != FZ_IMAGE_JPEG)
		{
			pix = fz_get_pixmap_from_image(ctx, image, NULL, NULL, 0, 0);
			writepixmap(ctx, pix, name, dorgb);
		}
		else
		{
			unsigned char *data;
			size_t len = fz_buffer_storage(ctx, cbuf->buffer, &data);
			writejpeg(ctx, data, len, name);
		}
	}
	fz_always(ctx)
	{
		fz_drop_image(ctx, image);
		fz_drop_pixmap(ctx, pix);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
/*
 * Write the embedded font program referenced by the font descriptor dict to
 * a file named "<FontName>-NNNN.<ext>", where NNNN is the object number of
 * dict and <FontName> falls back to "font" when the entry is absent.
 *
 * The stream and extension are chosen from FontFile ("pfa"), FontFile2
 * ("ttf") and FontFile3 ("cff", "cid" or "otf" depending on its Subtype);
 * when several entries are present the last one checked wins. A descriptor
 * with none of them only produces a warning.
 *
 * Throws on a FontFile3 whose Subtype is not a name or is not one of the
 * handled types; errors raised while writing are rethrown.
 */
static void savefont(pdf_obj *dict)
{
	char path[1024];
	fz_buffer *contents;
	pdf_obj *stream = NULL;
	pdf_obj *entry;
	char *ext = "";
	fz_output *out;
	const char *fontname = "font";
	size_t len;
	unsigned char *data;

	entry = pdf_dict_get(ctx, dict, PDF_NAME(FontName));
	if (entry)
		fontname = pdf_to_name(ctx, entry);

	entry = pdf_dict_get(ctx, dict, PDF_NAME(FontFile));
	if (entry)
	{
		stream = entry;
		ext = "pfa";
	}

	entry = pdf_dict_get(ctx, dict, PDF_NAME(FontFile2));
	if (entry)
	{
		stream = entry;
		ext = "ttf";
	}

	entry = pdf_dict_get(ctx, dict, PDF_NAME(FontFile3));
	if (entry)
	{
		pdf_obj *subtype;

		stream = entry;
		subtype = pdf_dict_get(ctx, entry, PDF_NAME(Subtype));

		if (subtype && !pdf_is_name(ctx, subtype))
			fz_throw(ctx, FZ_ERROR_GENERIC, "invalid font descriptor subtype");

		if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1C)))
			ext = "cff";
		else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0C)))
			ext = "cid";
		else if (pdf_name_eq(ctx, subtype, PDF_NAME(OpenType)))
			ext = "otf";
		else
			fz_throw(ctx, FZ_ERROR_GENERIC, "unhandled font type '%s'", pdf_to_name(ctx, subtype));
	}

	if (!stream)
	{
		fz_warn(ctx, "unhandled font type");
		return;
	}

	contents = pdf_load_stream(ctx, stream);
	len = fz_buffer_storage(ctx, contents, &data);

	fz_try(ctx)
	{
		fz_snprintf(path, sizeof(path), "%s-%04d.%s", fontname, pdf_to_num(ctx, dict), ext);
		printf("extracting font %s\n", path);
		out = fz_new_output_with_path(ctx, path, 0);

		/* Inner guard: the output must be dropped whether or not writing succeeds. */
		fz_try(ctx)
		{
			fz_write_data(ctx, out, data, len);
			fz_close_output(ctx, out);
		}
		fz_always(ctx)
		{
			fz_drop_output(ctx, out);
		}
		fz_catch(ctx)
		{
			fz_rethrow(ctx);
		}
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, contents);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
static void extractobject(int num) { ?? ?pdf_obj *ref;
?? ?if (!doc) ?? ??? ?fz_throw(ctx, FZ_ERROR_GENERIC, "no file specified");
?? ?fz_try(ctx) ?? ?{ ?? ??? ?ref = pdf_new_indirect(ctx, doc, num, 0); ?? ??? ?if (isimage(ref)) ?? ??? ??? ?saveimage(ref); ?? ??? ?if (isfontdesc(ref)) ?? ??? ??? ?savefont(ref); ?? ?} ?? ?fz_always(ctx) ?? ??? ?pdf_drop_obj(ctx, ref); ?? ?fz_catch(ctx) ?? ??? ?fz_warn(ctx, "ignoring object %d", num); }
/*
 * Program entry point: open a PDF and extract every image and embedded
 * font found among its objects into the current directory.
 *
 * argc, argv - unused; the input path is hard-coded below and an empty
 *              password is tried when the document requires one.
 *
 * Returns 0 on success, 1 when the context cannot be created or when
 * opening/authenticating the document fails. The document and context are
 * released on every path after the context has been created.
 */
int _tmain(int argc, _TCHAR* argv[])
{
	const char *infile = "C:\\Users\\14713\\Desktop\\1.pdf";
	const char *password = "";
	int status = 0;
	int count;
	int o;

	(void)argc;
	(void)argv;

	ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
	if (!ctx)
	{
		fprintf(stderr, "cannot initialise context\n");
		return 1;
	}

	/*
	 * Everything that can throw runs inside a try block; without one an
	 * error would be uncaught and the cleanup below would be skipped.
	 */
	fz_try(ctx)
	{
		doc = pdf_open_document(ctx, infile);
		if (pdf_needs_password(ctx, doc))
			if (!pdf_authenticate_password(ctx, doc, password))
				fz_throw(ctx, FZ_ERROR_GENERIC, "cannot authenticate password: %s", infile);

		/* The loop starts at object 1, as in the original code. */
		count = pdf_count_objects(ctx, doc);
		for (o = 1; o < count; o++)
			extractobject(o);
	}
	fz_always(ctx)
	{
		pdf_drop_document(ctx, doc);
		doc = NULL;
	}
	fz_catch(ctx)
	{
		fprintf(stderr, "error: %s\n", fz_caught_message(ctx));
		status = 1;
	}

	fz_flush_warnings(ctx);
	fz_drop_context(ctx);
	ctx = NULL;

	return status;
}
|