1 module pdfinfod;
2
3 import std.array;
4 import std.algorithm.iteration : filter, map, splitter;
5 import std.datetime.systime;
6 import std.typecons;
7 import std.conv : to;
8 import std.exception : enforce;
9 import std.process : execute;
10 import std.file : exists;
11 import std.string;
12
13 @safe:
14
/**
 * Metadata describing a single PDF file.
 *
 * `parsePdfInfo` fills a field when a parsed line's key (lower-cased, with
 * spaces replaced by underscores) equals the lower-cased field name, so the
 * field names and their order are part of the parsing contract.
 * `sizeInBytes` is not parsed; `extractPdfInfo` sets it from the file system.
 */
struct PdfInfo {
    // Free-text document properties.
    string title, author, producer, creator;

    // Timestamps, parsed from ISO 8601 extended strings.
    SysTime creationDate, modDate;

    // Flags; true when the reported value is "yes".
    bool custom_metadata, metadata_stream, tagged, userProperties, suspects;

    string form;
    bool javaScript;
    long pages;
    bool encrypted;

    // Page geometry of the reported page: raw size text and rotation.
    string page_size;
    double page_rot;

    // Size of the file on disk, in bytes (set by extractPdfInfo).
    long sizeInBytes;

    bool linearized, optimized;
    string pdf_version;
}
39
/**
 * Runs the external `pdfinfo` program on a PDF file and returns its report
 * as a `PdfInfo`.
 *
 * `pdfinfo` is invoked with `-isodates`; its output is parsed by
 * `parsePdfInfo`, and `sizeInBytes` is then filled in with
 * `std.file.getSize`.
 *
 * Params:
 *     pdfFilename = path of the PDF file to inspect
 *
 * Returns: the parsed metadata, including the on-disk size of the file.
 *
 * Throws: `Exception` if `pdfFilename` does not exist or if `pdfinfo` exits
 *         with a non-zero status. Failing to start `pdfinfo` at all is
 *         reported by `std.process.execute` itself.
 */
PdfInfo extractPdfInfo(string pdfFilename) {
    import std.file : getSize;

    enforce(exists(pdfFilename), "'" ~ pdfFilename ~ "' does not exist");

    auto pdfinfo = execute(["pdfinfo", "-isodates", pdfFilename]);

    // A non-zero status means pdfinfo could not produce a report. The
    // captured output then holds error text rather than metadata, so parsing
    // it would silently yield a mostly empty PdfInfo.
    enforce(pdfinfo.status == 0,
        "pdfinfo failed for '" ~ pdfFilename ~ "' (exit status "
        ~ pdfinfo.status.to!string() ~ "): " ~ pdfinfo.output.strip());

    PdfInfo ret = parsePdfInfo(pdfinfo.output);
    ret.sizeInBytes = getSize(pdfFilename);
    return ret;
}
52
// End-to-end check: runs extractPdfInfo on the "test.pdf" fixture and
// compares the parsed fields against known values.
unittest {
    import std.math : isClose;

    // Creation and modification dates are expected to be the same instant.
    auto expectedStamp = SysTime.fromISOExtString("2022-04-13T08:40:11+02:00");

    auto info = extractPdfInfo("test.pdf");

    assert(info.title == "Dlang pdfinfo test file");
    assert(info.author == "Yours truly");
    assert(info.producer == "pdflatex with hyperref on archlinux");
    assert(info.creator == "pdflatex");

    assert(info.creationDate == expectedStamp);
    assert(info.modDate == expectedStamp);

    assert(info.page_size == "595.276 x 841.89 pts (A4)");
    assert(isClose(info.page_rot, 0));
    assert(info.pdf_version == "1.5");
}
66
67
68 private:
69
/**
 * Builds a `PdfInfo` from the textual output of `pdfinfo`.
 *
 * The output is split into key/value lines by `splitOutput`. For every line
 * whose normalised key equals the lower-cased name of a `PdfInfo` field, the
 * value is converted according to the field's type:
 * `bool` from a "yes" flag, `string` verbatim, `long`/`double` via
 * `std.conv.to`, and `SysTime` via `parseSystime`. Lines with keys that match
 * no field are ignored; fields without a matching line keep their `.init`.
 *
 * Params:
 *     input = complete output of `pdfinfo`
 *
 * Returns: the populated `PdfInfo`.
 *
 * Throws: whatever `std.conv.to` or `parseSystime` throw when a matched
 *         numeric or date value cannot be converted.
 */
PdfInfo parsePdfInfo(string input) {
    import std.algorithm.searching : startsWith;
    import std.traits : FieldNameTuple;

    PdfLine[] lines = splitOutput(input);
    PdfInfo ret;

    foreach(line; lines) {
        // Compile-time loop over the fields; the double braces give every
        // generated copy its own scope for `memLower` and `MemType`.
        static foreach(mem; FieldNameTuple!PdfInfo) {{
            enum memLower = mem.toLower();
            if(line.key == memLower) {
                alias MemType = typeof(__traits(getMember, PdfInfo, mem));
                static if(is(MemType == bool)) {
                    // pdfinfo may append details after the flag, e.g.
                    // "Encrypted: yes (print:yes copy:no ...)", so a bare
                    // equality test would report such files as "no".
                    __traits(getMember, ret, mem) =
                        line.value == "yes" || line.value.startsWith("yes ");
                } else static if(is(MemType == string)) {
                    __traits(getMember, ret, mem) = line.value;
                } else static if(is(MemType == long)) {
                    __traits(getMember, ret, mem) = line.value.to!long();
                } else static if(is(MemType == double)) {
                    __traits(getMember, ret, mem) = line.value.to!double();
                } else static if(is(MemType == SysTime)) {
                    __traits(getMember, ret, mem) = parseSystime(line.value);
                }
            }
        }}
    }

    return ret;
}
98
// One "key: value" line of pdfinfo output, as produced by splitLine:
// `key` is the normalised text before the first colon (lower-cased, spaces
// replaced by underscores), `value` is the stripped text after it.
struct PdfLine {
    string key, value;
}
103
// Splits the whole pdfinfo output at "\n" and returns, in order, the
// key/value pair of every line that splitLine could parse; lines without a
// colon are dropped.
pure PdfLine[] splitOutput(string output) {
    PdfLine[] collected;

    foreach(rawLine; output.splitter("\n")) {
        auto candidate = splitLine(rawLine);
        if(candidate.isNull()) {
            continue;
        }
        collected ~= candidate.get();
    }

    return collected;
}
111
// Splits one output line at its first colon.
// The text before the colon becomes the key: stripped, spaces replaced by
// underscores, lower-cased. The text after it becomes the stripped value.
// Returns a null Nullable when the line contains no colon.
pure Nullable!PdfLine splitLine(string line) {
    const colonAt = line.indexOf(":");
    if(colonAt < 0) {
        return typeof(return).init;
    }

    string key = line[0 .. colonAt].strip().replace(" ", "_").toLower();
    string value = line[colonAt + 1 .. $].strip();

    return nullable(PdfLine(key, value));
}
122
// Converts an ISO 8601 extended date/time string into a SysTime by
// delegating to SysTime.fromISOExtString.
SysTime parseSystime(string datetime) {
    SysTime parsed = datetime.fromIsoExt();
    return parsed;
}

// UFCS helper so the conversion reads as a property of the string.
private SysTime fromIsoExt(string text) {
    return SysTime.fromISOExtString(text);
}