1 module pdfinfod;
2 
3 import std.array;
4 import std.algorithm.iteration : filter, map, splitter;
5 import std.datetime.systime;
6 import std.typecons;
7 import std.conv : to;
8 import std.exception : enforce;
9 import std.process : execute;
10 import std.file : exists;
11 import std.string;
12 
13 @safe:
14 
/**
 * Metadata describing a single PDF document.
 *
 * Apart from `sizeInBytes`, every field is filled by `parsePdfInfo`, which
 * matches the lowercased field name against the label of a `label: value`
 * line (label lowercased, spaces replaced by underscores). Fields whose
 * label is absent from the parsed text keep their `.init` value.
 *
 * The order and the names of the fields are significant: the parser
 * discovers them by compile-time reflection.
 */
struct PdfInfo {
	/// Value of the `title` line.
	string title;
	/// Value of the `author` line.
	string author;
	/// Value of the `producer` line.
	string producer;
	/// Value of the `creator` line.
	string creator;
	/// Value of the `creationdate` line, parsed as an ISO extended timestamp.
	SysTime creationDate;
	/// Value of the `moddate` line, parsed as an ISO extended timestamp.
	SysTime modDate;
	/// Flag taken from the `custom metadata` line.
	bool custom_metadata;
	/// Flag taken from the `metadata stream` line.
	bool metadata_stream;
	/// Flag taken from the `tagged` line.
	bool tagged;
	/// Flag taken from the `userproperties` line.
	bool userProperties;
	/// Flag taken from the `suspects` line.
	bool suspects;
	/// Value of the `form` line, kept as text.
	string form;
	/// Flag taken from the `javascript` line.
	bool javaScript;
	/// Value of the `pages` line, converted to an integer.
	long pages;
	/// Flag taken from the `encrypted` line.
	bool encrypted;
	/// Value of the `page size` line, kept as text.
	string page_size;
	/// Value of the `page rot` line, converted to a floating point number.
	double page_rot;
	/// Size of the file on disk; set by `extractPdfInfo`, not by the parser.
	long sizeInBytes;
	/// Flag taken from the `linearized` line.
	bool linearized;
	/// Flag taken from the `optimized` line.
	bool optimized;
	/// Value of the `pdf version` line, kept as text.
	string pdf_version;
}
39 
/**
 * Runs the external `pdfinfo` program on a PDF file and returns its output
 * as a `PdfInfo`.
 *
 * `pdfinfo` is invoked with `-isodates`; the date lines of its output are
 * later parsed with `SysTime.fromISOExtString`.
 *
 * Params:
 *     pdfFilename = path of the PDF file to inspect
 *
 * Returns: the parsed metadata, with `sizeInBytes` set to the size of the
 *     file on disk.
 *
 * Throws: `Exception` if the file does not exist or if `pdfinfo` exits with
 *     a non-zero status. Errors raised while starting the process or while
 *     parsing its output propagate unchanged.
 */
PdfInfo extractPdfInfo(string pdfFilename) {
	import std.file : getSize;

	enforce(exists(pdfFilename), "'" ~ pdfFilename ~ "' does not exist");

	auto pdfinfo = execute(["pdfinfo", "-isodates", pdfFilename]);

	// A failed run (e.g. a corrupt or non-PDF file) must not be mistaken for
	// a document without metadata, so surface the tool's own diagnostics.
	enforce(pdfinfo.status == 0
		, "pdfinfo failed for '" ~ pdfFilename ~ "' with exit status "
			~ pdfinfo.status.to!string() ~ ": " ~ pdfinfo.output.strip());

	PdfInfo ret = parsePdfInfo(pdfinfo.output);
	ret.sizeInBytes = getSize(pdfFilename);
	return ret;
}
52 
// End-to-end check against the fixture file "test.pdf"; needs the `pdfinfo`
// executable to be available when the tests run.
unittest {
	import std.math : isClose;

	// Creation and modification date of the fixture share one timestamp.
	auto expectedStamp = SysTime.fromISOExtString("2022-04-13T08:40:11+02:00");

	auto info = extractPdfInfo("test.pdf");

	// Plain text fields.
	assert(info.title == "Dlang pdfinfo test file");
	assert(info.author == "Yours truly");
	assert(info.producer == "pdflatex with hyperref on archlinux");
	assert(info.creator == "pdflatex");

	// Date fields.
	assert(info.creationDate == expectedStamp);
	assert(info.modDate == expectedStamp);

	// Page geometry and version.
	assert(info.page_size == "595.276 x 841.89 pts (A4)");
	assert(isClose(info.page_rot, 0));
	assert(info.pdf_version == "1.5");
}
66 
67 
68 private:
69 
/**
 * Converts the textual output of `pdfinfo` into a `PdfInfo`.
 *
 * The text is split into key/value lines by `splitOutput`. Each key is
 * compared with the lowercased name of every `PdfInfo` field; on a match the
 * value is converted according to the type of that field:
 * $(UL
 *   $(LI `bool`: `true` if the value is `yes`, optionally followed by
 *        further details after a space)
 *   $(LI `string`: stored unchanged)
 *   $(LI `long` / `double`: converted with `std.conv.to`)
 *   $(LI `SysTime`: converted with `parseSystime`)
 * )
 * Lines whose key matches no field are ignored, and fields without a
 * matching line keep their `.init` value.
 *
 * Params:
 *     input = the complete output of a `pdfinfo` run
 *
 * Returns: the populated `PdfInfo`.
 *
 * Throws: whatever the numeric or date conversions throw on a malformed
 *     value.
 */
PdfInfo parsePdfInfo(string input) {
	import std.algorithm.searching : startsWith;
	import std.traits : FieldNameTuple;

	PdfLine[] lines = splitOutput(input);
	PdfInfo ret;

	foreach(line; lines) {
		// Unrolled at compile time: one comparison per PdfInfo field.
		static foreach(mem; FieldNameTuple!PdfInfo) {{
			enum memLower = mem.toLower();
			if(line.key == memLower) {
				alias MemType = typeof(__traits(getMember, PdfInfo, mem));
				static if(is(MemType == bool)) {
					// pdfinfo may append details to a positive answer, e.g.
					// "yes (print:yes copy:yes ...)" on the Encrypted line,
					// so accept a bare "yes" as well as "yes <details>".
					__traits(getMember, ret, mem) = (line.value == "yes")
						|| line.value.startsWith("yes ");
				} else static if(is(MemType == string)) {
					__traits(getMember, ret, mem) = line.value;
				} else static if(is(MemType == long)) {
					__traits(getMember, ret, mem) = line.value.to!long();
				} else static if(is(MemType == double)) {
					__traits(getMember, ret, mem) = line.value.to!double();
				} else static if(is(MemType == SysTime)) {
					__traits(getMember, ret, mem) = parseSystime(line.value);
				}
			}
		}}
	}

	return ret;
}
98 
/// One `key: value` line of the `pdfinfo` output, as produced by `splitLine`.
struct PdfLine {
	/// Text before the first colon: stripped, spaces replaced by
	/// underscores, lowercased.
	string key;
	/// Text after the first colon, stripped of surrounding whitespace.
	string value;
}
103 
/**
 * Breaks the `pdfinfo` output into its key/value lines.
 *
 * The text is split at `"\n"`; every piece is handed to `splitLine`, and
 * pieces for which it returns a null result (no colon) are dropped.
 *
 * Params:
 *     output = the complete text to split
 *
 * Returns: the recognised lines, in their original order.
 */
pure PdfLine[] splitOutput(string output) {
	PdfLine[] recognised;

	foreach(rawLine; output.splitter("\n")) {
		auto candidate = splitLine(rawLine);
		if(candidate.isNull()) {
			continue;
		}
		recognised ~= candidate.get();
	}

	return recognised;
}
111 
/**
 * Splits a single line at its first colon into a `PdfLine`.
 *
 * The key is the text before the colon: stripped, with spaces replaced by
 * underscores, and lowercased. The value is the stripped text after the
 * colon, so any further colons stay part of the value.
 *
 * Params:
 *     line = one line of text
 *
 * Returns: the key/value pair, or a null `Nullable` if the line contains no
 *     colon.
 */
pure Nullable!PdfLine splitLine(string line) {
	immutable colonAt = line.indexOf(":");
	if(colonAt < 0) {
		return typeof(return).init;
	}

	string label = line[0 .. colonAt];
	string content = line[colonAt + 1 .. $];

	string key = label.strip().replace(" ", "_").toLower();
	return nullable(PdfLine(key, content.strip()));
}
122 
/**
 * Parses a timestamp in ISO extended format into a `SysTime`.
 *
 * Params:
 *     datetime = the timestamp text, e.g. `2022-04-13T08:40:11+02:00`
 *
 * Returns: the corresponding `SysTime`.
 *
 * Throws: whatever `SysTime.fromISOExtString` throws on malformed input.
 */
SysTime parseSystime(string datetime) {
	auto parsed = SysTime.fromISOExtString(datetime);
	return parsed;
}