module pdfinfod;

import std.array;
import std.algorithm.iteration : filter, map, splitter;
import std.datetime.systime;
import std.typecons;
import std.conv : to;
import std.exception : enforce;
import std.process : execute;
import std.file : exists;
import std.string;

@safe:

/**
 * Metadata of a PDF document as reported by the `pdfinfo` command line tool.
 *
 * Every field except `sizeInBytes` is filled from the `pdfinfo` output line
 * whose key, lower-cased and with spaces replaced by underscores, equals the
 * lower-cased field name (e.g. `Page size:` -> `page_size`,
 * `CreationDate:` -> `creationDate`). Fields without a matching output line
 * keep their `.init` value.
 */
struct PdfInfo {
    string title;
    string author;
    string producer;
    string creator;
    SysTime creationDate;
    SysTime modDate;
    bool custom_metadata;
    bool metadata_stream;
    bool tagged;
    bool userProperties;
    bool suspects;
    string form;
    bool javaScript;
    long pages;
    bool encrypted;
    string page_size;
    double page_rot;
    /// Size of the file on disk; taken from the file system, not from `pdfinfo`.
    long sizeInBytes;
    bool linearized;
    bool optimized;
    string pdf_version;
}

/**
 * Runs `pdfinfo -isodates` on `pdfFilename` and returns the parsed metadata.
 *
 * Params:
 *     pdfFilename = path of the PDF file to inspect
 *
 * Returns: a `PdfInfo` filled from the tool's output, with `sizeInBytes`
 *     set to the size of the file on disk.
 *
 * Throws: `Exception` if the file does not exist or if `pdfinfo` exits with
 *     a non-zero status. Whatever `std.process.execute` throws when the
 *     program cannot be started, and whatever the value conversions throw on
 *     malformed numbers or dates, is propagated unchanged.
 */
PdfInfo extractPdfInfo(string pdfFilename) {
    import std.file : getSize;

    enforce(exists(pdfFilename), "'" ~ pdfFilename ~ "' does not exist");

    auto pdfinfo = execute(["pdfinfo", "-isodates", pdfFilename]);

    // A non-zero exit status means pdfinfo could not process the file; its
    // output is then diagnostic text, not metadata, and must not be parsed
    // into a seemingly valid (but empty) result.
    enforce(pdfinfo.status == 0,
        "pdfinfo failed for '" ~ pdfFilename ~ "' (exit status "
        ~ pdfinfo.status.to!string() ~ "): " ~ pdfinfo.output.strip());

    PdfInfo ret = parsePdfInfo(pdfinfo.output);
    ret.sizeInBytes = getSize(pdfFilename);
    return ret;
}

unittest {
    import std.math : isClose;
    auto parsed = extractPdfInfo("test.pdf");
    assert(parsed.title == "Dlang pdfinfo test file");
    assert(parsed.author == "Yours truly");
    assert(parsed.producer == "pdflatex with hyperref on archlinux");
    assert(parsed.creator == "pdflatex");
    assert(parsed.creationDate == SysTime.fromISOExtString("2022-04-13T08:40:11+02:00"));
    assert(parsed.modDate == SysTime.fromISOExtString("2022-04-13T08:40:11+02:00"));
    assert(parsed.page_size == "595.276 x 841.89 pts (A4)");
    assert(isClose(parsed.page_rot, 0));
    assert(parsed.pdf_version == "1.5");
}


private:

/**
 * Converts the textual output of `pdfinfo` into a `PdfInfo`.
 *
 * Each `key: value` line is matched against the lower-cased names of the
 * `PdfInfo` fields and the value is converted according to the field type.
 * Lines whose key matches no field are ignored.
 *
 * Params:
 *     input = complete output of `pdfinfo`, one `key: value` pair per line
 *
 * Returns: the populated `PdfInfo`; unmatched fields keep their `.init` value.
 */
PdfInfo parsePdfInfo(string input) {
    import std.algorithm.searching : startsWith;
    import std.traits : FieldNameTuple;

    PdfLine[] lines = splitOutput(input);
    PdfInfo ret;

    foreach(line; lines) {
        // Unrolled at compile time: one comparison per PdfInfo field.
        static foreach(mem; FieldNameTuple!PdfInfo) {{
            enum memLower = mem.toLower();
            if(line.key == memLower) {
                alias MemType = typeof(__traits(getMember, PdfInfo, mem));
                static if(is(MemType == bool)) {
                    // pdfinfo appends details to some flags, e.g.
                    // "Encrypted: yes (print:yes copy:no ...)", so only the
                    // leading "yes" is significant.
                    __traits(getMember, ret, mem) = line.value.startsWith("yes");
                } else static if(is(MemType == string)) {
                    __traits(getMember, ret, mem) = line.value;
                } else static if(is(MemType == long)) {
                    __traits(getMember, ret, mem) = line.value.to!long();
                } else static if(is(MemType == double)) {
                    __traits(getMember, ret, mem) = line.value.to!double();
                } else static if(is(MemType == SysTime)) {
                    __traits(getMember, ret, mem) = parseSystime(line.value);
                }
            }
        }}
    }

    return ret;
}

/// One `key: value` pair of the `pdfinfo` output, with the key normalized.
struct PdfLine {
    string key;
    string value;
}

/// Splits `output` into lines and returns the ones that contain a
/// `key: value` pair; lines without a colon are dropped.
pure PdfLine[] splitOutput(string output) {
    return output.splitter("\n")
        .map!(line => splitLine(line))
        .filter!(n => !n.isNull())
        .map!(n => n.get())
        .array;
}

/**
 * Splits a single line at its first colon.
 *
 * The key is stripped, its spaces are replaced by underscores and it is
 * lower-cased; the value is only stripped, so colons inside the value
 * (e.g. in timestamps) are preserved.
 *
 * Returns: the pair, or a null `Nullable` if the line contains no colon.
 */
pure Nullable!PdfLine splitLine(string line) {
    ptrdiff_t firstColon = line.indexOf(":");
    if(firstColon == -1) {
        return Nullable!(PdfLine).init;
    }

    return PdfLine(line[0 .. firstColon].strip().replace(" ", "_").toLower()
            , line[firstColon + 1 .. $].strip()
        ).nullable();
}

/// Parses an ISO 8601 extended timestamp, the format in which
/// `pdfinfo -isodates` is expected to print dates.
SysTime parseSystime(string datetime) {
    return SysTime.fromISOExtString(datetime);
}