Sign in to follow this  
Followers 0
kaotkbliss

Merging pdfs

10 posts in this topic

I've been playing with the following code trying to merge pdfs (we'll be getting hundreds-thousands of pdfs that I'll have to merge into 1 before doing other stuff to it in another program.)

The pdfs merge great the problem is, if a pdf is an odd number of pages I want to add an additional blank pdf to make it an even number but that doesn't seem to want to be merging in :(

#Region ;**** Directives created by AutoIt3Wrapper_GUI ****
#AutoIt3Wrapper_Outfile=H:\Client\BRVO\Daily\Print\TESTING FOLDER\PDF Merger.exe
#AutoIt3Wrapper_UseUpx=n
#AutoIt3Wrapper_Add_Constants=n
#EndRegion ;**** Directives created by AutoIt3Wrapper_GUI ****
#include <Math.au3>
Dim $filelist[1]

If Not FileExists(@ScriptDir & "\PDFmerge.ini") Then
    IniWrite(@ScriptDir & "\PDFmerge.ini","LastFolder",1,"")
EndIf



$sourcepath = IniRead(@ScriptDir & "\PDFmerge.ini","LastFolder",1,"")

; example  merging files
$var= FileSelectFolder("Select folder", "","",$sourcepath) & "\"
if @error Then
    Exit
EndIf
IniWrite(@ScriptDir & "\PDFmerge.ini","LastFolder",1,$var)
$var2 = FileSaveDialog( "Select Filename", $var, "PDF Files (*.pdf)", 2)
If @error Then
    Exit
EndIf
MergeFiles($var,$var2)
; example end



func GetRotation($Targetpath)
;----------------------------------------------------
; Get the Rotation of a Page (eg. page 1)
; Ex.: msgbox(0,"Rotation",GetRotation("c:\pdft\test.pdf" ))
;----------------------------------------------------

    if not FileExists($Targetpath) then
        return -1
    endif

    $SourcePDF = ObjCreate("AcroExch.PDDoc")

    if not IsObj($SourcePDF) then
        return -2
    endif

    $b = $SourcePDF.Open($Targetpath)
    $rotation = $sourcePDF.AcquirePage(0).GetRotate
    $SourcePDF.close
    $sourcePDF=""
    return $rotation                                ; in Degree
EndFunc



func MergeFiles($SourcePath , $DestinationPath)

    if StringRight($DestinationPath,4) <> ".pdf" Then
        $DestinationPath = $DestinationPath&".pdf"
    EndIf

    if fileexists($DestinationPath) then
        FileDelete($DestinationPath)
    endif


    getfiles($sourcepath, "*.pdf")                                      ; Search Files in Dir
    if $filelist[0] < 2 then
        Msgbox(32,"Info", "Ther are less than two files in folder")
        exit
    endif

    ProgressOn("PDF Merge","Processing ...")

    $SourcePDF = ObjCreate("AcroExch.PDDoc")
    if IsObj($SourcePDF) then
        $b = $SourcePDF.Open($sourcepath & $filelist[1])

    $TargetPDF = ObjCreate("AcroExch.PDDoc")
    $b = $TargetPDF.Open($sourcepath & $filelist[1])

    $intSourcePgs = $SourcePDF.GetNumPages
    $intInsertPgs = $TargetPDF.GetNumPages

    $SourcePDF.InsertPages($intSourcePgs-1, $TargetPDF, 0, $intInsertPgs, False)
    if _MathCheckDiv($intInsertPgs/2) <> 2 Then
        $TargetPDF2 = ObjCreate("AcroExch.PDDoc")
        $b2 = $TargetPDF2.Open("H:\Client\BRVO\Daily\Print\Blank.pdf")

        $intInsertPgs2 = $TargetPDF2.GetNumPages
        if not $SourcePDF.InsertPages($intSourcePgs + $intInsertPgs -1, $TargetPDF2, 0, $intInsertPgs2, False) = -1  then
            $TargetPDF2.Close
            $TargetPDF2=""
            $intInsertPgs += 1
        EndIf
    EndIf
    $b = $SourcePDF.Save(1, $DestinationPath)
    $SourcePDF.Close
    $SourcePDF=""

    $SourcePDF = ObjCreate("AcroExch.PDDoc")
    if not IsObj($SourcePDF) then
        return -2
    endif

    $b = $SourcePDF.Open($DestinationPath)
    $intSourcePgs = $SourcePDF.GetNumPages

    for $n=2 to UBound($filelist)-1

        ProgressSet((($n-1)/$filelist[0]) * 100, $filelist[$n-1])

        $TargetPDF = ObjCreate("AcroExch.PDDoc")
        $b = $TargetPDF.Open($sourcepath & $filelist[$n])


        $intInsertPgs = $TargetPDF.GetNumPages

        if not $SourcePDF.InsertPages($intSourcePgs-1, $TargetPDF, 0, $intInsertPgs, False) = -1  then
            if _MathCheckDiv($intInsertPgs/2) <> 2 Then
                $TargetPDF2 = ObjCreate("AcroExch.PDDoc")
                $b2 = $TargetPDF2.Open("H:\Client\BRVO\Daily\Print\Blank.pdf")


                $intInsertPgs2 = $TargetPDF2.GetNumPages
                if not $SourcePDF.InsertPages($intSourcePgs + $intInsertPgs -1, $TargetPDF2, 0, $intInsertPgs2, False) = -1  then
                    $TargetPDF2.Close
                    $TargetPDF2=""
                    $intInsertPgs += 1
                EndIf
            EndIf
            $TargetPDF.Close
            $TargetPDF=""
        endif

        $intSourcePgs += $intInsertPgs


    next
    $b = $SourcePDF.Save(1, $DestinationPath)
    ProgressSet(100, $filelist[$filelist[0]])
    sleep(800)
    ProgressOff()
    endif

Endfunc




func getfiles($dir, $filter) ; search files in dir
Global $filelist[1]

    $n=0
    $search= FileFindFirstFile($dir & $filter)
; Check if the search was successful
    If $search = -1 Then
        MsgBox(0, "Error", "No files/directories matched the search pattern")
        Exit
    EndIf

    While 1
        $n=$n+1
        $file = FileFindNextFile($search)
        If @error Then ExitLoop
        redim $filelist[$n+1]
        $filelist[$n]= $file
        $filelist [0] = $n
    WEnd
; Close the search handle
    FileClose($search)
EndFunc

#cs
func PDFMerge($File1, $File2, $Fileout)
; ---------------------------------------
; File1 first  file
; File2 second file (inserted after)
; Fileout is the saving name
;
; returns  1 on success
; returns -1 Error insert Pages
; returns -2 Error Object Create
;----------------------------------------

    $SourcePDF = ObjCreate("AcroExch.PDDoc")
    if not IsObj($SourcePDF) then
        return -2
    endif

    $b = $SourcePDF.Open($sourcepath & $filelist[$n-1])

    $TargetPDF = ObjCreate("AcroExch.PDDoc")
    $b = $TargetPDF.Open($sourcepath & $filelist[$n])

    $intSourcePgs = $SourcePDF.GetNumPages
    $intInsertPgs = $TargetPDF.GetNumPages
    if not $SourcePDF.InsertPages($intSourcePgs-1, $TargetPDF, 0, $intInsertPgs, False) = -1  then
        $SourcePDF.Close
        $TargetPDF.Close
        $SourcePDF=""
        $TargetPDF=""
        return -1
    endif
    $b = $SourcePDF.Save(1, $DestinationPath)
    $SourcePDF.Close
    $TargetPDF.Close
    $SourcePDF=""
    $TargetPDF=""
    return 1
endfunc
#ce


func GetPagesCount($target); count pages
    $AcroPDDoc = ObjCreate("AcroExch.PDDoc")
    if @error then return -1
    $bPDF = $AcroPDDoc.Open($target)
    if @error then return -2
    $Pages= $AcroPDDoc.GetNumPages
    $bPDF = $AcroPDDoc.Close
    return $pages
endfunc




; Feel free to improve the code...

; Greetings Mozat90

010101000110100001101001011100110010000001101001011100110010000

001101101011110010010000001110011011010010110011100100001

My Android cat and mouse game
https://play.google.com/store/apps/details?id=com.KaosVisions.WhiskersNSqueek

We're gonna need another Timmy!

Share this post


Link to post
Share on other sites



Have you checked $b2 to make sure that it is able to open $TargetPDF2?   Is that a full-page blank sheet or an empty file?

Ian


My projects:

  • IP Scanner - Multi-threaded ping tool to scan your available networks for used and available IP addresses, shows ping times, resolves IPs in to host names, and allows individual IPs to be pinged.
  • INFSniff - Great technicians tool - a tool which scans DriverPacks archives for INF files and parses out the HWIDs to a database file, and rapidly scans the local machine's HWIDs, searches the database for matches, and installs them.
  • PPK3 (Persistent Process Killer V3) - Another for the techs - suppress running processes that you need to keep away, helpful when fighting spyware/viruses.
  • Sync Tool - Folder sync tool with lots of real time information and several checking methods.
  • USMT Front End - Front End for Microsoft's User State Migration Tool, including all files needed for USMT 3.01 and 4.01, 32 bit and 64 bit versions.
  • Audit Tool - Computer audit tool to gather vital hardware, Windows, and Office information for IT managers and field techs. Capabilities include creating a customized site agent.
  • CSV Viewer - Displays CSV files with automatic column sizing and font selection. Lines can also be copied to the clipboard for data extraction.
  • MyDirStat - Lists number and size of files on a drive or specified path, allows for deletion within the app.
  • 2048 Game - My version of 2048, fun tile game.
  • Juice Lab - Ecigarette liquid making calculator.
  • Data Protector - Secure notes to save sensitive information.
  • VHD Footer - Add a footer to a forensic hard drive image to allow it to be mounted or used as a virtual machine hard drive.
  • Find in File - Searches files containing a specified phrase.

Share this post


Link to post
Share on other sites

What additional information is needed to help solve this? It shouldn't be hard to adapt for testing as it's set up for you to choose your own folder of PDFs. The only thing that would need changed is the location of the blank PDF.


010101000110100001101001011100110010000001101001011100110010000

001101101011110010010000001110011011010010110011100100001

My Android cat and mouse game
https://play.google.com/store/apps/details?id=com.KaosVisions.WhiskersNSqueek

We're gonna need another Timmy!

Share this post


Link to post
Share on other sites

What does:

$intInsertPgs2 = $TargetPDF2.GetNumPages

return?


Common sense plays a role in the basics of understanding AutoIt... If you're lacking in that, do us all a favor, and step away from the computer.

Share this post


Link to post
Share on other sites

and what if you add 1 to it?


Forum Rules         Procedure for posting code

"I like pigs.  Dogs look up to us.  Cats look down on us.  Pigs treat us as equals."

- Sir Winston Churchill

Share this post


Link to post
Share on other sites

#7 ·  Posted (edited)

I hadn't thought of checking that. I would assume it returns a 1 as that's how many pages are in the PDF, but I will check it to make sure and be back with the result.

*edit*

Just as suspected, it returns a 1. I tried kylomis' suggestion and added 1 to that, but that didn't merge all the PDFs (our test folder has 300 PDFs of 7 pages so I should get 2400 pages. I only ended up with 930 some) and there was still no blank page added to the ones that did merge :(

Edited by kaotkbliss

010101000110100001101001011100110010000001101001011100110010000

001101101011110010010000001110011011010010110011100100001

My Android cat and mouse game
https://play.google.com/store/apps/details?id=com.KaosVisions.WhiskersNSqueek

We're gonna need another Timmy!

Share this post


Link to post
Share on other sites

You could always contact the original author of the code you're using, only looks like you've changed a couple of things that mozart90 wrote, maybe they could help.

I wrote a pdf library some time ago, I may still have it on a backup flash drive somewhere, I'll have a look tomorrow.

In the meantime, this guy seems to have written a pretty extensive library that may come in better use for you:

'?do=embed' frameborder='0' data-embedContent>>

He documented the code pretty well, and has examples provided in the zip as well.


Common sense plays a role in the basics of understanding AutoIt... If you're lacking in that, do us all a favor, and step away from the computer.

Share this post


Link to post
Share on other sites

Thanks @SmOke_N

@kaotkbliss

Merge is in Lite version of DebenuLibrary, so you can use it for free.
_QPDF_Example_MergeFiles_Lite()

In QuickPDF_Examples.au3
There is _QPDF_Example_MergeFiles() but this is for commercial version DebenuLibrary.

 

But no worry look in my UDF thread for new post.

If you encounter any problems then ask in topic mentioned by @SmOke_N


Signature beginning:   Wondering who uses AutoIT and what it can be used for ?
* GHAPI UDF - modest begining - comunication with GitHub REST API *
ADO.au3 UDF     POP3.au3 UDF     XML.au3 UDF    How to use IE.au3  UDF with  AutoIt v3.3.14.x  for other useful stuff click the following button

Spoiler

Any of my own code posted anywhere on the forum is available for use by others without any restriction of any kind. 

My contribution (my own projects): * Debenu Quick PDF Library - UDF * Debenu PDF Viewer SDK - UDF * Acrobat Reader - ActiveX Viewer * UDF for PDFCreator v1.x.x * XZip - UDF * AppCompatFlags UDF * CrowdinAPI UDF * _WinMergeCompare2Files() * _JavaExceptionAdd() * _IsBeta() * Writing DPI Awareness App - workaround * _AutoIt_RequiredVersion() * Chilkatsoft.au3 UDF * TeamViewer.au3 UDF * JavaManagement UDF * VIES over SOAP * WinSCP UDF * GHAPI UDF - modest begining - comunication with GitHub REST API *

My contribution to others projects or UDF based on  others projects: * _sql.au3 UDF  * POP3.au3 UDF *  RTF Printer - UDF * XML.au3 - BETA * ADO.au3 UDF SMTP Mailer UDF *

Useful links: * Forum Rules * Forum etiquette *  Forum Information and FAQs * How to post code on the forum * AutoIt Online Documentation * AutoIt Online Beta Documentation * SciTE4AutoIt3 getting started * Convert text blocks to AutoIt code * Games made in Autoit * Programming related sites * Polish AutoIt Tutorial * DllCall Code Generator * 

Wiki: Expand your knowledge - AutoIt Wiki * Collection of User Defined Functions * How to use HelpFile * Best coding practices * 

IE Related:  * How to use IE.au3  UDF with  AutoIt v3.3.14.x * Why isn't Autoit able to click a Javascript Dialog? * Clicking javascript button with no ID * IE document >> save as MHT file * IETab Switcher (by LarsJ ) * HTML Entities * _IEquerySelectorAll() (by uncommon) * 

I encourage you to read: * Global Vars * Best Coding Practices * Please explain code used in Help file for several File functions * OOP-like approach in AutoIt * UDF-Spec Questions *  EXAMPLE: How To Catch ConsoleWrite() output to a file or to CMD *

"Homo sum; humani nil a me alienum puto" - Publius Terentius Afer
"Program are meant to be read by humans and only incidentally for computers and execute" - Donald Knuth, "The Art of Computer Programming"
:naughty:  :ranting:, be  :) and       \\//_.

Anticipating Errors :  "Any program that accepts data from a user must include code to validate that data before sending it to the data store. You cannot rely on the data store, ...., or even your programming language to notify you of problems. You must check every byte entered by your users, making sure that data is the correct type for its field and that required fields are not empty."

Signature last update: 2017-06-04

Share this post


Link to post
Share on other sites

I managed to get this to work :)

it opens the first pdf and counts the pages, if it's an odd number, inserts the blank page.

Then starts merging the rest of the pdfs and if the total number of merged pages each loop is odd, it adds another blank page.

One of the problems were the insertpages lines, that function returns a true or false and it was checking for -1 (just removed it completely)

I also renamed the pdf variables so it made more sense to me (source = pdf you're reading from, target = pdf you want to add to) which also helped me rewrite the code.

Using the function as copied from the forums here, it took about a half hour to merge around 700 pdfs.

Using the rewrite, it takes just around a minute for that same 700 (plus adding in blank pages)

Just replace the variable $blank at the top of the script to the path of whatever you want to insert if the number of pages are odd and that's it :)

#include <Math.au3>
Dim $filelist[1]
$blank = "H:\Client\BRVO\Daily\Print\Blank.pdf"

If Not FileExists(@ScriptDir & "\PDFmerge.ini") Then
    IniWrite(@ScriptDir & "\PDFmerge.ini","LastFolder",1,"")
EndIf



$sourcepath = IniRead(@ScriptDir & "\PDFmerge.ini","LastFolder",1,"")

; example  merging files
$var= FileSelectFolder("Select folder", "","",$sourcepath) & "\"
if @error Then
    Exit
EndIf
IniWrite(@ScriptDir & "\PDFmerge.ini","LastFolder",1,$var)
$var2 = FileSaveDialog( "Select Filename", $var, "PDF Files (*.pdf)", 2)
If @error Then
    Exit
EndIf
MergeFiles($var,$var2)
; example end

func MergeFiles($SourcePath , $DestinationPath)

    if StringRight($DestinationPath,4) <> ".pdf" Then
        $DestinationPath = $DestinationPath&".pdf"
    EndIf

    if fileexists($DestinationPath) then
        FileDelete($DestinationPath)
    endif


    getfiles($sourcepath, "*.pdf")                                      ; Search Files in Dir
    if $filelist[0] < 2 then
        Msgbox(32,"Info", "Ther are less than two files in folder")
        exit
    endif

    ProgressOn("PDF Merge","Processing ...")

    $SourcePDF = ObjCreate("AcroExch.PDDoc")
    if IsObj($SourcePDF) then
        $b = $SourcePDF.Open($sourcepath & $filelist[1])
        $intSourcePgs = $SourcePDF.GetNumPages
        $mathcheck = _MathCheckDiv($intSourcePgs/2)
        if $mathcheck <> 2 Then
            $SourcePDF.Close
            $SourcePDF=""
            $TargetPDF = ObjCreate("AcroExch.PDDoc")
            $b = $TargetPDF.Open($sourcepath & $filelist[1])
            $intTargetPgs = $TargetPDF.GetNumPages
            $SourcePDF = ObjCreate("AcroExch.PDDoc")
            $b2 = $SourcePDF.Open($blank)
            $intSourcePgs = $SourcePDF.GetNumPages
            $c = $TargetPDF.InsertPages($intTargetPgs-1, $SourcePDF, 0, $intSourcePgs, False)
            $b = $TargetPDF.Save(1, $DestinationPath)
            $TargetPDF.Close
            $TargetPDF=""
            $SourcePDF.Close
            $SourcePDF=""
        Else
            $SourcePDF.Close
            $SourcePDF=""
            $TargetPDF = ObjCreate("AcroExch.PDDoc")
            $b = $TargetPDF.Open($sourcepath & $filelist[1])
            $intTargetPgs = $TargetPDF.GetNumPages
            $SourcePDF = ObjCreate("AcroExch.PDDoc")
            $b2 = $SourcePDF.Open($sourcepath & $filelist[1])
            $intSourcePgs = $SourcePDF.GetNumPages
            $c = $TargetPDF.InsertPages($intTargetPgs-1, $SourcePDF, 0, $intSourcePgs, False)
            $b = $TargetPDF.Save(1, $DestinationPath)
            $TargetPDF.Close
            $TargetPDF=""
            $SourcePDF.Close
            $SourcePDF=""
        EndIf

        $TargetPDF = ObjCreate("AcroExch.PDDoc")
        $b = $TargetPDF.Open($DestinationPath)
        $intTargetPgs = $TargetPDF.GetNumPages

        for $n=2 to UBound($filelist)-1

            ProgressSet((($n-1)/$filelist[0]) * 100, $filelist[$n-1])

            $SourcePDF = ObjCreate("AcroExch.PDDoc")
            $b = $SourcePDF.Open($sourcepath & $filelist[$n])
            $intSourcePgs = $SourcePDF.GetNumPages

            $c = $TargetPDF.InsertPages($intTargetPgs-1, $SourcePDF, 0, $intSourcePgs, False)
            $intTargetPgs += $intSourcePgs
            $SourcePDF.Close
            $SourcePDF=""
            if _MathCheckDiv($intTargetPgs/2) <> 2 Then
                $SourcePDF = ObjCreate("AcroExch.PDDoc")
                $b2 = $SourcePDF.Open($blank)
                $intSourcePgs = $SourcePDF.GetNumPages
                $c = $TargetPDF.InsertPages($intTargetPgs-1, $SourcePDF, 0, $intSourcePgs, False)
                $intTargetPgs += $intSourcePgs
                $SourcePDF.Close
                $SourcePDF=""
            EndIf
        next
        $b = $TargetPDF.Save(1, $DestinationPath)
        ProgressSet(100, $filelist[$filelist[0]])
        sleep(800)
        ProgressOff()
    endif
Endfunc

func getfiles($dir, $filter) ; search files in dir
Global $filelist[1]

    $n=0
    $search= FileFindFirstFile($dir & $filter)
; Check if the search was successful
    If $search = -1 Then
        MsgBox(0, "Error", "No files/directories matched the search pattern")
        Exit
    EndIf

    While 1
        $n=$n+1
        $file = FileFindNextFile($search)
        If @error Then ExitLoop
        redim $filelist[$n+1]
        $filelist[$n]= $file
        $filelist [0] = $n
    WEnd
; Close the search handle
    FileClose($search)
EndFunc

func GetPagesCount($target); count pages
    $AcroPDDoc = ObjCreate("AcroExch.PDDoc")
    if @error then return -1
    $bPDF = $AcroPDDoc.Open($target)
    if @error then return -2
    $Pages= $AcroPDDoc.GetNumPages
    $bPDF = $AcroPDDoc.Close
    return $pages
endfunc




; Feel free to improve the code...

; Greetings Mozat90

010101000110100001101001011100110010000001101001011100110010000

001101101011110010010000001110011011010010110011100100001

My Android cat and mouse game
https://play.google.com/store/apps/details?id=com.KaosVisions.WhiskersNSqueek

We're gonna need another Timmy!

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!


Register a new account

Sign in

Already have an account? Sign in here.


Sign In Now
Sign in to follow this  
Followers 0