Jump to content

Merging pdfs


Go to solution Solved by kaotkbliss,

Recommended Posts

I've been playing with the following code trying to merge pdfs (we'll be getting hundreds-thousands of pdfs that I'll have to merge into 1 before doing other stuff to it in another program.)

The pdfs merge great the problem is, if a pdf is an odd number of pages I want to add an additional blank pdf to make it an even number but that doesn't seem to want to be merging in :(

#Region ;**** Directives created by AutoIt3Wrapper_GUI ****
#AutoIt3Wrapper_Outfile=H:\Client\BRVO\Daily\Print\TESTING FOLDER\PDF Merger.exe
#AutoIt3Wrapper_UseUpx=n
#AutoIt3Wrapper_Add_Constants=n
#EndRegion ;**** Directives created by AutoIt3Wrapper_GUI ****
#include <Math.au3>
Dim $filelist[1]

If Not FileExists(@ScriptDir & "\PDFmerge.ini") Then
    IniWrite(@ScriptDir & "\PDFmerge.ini","LastFolder",1,"")
EndIf



$sourcepath = IniRead(@ScriptDir & "\PDFmerge.ini","LastFolder",1,"")

; example  merging files
$var= FileSelectFolder("Select folder", "","",$sourcepath) & "\"
if @error Then
    Exit
EndIf
IniWrite(@ScriptDir & "\PDFmerge.ini","LastFolder",1,$var)
$var2 = FileSaveDialog( "Select Filename", $var, "PDF Files (*.pdf)", 2)
If @error Then
    Exit
EndIf
MergeFiles($var,$var2)
; example end



func GetRotation($Targetpath)
;----------------------------------------------------
; Get the Rotation of a Page (eg. page 1)
; Ex.: msgbox(0,"Rotation",GetRotation("c:\pdft\test.pdf" ))
;----------------------------------------------------

    if not FileExists($Targetpath) then
        return -1
    endif

    $SourcePDF = ObjCreate("AcroExch.PDDoc")

    if not IsObj($SourcePDF) then
        return -2
    endif

    $b = $SourcePDF.Open($Targetpath)
    $rotation = $sourcePDF.AcquirePage(0).GetRotate
    $SourcePDF.close
    $sourcePDF=""
    return $rotation                                ; in Degree
EndFunc



func MergeFiles($SourcePath , $DestinationPath)

    if StringRight($DestinationPath,4) <> ".pdf" Then
        $DestinationPath = $DestinationPath&".pdf"
    EndIf

    if fileexists($DestinationPath) then
        FileDelete($DestinationPath)
    endif


    getfiles($sourcepath, "*.pdf")                                      ; Search Files in Dir
    if $filelist[0] < 2 then
        Msgbox(32,"Info", "Ther are less than two files in folder")
        exit
    endif

    ProgressOn("PDF Merge","Processing ...")

    $SourcePDF = ObjCreate("AcroExch.PDDoc")
    if IsObj($SourcePDF) then
        $b = $SourcePDF.Open($sourcepath & $filelist[1])

    $TargetPDF = ObjCreate("AcroExch.PDDoc")
    $b = $TargetPDF.Open($sourcepath & $filelist[1])

    $intSourcePgs = $SourcePDF.GetNumPages
    $intInsertPgs = $TargetPDF.GetNumPages

    $SourcePDF.InsertPages($intSourcePgs-1, $TargetPDF, 0, $intInsertPgs, False)
    if _MathCheckDiv($intInsertPgs/2) <> 2 Then
        $TargetPDF2 = ObjCreate("AcroExch.PDDoc")
        $b2 = $TargetPDF2.Open("H:\Client\BRVO\Daily\Print\Blank.pdf")

        $intInsertPgs2 = $TargetPDF2.GetNumPages
        if not $SourcePDF.InsertPages($intSourcePgs + $intInsertPgs -1, $TargetPDF2, 0, $intInsertPgs2, False) = -1  then
            $TargetPDF2.Close
            $TargetPDF2=""
            $intInsertPgs += 1
        EndIf
    EndIf
    $b = $SourcePDF.Save(1, $DestinationPath)
    $SourcePDF.Close
    $SourcePDF=""

    $SourcePDF = ObjCreate("AcroExch.PDDoc")
    if not IsObj($SourcePDF) then
        return -2
    endif

    $b = $SourcePDF.Open($DestinationPath)
    $intSourcePgs = $SourcePDF.GetNumPages

    for $n=2 to UBound($filelist)-1

        ProgressSet((($n-1)/$filelist[0]) * 100, $filelist[$n-1])

        $TargetPDF = ObjCreate("AcroExch.PDDoc")
        $b = $TargetPDF.Open($sourcepath & $filelist[$n])


        $intInsertPgs = $TargetPDF.GetNumPages

        if not $SourcePDF.InsertPages($intSourcePgs-1, $TargetPDF, 0, $intInsertPgs, False) = -1  then
            if _MathCheckDiv($intInsertPgs/2) <> 2 Then
                $TargetPDF2 = ObjCreate("AcroExch.PDDoc")
                $b2 = $TargetPDF2.Open("H:\Client\BRVO\Daily\Print\Blank.pdf")


                $intInsertPgs2 = $TargetPDF2.GetNumPages
                if not $SourcePDF.InsertPages($intSourcePgs + $intInsertPgs -1, $TargetPDF2, 0, $intInsertPgs2, False) = -1  then
                    $TargetPDF2.Close
                    $TargetPDF2=""
                    $intInsertPgs += 1
                EndIf
            EndIf
            $TargetPDF.Close
            $TargetPDF=""
        endif

        $intSourcePgs += $intInsertPgs


    next
    $b = $SourcePDF.Save(1, $DestinationPath)
    ProgressSet(100, $filelist[$filelist[0]])
    sleep(800)
    ProgressOff()
    endif

Endfunc




func getfiles($dir, $filter) ; search files in dir
Global $filelist[1]

    $n=0
    $search= FileFindFirstFile($dir & $filter)
; Check if the search was successful
    If $search = -1 Then
        MsgBox(0, "Error", "No files/directories matched the search pattern")
        Exit
    EndIf

    While 1
        $n=$n+1
        $file = FileFindNextFile($search)
        If @error Then ExitLoop
        redim $filelist[$n+1]
        $filelist[$n]= $file
        $filelist [0] = $n
    WEnd
; Close the search handle
    FileClose($search)
EndFunc

#cs
func PDFMerge($File1, $File2, $Fileout)
; ---------------------------------------
; File1 first  file
; File2 second file (inserted after)
; Fileout is the saving name
;
; returns  1 on success
; returns -1 Error insert Pages
; returns -2 Error Object Create
;----------------------------------------

    $SourcePDF = ObjCreate("AcroExch.PDDoc")
    if not IsObj($SourcePDF) then
        return -2
    endif

    $b = $SourcePDF.Open($sourcepath & $filelist[$n-1])

    $TargetPDF = ObjCreate("AcroExch.PDDoc")
    $b = $TargetPDF.Open($sourcepath & $filelist[$n])

    $intSourcePgs = $SourcePDF.GetNumPages
    $intInsertPgs = $TargetPDF.GetNumPages
    if not $SourcePDF.InsertPages($intSourcePgs-1, $TargetPDF, 0, $intInsertPgs, False) = -1  then
        $SourcePDF.Close
        $TargetPDF.Close
        $SourcePDF=""
        $TargetPDF=""
        return -1
    endif
    $b = $SourcePDF.Save(1, $DestinationPath)
    $SourcePDF.Close
    $TargetPDF.Close
    $SourcePDF=""
    $TargetPDF=""
    return 1
endfunc
#ce


func GetPagesCount($target); count pages
    $AcroPDDoc = ObjCreate("AcroExch.PDDoc")
    if @error then return -1
    $bPDF = $AcroPDDoc.Open($target)
    if @error then return -2
    $Pages= $AcroPDDoc.GetNumPages
    $bPDF = $AcroPDDoc.Close
    return $pages
endfunc




; Feel free to improve the code...

; Greetings Mozat90

010101000110100001101001011100110010000001101001011100110010000

001101101011110010010000001110011011010010110011100100001

My Android cat and mouse game
https://play.google.com/store/apps/details?id=com.KaosVisions.WhiskersNSqueek

We're gonna need another Timmy!

Link to comment
Share on other sites

Have you checked $b2 to make sure that it is able to open $TargetPDF2? Ā  Is that a full-page blank sheet or an empty file?

Ian

My projects:

  • IP ScannerĀ - Multi-threaded ping tool to scan your available networks for used and available IP addresses, shows ping times, resolves IPs in to host names, and allows individual IPs to be pinged.
  • INFSniffĀ - Great technicians tool - a tool which scansĀ DriverPacksĀ archives for INF files and parses out the HWIDs to a database file, and rapidly scans the local machine's HWIDs, searches the database for matches, and installs them.
  • PPK3Ā (Persistent Process Killer V3) - Another for the techs - suppress running processes that you need to keep away, helpful when fighting spyware/viruses.
  • Sync ToolĀ - Folder sync tool with lots of real time information and several checking methods.
  • USMT Front EndĀ - Front End for Microsoft's User State Migration Tool, including all files needed for USMT 3.01 and 4.01, 32 bit and 64 bit versions.
  • Audit ToolĀ - Computer audit tool to gather vital hardware, Windows, and Office information for IT managers and field techs. Capabilities include creating a customized site agent.
  • CSV ViewerĀ - Displays CSV files with automatic column sizing and font selection. Lines can also be copied to the clipboard for data extraction.
  • MyDirStatĀ - Lists number and size of files on a drive or specified path, allows for deletion within the app.
  • 2048 GameĀ - My version of 2048, fun tile game.
  • Juice LabĀ - Ecigarette liquid making calculator.
  • Data ProtectorĀ - Secure notes to save sensitive information.
  • VHD FooterĀ - Add a footer to a forensic hard drive image to allow it to be mounted or used as a virtual machine hard drive.
  • Find in FileĀ - Searches files containing a specified phrase.
Link to comment
Share on other sites

It's returning "True" as expected (able to open file) and it's a full blank page.

010101000110100001101001011100110010000001101001011100110010000

001101101011110010010000001110011011010010110011100100001

My Android cat and mouse game
https://play.google.com/store/apps/details?id=com.KaosVisions.WhiskersNSqueek

We're gonna need another Timmy!

Link to comment
Share on other sites

What additional information is needed to help solve this? It shouldn't be hard to adapt for testing as it's set up for you to choose your own folder of PDFs. The only thing that would need changed is the location of the blank PDF.

010101000110100001101001011100110010000001101001011100110010000

001101101011110010010000001110011011010010110011100100001

My Android cat and mouse game
https://play.google.com/store/apps/details?id=com.KaosVisions.WhiskersNSqueek

We're gonna need another Timmy!

Link to comment
Share on other sites

  • Moderators

What does:

$intInsertPgs2 = $TargetPDF2.GetNumPages

return?

Common sense plays a role in the basics of understanding AutoIt... If you're lacking in that, do us all a favor, and step away from the computer.

Link to comment
Share on other sites

I hadn't thought of checking that. I would assume it returns a 1 as that's how many pages are in the PDF, but I will check it to make sure and be back with the result.

*edit*

Just as suspected, it returns a 1. I tried kylomis' suggestion and added 1 to that, but that didn't merge all the PDFs (our test folder has 300 PDFs of 7 pages so I should get 2400 pages. I only ended up with 930 some) and there was still no blank page added to the ones that did merge :(

Edited by kaotkbliss

010101000110100001101001011100110010000001101001011100110010000

001101101011110010010000001110011011010010110011100100001

My Android cat and mouse game
https://play.google.com/store/apps/details?id=com.KaosVisions.WhiskersNSqueek

We're gonna need another Timmy!

Link to comment
Share on other sites

  • Moderators

You could always contact the original author of the code you're using, only looks like you've changed a couple of things that mozart90 wrote, maybe they could help.

I wrote a pdf library some time ago, I may still have it on a backup flash drive somewhere, I'll have a look tomorrow.

In the meantime, this guy seems to have written a pretty extensive library that may come in better use for you:

'?do=embed' frameborder='0' data-embedContent>>

He documented the code pretty well, and has examples provided in the zip as well.

Common sense plays a role in the basics of understanding AutoIt... If you're lacking in that, do us all a favor, and step away from the computer.

Link to comment
Share on other sites

Thanks @SmOke_N

@kaotkbliss

Merge is in Lite version of DebenuLibrary, so you can use it for free.
_QPDF_Example_MergeFiles_Lite()

In QuickPDF_Examples.au3
There is _QPDF_Example_MergeFiles() but this is for commercial version DebenuLibrary.

Ā 

But no worry look in my UDF thread for new post.

If you encounter any problems then ask in topic mentioned by @SmOke_N

Signature beginning:
*Ā Please remember: "AutoIt".....Ā *Ā Ā Wondering who uses AutoIt and what it can be used for ?Ā *Ā Forum RulesĀ *
*Ā ADO.au3 UDFĀ *Ā POP3.au3 UDFĀ *Ā XML.au3 UDFĀ *Ā IE on Windows 11 * How to ask ChatGPT for AutoIt Code *Ā for other useful stuffĀ click the following button:

Spoiler

AnyĀ of myĀ own codeĀ posted anywhere on the forumĀ isĀ available for use by others without any restrictionĀ of any kind.Ā 

My contribution (my own projects):Ā *Ā Debenu Quick PDF Library - UDFĀ *Ā Debenu PDF Viewer SDK - UDFĀ *Ā Acrobat Reader - ActiveX ViewerĀ * UDF for PDFCreator v1.x.xĀ *Ā XZip - UDFĀ *Ā AppCompatFlagsĀ UDFĀ *Ā CrowdinAPIĀ UDFĀ *Ā _WinMergeCompare2Files()Ā *Ā _JavaExceptionAdd()Ā *Ā _IsBeta()Ā *Ā Writing DPI Awareness App - workaroundĀ *Ā _AutoIt_RequiredVersion()Ā * Chilkatsoft.au3 UDFĀ *Ā TeamViewer.au3 UDFĀ *Ā JavaManagement UDFĀ *Ā VIES over SOAPĀ * WinSCP UDFĀ * GHAPI UDF - modest begining - comunication with GitHub REST API *Ā ErrorLog.au3 UDF - A logging LibraryĀ *Ā Include Dependency Tree (Tool for analyzing script relations)Ā *Ā Show_Macro_Values.au3 *

Ā 

My contribution to others projects or UDF based on Ā others projects:Ā *Ā _sql.au3 UDFĀ Ā * POP3.au3 UDFĀ * Ā RTF Printer - UDFĀ * XML.au3 UDFĀ * ADO.au3 UDF *Ā SMTP Mailer UDFĀ *Ā Dual Monitor resolution detection * *Ā 2GUI on Dual Monitor System * _SciLexer.au3 UDFĀ *Ā SciTE - Lexer for console pane *Ā 

Useful links:Ā * Forum RulesĀ *Ā Forum etiquetteĀ *Ā  Forum Information and FAQsĀ *Ā How to post code on the forumĀ *Ā AutoIt Online DocumentationĀ *Ā AutoIt Online Beta DocumentationĀ *Ā SciTE4AutoIt3 getting startedĀ *Ā Convert text blocks to AutoIt codeĀ *Ā Games made in AutoitĀ *Ā Programming related sitesĀ *Ā Polish AutoIt TutorialĀ *Ā DllCall Code GeneratorĀ *Ā 

Wiki:Ā *Ā Expand your knowledge - AutoIt WikiĀ *Ā Collection of User Defined FunctionsĀ *Ā How to use HelpFileĀ *Ā Good coding practices in AutoItĀ *Ā 

OpenOffice/LibreOffice/XLS Related:Ā WriterDemo.au3Ā *Ā XLS/MDB from scratch with ADOX

IE Related:Ā Ā *Ā How to use IE.au3 Ā UDF with Ā AutoIt v3.3.14.xĀ *Ā Why isn't Autoit able to click a Javascript Dialog?Ā *Ā Clicking javascript button with no IDĀ *Ā IE document >> save as MHT fileĀ * IETab Switcher (by LarsJ )Ā *Ā HTML Entities * _IEquerySelectorAll() (by uncommon)Ā *Ā IE in TaskScheduler *Ā IE Embedded Control Versioning (use IE9+ and HTML5 in a GUI)Ā *Ā PDFĀ Related: *Ā How to get reference to PDF object embeded in IE * IE on Windows 11 *Ā 

I encourage you to read:Ā * Global VarsĀ * Best Coding PracticesĀ *Ā Please explain code used in Help file for several File functionsĀ *Ā OOP-like approach in AutoItĀ * UDF-Spec QuestionsĀ *Ā  EXAMPLE: How To Catch ConsoleWrite() output to a file or to CMDĀ *

I also encourage you to check awesome @trancexxĀ code:Ā  *Ā Create COM objects from modules without any demand on user to register anything. * Another COM object registering stuff *Ā OnHungApp handler *Ā Avoid "AutoIt Error" message box in unknown errorsĀ Ā *Ā HTML editor *Ā 

winhttp.au3 related :Ā *Ā https://www.autoitscript.com/forum/topic/206771-winhttpau3-download-problem-youre-speaking-plain-http-to-an-ssl-enabled-server-port/

"Homo sum; humani nil a me alienum puto" - Publius Terentius Afer
"Program are meant to be read by humans and only incidentally for computers and execute" - Donald Knuth, "The Art of Computer Programming"
:naughty:Ā Ā :ranting:, beĀ Ā :)Ā and Ā  Ā  Ā  \\//_.

Anticipating ErrorsĀ :Ā Ā "Any program that accepts data from a user must include code to validate that data before sending it to the data store. You cannot rely on the data store, ...., or even your programming language to notify you of problems. You must check every byte entered by your users, making sure that data is the correct type for its field and that required fields are not empty."

Signature last update: 2023-04-24

Link to comment
Share on other sites

  • Solution

I managed to get this to work :)

it opens the first pdf and counts the pages, if it's an odd number, inserts the blank page.

Then starts merging the rest of the pdfs and if the total number of merged pages each loop is odd, it adds another blank page.

OneĀ of the problems were the insertpages lines, that function returns a true or false and it was checking for -1 (just removed it completely)

I also renamed the pdf variables so it made more sense to me (source = pdf you're reading from, target = pdf you want to add to) which also helped me rewrite the code.

Using the function as copied from the forums here, it took about a half hour to merge around 700 pdfs.

Using the rewrite, it takes just around a minute for that same 700 (plus adding in blank pages)

Just replace the variable $blank at the top of the script to the path of whatever you want to insert if the number of pages are odd and that's it :)

#include <Math.au3>
Dim $filelist[1]
$blank = "H:\Client\BRVO\Daily\Print\Blank.pdf"

If Not FileExists(@ScriptDir & "\PDFmerge.ini") Then
    IniWrite(@ScriptDir & "\PDFmerge.ini","LastFolder",1,"")
EndIf



$sourcepath = IniRead(@ScriptDir & "\PDFmerge.ini","LastFolder",1,"")

; example  merging files
$var= FileSelectFolder("Select folder", "","",$sourcepath) & "\"
if @error Then
    Exit
EndIf
IniWrite(@ScriptDir & "\PDFmerge.ini","LastFolder",1,$var)
$var2 = FileSaveDialog( "Select Filename", $var, "PDF Files (*.pdf)", 2)
If @error Then
    Exit
EndIf
MergeFiles($var,$var2)
; example end

func MergeFiles($SourcePath , $DestinationPath)

    if StringRight($DestinationPath,4) <> ".pdf" Then
        $DestinationPath = $DestinationPath&".pdf"
    EndIf

    if fileexists($DestinationPath) then
        FileDelete($DestinationPath)
    endif


    getfiles($sourcepath, "*.pdf")                                      ; Search Files in Dir
    if $filelist[0] < 2 then
        Msgbox(32,"Info", "Ther are less than two files in folder")
        exit
    endif

    ProgressOn("PDF Merge","Processing ...")

    $SourcePDF = ObjCreate("AcroExch.PDDoc")
    if IsObj($SourcePDF) then
        $b = $SourcePDF.Open($sourcepath & $filelist[1])
        $intSourcePgs = $SourcePDF.GetNumPages
        $mathcheck = _MathCheckDiv($intSourcePgs/2)
        if $mathcheck <> 2 Then
            $SourcePDF.Close
            $SourcePDF=""
            $TargetPDF = ObjCreate("AcroExch.PDDoc")
            $b = $TargetPDF.Open($sourcepath & $filelist[1])
            $intTargetPgs = $TargetPDF.GetNumPages
            $SourcePDF = ObjCreate("AcroExch.PDDoc")
            $b2 = $SourcePDF.Open($blank)
            $intSourcePgs = $SourcePDF.GetNumPages
            $c = $TargetPDF.InsertPages($intTargetPgs-1, $SourcePDF, 0, $intSourcePgs, False)
            $b = $TargetPDF.Save(1, $DestinationPath)
            $TargetPDF.Close
            $TargetPDF=""
            $SourcePDF.Close
            $SourcePDF=""
        Else
            $SourcePDF.Close
            $SourcePDF=""
            $TargetPDF = ObjCreate("AcroExch.PDDoc")
            $b = $TargetPDF.Open($sourcepath & $filelist[1])
            $intTargetPgs = $TargetPDF.GetNumPages
            $SourcePDF = ObjCreate("AcroExch.PDDoc")
            $b2 = $SourcePDF.Open($sourcepath & $filelist[1])
            $intSourcePgs = $SourcePDF.GetNumPages
            $c = $TargetPDF.InsertPages($intTargetPgs-1, $SourcePDF, 0, $intSourcePgs, False)
            $b = $TargetPDF.Save(1, $DestinationPath)
            $TargetPDF.Close
            $TargetPDF=""
            $SourcePDF.Close
            $SourcePDF=""
        EndIf

        $TargetPDF = ObjCreate("AcroExch.PDDoc")
        $b = $TargetPDF.Open($DestinationPath)
        $intTargetPgs = $TargetPDF.GetNumPages

        for $n=2 to UBound($filelist)-1

            ProgressSet((($n-1)/$filelist[0]) * 100, $filelist[$n-1])

            $SourcePDF = ObjCreate("AcroExch.PDDoc")
            $b = $SourcePDF.Open($sourcepath & $filelist[$n])
            $intSourcePgs = $SourcePDF.GetNumPages

            $c = $TargetPDF.InsertPages($intTargetPgs-1, $SourcePDF, 0, $intSourcePgs, False)
            $intTargetPgs += $intSourcePgs
            $SourcePDF.Close
            $SourcePDF=""
            if _MathCheckDiv($intTargetPgs/2) <> 2 Then
                $SourcePDF = ObjCreate("AcroExch.PDDoc")
                $b2 = $SourcePDF.Open($blank)
                $intSourcePgs = $SourcePDF.GetNumPages
                $c = $TargetPDF.InsertPages($intTargetPgs-1, $SourcePDF, 0, $intSourcePgs, False)
                $intTargetPgs += $intSourcePgs
                $SourcePDF.Close
                $SourcePDF=""
            EndIf
        next
        $b = $TargetPDF.Save(1, $DestinationPath)
        ProgressSet(100, $filelist[$filelist[0]])
        sleep(800)
        ProgressOff()
    endif
Endfunc

func getfiles($dir, $filter) ; search files in dir
Global $filelist[1]

    $n=0
    $search= FileFindFirstFile($dir & $filter)
; Check if the search was successful
    If $search = -1 Then
        MsgBox(0, "Error", "No files/directories matched the search pattern")
        Exit
    EndIf

    While 1
        $n=$n+1
        $file = FileFindNextFile($search)
        If @error Then ExitLoop
        redim $filelist[$n+1]
        $filelist[$n]= $file
        $filelist [0] = $n
    WEnd
; Close the search handle
    FileClose($search)
EndFunc

func GetPagesCount($target); count pages
    $AcroPDDoc = ObjCreate("AcroExch.PDDoc")
    if @error then return -1
    $bPDF = $AcroPDDoc.Open($target)
    if @error then return -2
    $Pages= $AcroPDDoc.GetNumPages
    $bPDF = $AcroPDDoc.Close
    return $pages
endfunc




; Feel free to improve the code...

; Greetings Mozat90

010101000110100001101001011100110010000001101001011100110010000

001101101011110010010000001110011011010010110011100100001

My Android cat and mouse game
https://play.google.com/store/apps/details?id=com.KaosVisions.WhiskersNSqueek

We're gonna need another Timmy!

Link to comment
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now
 Share

  • Recently Browsing   0 members

    • No registered users viewing this page.
×
×
  • Create New...