Jump to content
Sign in to follow this  
SlowAndSteady

Image compare and duplicate finder

Recommended Posts

SlowAndSteady

Hello to all.

Recently I had the problem of sifting through hundreds of images to find duplicates.

So I came up with this script. It uses basic histogram comparison of the Grey and RGB channels and can deal with resized images.

It worked for me and is quite stable. Maybe someone else can make use of it, too.

Any comments are welcome.

JD

#include <GDIPlus.au3>
#include <GUIConstantsEx.au3>
#include <GUIButton.au3>
#include <WindowsConstants.au3>
#include <MsgBoxConstants.au3>
#include <Misc.au3>
#include <Array.au3>
#include <File.au3>
#include <String.au3>
#include <Winapi.au3>
#include <GuiStatusBar.au3>

; Script-wide options: every variable must be declared before use, and GUI
; events are delivered through GUISetOnEvent / GUICtrlSetOnEvent callbacks.
Opt("MustDeclareVars", 1)
Opt("GUIOnEventMode", 1)

; --- Tunable parameters: please try different values ---
; $iSens           : largest absolute difference between two (normalised) histogram
;                    bins that still counts the bin as matching
; $iMatchThreshold : overall match percentage above which a pair is reported
; $iCompSize       : row index into $aCompSize selecting the comparison resolution
; $iInteractive    : non-zero pauses on every match until the OK button is pressed
Global $iSens = 150
Global $iMatchThreshold = 80
Global $iCompSize = 2
Global $iInteractive = 1

; Available comparison resolutions as [width, height] rows.
Global $aCompSize[4][2] = [[160, 120], [320, 240], [480, 360], [640, 480]]

; Histogram channels evaluated for every image pair, plus the loop variable.
Global $aHistogramFormat[] = [$GDIP_HistogramFormatGray, $GDIP_HistogramFormatR, $GDIP_HistogramFormatG, $GDIP_HistogramFormatB]
Global $Format

; GDI+ handles of the two images currently being compared.
Global $hMainImage
Global $hCompImage

; Folder paths, file lists and file names.
Global $sPath1, $sPath2
Global $aAllFilesMain, $aAllFiles
Global $sFileMain, $sFile
Global $sFileNameMain, $sFileNameComp
Global $FileName
Global $bSingleDir = False

; Histogram buffers and per-pair comparison state.
Global $tChannel_Main, $tChannel_Comp
Global $iMaxLum_Main = 0, $iMaxLum_Comp = 0
Global $iSize, $fNormFact
Global $iHistMatches = 0, $fMatchOverall = 0
Global $iMatchError, $iMatchErrorOverall

; GUI handles / control IDs and the flag used to wait for the OK button.
Global $hGui, $hGraphics, $hStatus
Global $hThumbMain, $hThumbComp
Global $hOKButton, $hLabel, $hName1, $hName2
Global $bWait
Global $hTemp

; Timing and logging.
Global $Timer, $RunTimer, $iRunningTime
Global $hLogFile

; Bring up GDI+; the histogram and resize calls below depend on it.
If Not _GDIPlus_Startup() Then
    MsgBox($MB_SYSTEMMODAL, "ERROR", "GDIPlus.dll v1.1 not available")
    Exit
EndIf

; Main window: two thumbnail slots, file-name labels, OK button, match label
; and a status bar for the time estimate.
$hGui = GUICreate("DupeFinder", 280, 170)
$hStatus = _GUICtrlStatusBar_Create($hGui, -1, "")

$hGraphics = _GDIPlus_GraphicsCreateFromHWND($hGui)
GUISetOnEvent($GUI_EVENT_CLOSE, "_Exit")
GUISetOnEvent($GUI_EVENT_RESTORE, "_Redraw")

$hName1 = GUICtrlCreateLabel("", 15, 95, 115, 20)
$hName2 = GUICtrlCreateLabel("", 155, 95, 115, 20)
$hOKButton = GUICtrlCreateButton("OK", 10, 110, 30, 20)
GUICtrlSetOnEvent($hOKButton, "_Continue")
$hLabel = GUICtrlCreateLabel("", 55, 110, 200, 20)

GUISetState(@SW_SHOW)

; First folder: the images that are checked for duplicates.
$sPath1 = FileSelectFolder("Select a Folder to compare", "", 0, "", $hGui)
If @error Then _Exit()
$aAllFilesMain = _FileListToArray($sPath1, "*.jpg", 1, True)
If @error Then
    ; Without this check the result is not an array and the main
    ; For...In loop would abort the script with a runtime error.
    MsgBox($MB_SYSTEMMODAL, "ERROR", "No *.jpg files found in " & $sPath1)
    _Exit()
EndIf

; Second folder: the images to compare against (may be the same folder).
$sPath2 = FileSelectFolder("Select a Folder to compare", "", 0, $sPath1, $hGui)
If @error Then _Exit()
$aAllFiles = _FileListToArray($sPath2, "*.jpg", 1, True)
If @error Then
    MsgBox($MB_SYSTEMMODAL, "ERROR", "No *.jpg files found in " & $sPath2)
    _Exit()
EndIf

; Mode 2 = open for writing and erase any previous log.
$hLogFile = FileOpen(@ScriptDir & "\dupefinder.log", 2)
If $hLogFile = -1 Then
    ; Keep going: matches are still shown interactively, they just are not logged.
    MsgBox($MB_SYSTEMMODAL, "WARNING", "Could not open " & @ScriptDir & "\dupefinder.log - matches will not be logged")
EndIf

; Windows paths are case-insensitive, so use the case-insensitive "=" operator.
If $sPath1 = $sPath2 Then $bSingleDir = True

$RunTimer = TimerInit()

; Main pass: compare every image of the first folder against every image of
; the second folder by histogram similarity, and report/log the matches.
For $sFileMain In $aAllFilesMain

    ; _FileListToArray stores the element count at index 0; skip it.
    If IsInt($sFileMain) Then ContinueLoop

    ; Same-folder run: both lists are in the same order, so removing the first
    ; remaining entry drops the current main file from the comparison list.
    ; That avoids comparing a file with itself or a pair twice. Done before the
    ; load check so the two lists stay in step even when a file is skipped.
    If $bSingleDir Then _ArrayDelete($aAllFiles, 1)

    ; Load and downscale the main image; skip files GDI+ cannot read.
    $hTemp = _GDIPlus_ImageLoadFromFile($sFileMain)
    If @error Or Not $hTemp Then ContinueLoop
    $hMainImage = _GDIPlus_ImageResize($hTemp, $aCompSize[$iCompSize][0], $aCompSize[$iCompSize][1])
    _GDIPlus_ImageDispose($hTemp)
    If Not $hMainImage Then ContinueLoop

    ; File name = everything after the last backslash of the full path.
    $sFileNameMain = StringTrimLeft($sFileMain, StringInStr($sFileMain, "\", 0, -1))

    $hThumbMain = _Thumb($hMainImage)
    _GDIPlus_GraphicsDrawImage($hGraphics, $hThumbMain, 10, 10)

    GUICtrlSetData($hName1, $sFileNameMain)

    ; Time one full inner pass; it feeds the estimate shown in the status bar.
    $Timer = TimerInit()

    For $sFile In $aAllFiles

        ; Skip the element count at index 0.
        If IsInt($sFile) Then ContinueLoop

        ; Load and downscale the candidate; skip files GDI+ cannot read.
        $hTemp = _GDIPlus_ImageLoadFromFile($sFile)
        If @error Or Not $hTemp Then ContinueLoop
        $hCompImage = _GDIPlus_ImageResize($hTemp, $aCompSize[$iCompSize][0], $aCompSize[$iCompSize][1])
        _GDIPlus_ImageDispose($hTemp)
        If Not $hCompImage Then ContinueLoop

        $sFileNameComp = StringTrimLeft($sFile, StringInStr($sFile, "\", 0, -1))

        $hThumbComp = _Thumb($hCompImage)
        _GDIPlus_GraphicsDrawImage($hGraphics, $hThumbComp, 150, 10)

        GUICtrlSetData($hName2, $sFileNameComp)

        ; Compare Channels: for each histogram format, count the bins whose
        ; normalised counts differ by less than $iSens.
        $fMatchOverall = 0
        For $Format In $aHistogramFormat
            $iSize = _GDIPlus_BitmapGetHistogramSize($Format)

            ; Histogram of the main image and its highest bin.
            $tChannel_Main = DllStructCreate("uint[" & $iSize & "];")
            _GDIPlus_BitmapGetHistogram($hMainImage, $Format, $iSize, $tChannel_Main)
            $iMaxLum_Main = 0
            For $i = 1 To $iSize
                If DllStructGetData($tChannel_Main, 1, $i) > $iMaxLum_Main Then $iMaxLum_Main = DllStructGetData($tChannel_Main, 1, $i)
            Next

            ; Histogram of the candidate image and its highest bin.
            $tChannel_Comp = DllStructCreate("uint[" & $iSize & "];")
            _GDIPlus_BitmapGetHistogram($hCompImage, $Format, $iSize, $tChannel_Comp)
            $iMaxLum_Comp = 0
            For $i = 1 To $iSize
                If DllStructGetData($tChannel_Comp, 1, $i) > $iMaxLum_Comp Then $iMaxLum_Comp = DllStructGetData($tChannel_Comp, 1, $i)
            Next

            ; Scale the main histogram so both peaks have the same height.
            $fNormFact = $iMaxLum_Comp / $iMaxLum_Main

            $iHistMatches = 0
            For $i = 1 To $iSize
                If Abs(DllStructGetData($tChannel_Main, 1, $i) * $fNormFact - DllStructGetData($tChannel_Comp, 1, $i)) < $iSens Then $iHistMatches += 1
            Next
            ; Percentage of matching bins for this channel.
            $fMatchOverall += $iHistMatches / $iSize * 100
        Next
        ; Average over the four channels.
        $fMatchOverall /= 4
        If $fMatchOverall > $iMatchThreshold Then
            If $iInteractive Then
                GUICtrlSetData($hLabel, "MATCH! " & Int($fMatchOverall) & "%")
                ; Block until the OK button handler (_Continue) clears the flag.
                $bWait = True
                While $bWait
                    Sleep(100)
                WEnd
                GUICtrlSetData($hLabel, "")
            EndIf
            FileWriteLine($hLogFile, $sFileNameMain & " -> " & $sFileNameComp & " : " & Int($fMatchOverall) & "%")

        EndIf
        ; Release the candidate and clear the handles so that _Redraw and _Exit
        ; never touch an already disposed object.
        _GDIPlus_ImageDispose($hCompImage)
        $hCompImage = 0
        _GDIPlus_BitmapDispose($hThumbComp)
        $hThumbComp = 0

    Next

    ; Remaining time = (number of main files * duration of the last pass) - elapsed,
    ; clamped at zero and split into hours / minutes / seconds.
    $iRunningTime = Int((Int($aAllFilesMain[0]) * TimerDiff($Timer) / 1000) - (TimerDiff($RunTimer) / 1000))
    If $iRunningTime < 0 Then $iRunningTime = 0
    _GUICtrlStatusBar_SetText($hStatus, "Estimated: " & Int($iRunningTime / 3600) & " h " & Int(Mod($iRunningTime, 3600) / 60) & " m " & Mod($iRunningTime, 60) & " s")

    _GDIPlus_ImageDispose($hMainImage)
    $hMainImage = 0
    _GDIPlus_BitmapDispose($hThumbMain)
    $hThumbMain = 0
Next
_Exit()

; $GUI_EVENT_RESTORE handler: paints both thumbnails again.
; Each row of $aSlots is [thumbnail handle, x position]; both are drawn at y = 10.
Func _Redraw()
    Local $aSlots[2][2] = [[$hThumbMain, 10], [$hThumbComp, 150]]
    For $iSlot = 0 To UBound($aSlots) - 1
        _GDIPlus_GraphicsDrawImage($hGraphics, $aSlots[$iSlot][0], $aSlots[$iSlot][1], 10)
    Next
EndFunc   ;==>_Redraw


; Releases the GDI+ objects, shuts GDI+ down, destroys the GUI, closes the log
; file and terminates the script. Registered as the $GUI_EVENT_CLOSE handler
; and also called directly by the main script (cancelled folder dialog, end of run).
; NOTE(review): the main loop disposes $hCompImage / $hMainImage at the end of
; each iteration, so these handles may already have been released when this runs.
Func _Exit()
    _GDIPlus_ImageDispose($hCompImage)
    _GDIPlus_ImageDispose($hMainImage)
    ; The graphics object is released before GDI+ is shut down and before the
    ; window it was created from is deleted.
    _GDIPlus_GraphicsDispose($hGraphics)
    _GDIPlus_Shutdown()
    GUIDelete($hGui)
    ; $hLogFile is only assigned after both folders were chosen, so it may still be unset here.
    FileClose($hLogFile)
    Exit
EndFunc   ;==>_Exit

; Creates the preview shown in the GUI.
;   $hImage - handle of the image to scale
;   Returns - whatever _GDIPlus_ImageResize returns for a 120 x 80 resize of $hImage
Func _Thumb($hImage)
    Local Const $iThumbWidth = 120, $iThumbHeight = 80
    Local $hThumb = _GDIPlus_ImageResize($hImage, $iThumbWidth, $iThumbHeight)
    Return $hThumb
EndFunc   ;==>_Thumb

; Click handler of the OK button: clears $bWait, which ends the
; "While $bWait" polling loop the main script enters after a match.
Func _Continue()
    $bWait = False
EndFunc   ;==>_Continue

Share this post


Link to post
Share on other sites
Vincor

Very interesting idea, will definitely test it!

Thanks

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now
Sign in to follow this  

×