trancexx

CRC32, MD4, MD5, SHA1 -for files

49 posts in this topic

#1 ·  Posted (edited)

When processing files, especially large ones, you will always lose the battle if you do the work in AutoIt itself, not to mention the RAM suffocation. So, what if I leave the whole process to a level that is a few steps below me and just collect the cream?

Script includes functions and small example:

Opt("MustDeclareVars", 1)

; Let the user pick the file to hash; leave quietly if the dialog reports an error.
Global $sFile = FileOpenDialog("Choose file", "", "All files (*)")
If @error Then Exit


; $hTimer - timer handle from TimerInit()
; $iTimer - elapsed milliseconds from TimerDiff()
; $sData  - value returned by the hashing function
Global $hTimer, $iTimer, $sData

;------------------------------------------------------------------------
; CRC32: time one call and print the duration followed by the result.
$hTimer = TimerInit()
$sData = _CRC32ForFile($sFile)
$iTimer = TimerDiff($hTimer)

ConsoleWrite("! CRC32 took " & $iTimer & " ms" & @CRLF & _
        "Result: " & $sData & @CRLF & @CRLF)
;------------------------------------------------------------------------
; MD4: same measurement for _MD4ForFile.
$hTimer = TimerInit()
$sData = _MD4ForFile($sFile)
$iTimer = TimerDiff($hTimer)

ConsoleWrite("! MD4 took " & $iTimer & " ms" & @CRLF & _
        "Result: " & $sData & @CRLF & @CRLF)
;------------------------------------------------------------------------
; MD5: same measurement for _MD5ForFile.
$hTimer = TimerInit()
$sData = _MD5ForFile($sFile)
$iTimer = TimerDiff($hTimer)

ConsoleWrite("! MD5 took " & $iTimer & " ms" & @CRLF & _
        "Result: " & $sData & @CRLF & @CRLF)
;------------------------------------------------------------------------
; SHA1: same measurement for _SHA1ForFile.
$hTimer = TimerInit()
$sData = _SHA1ForFile($sFile)
$iTimer = TimerDiff($hTimer)

ConsoleWrite("! SHA1 took " & $iTimer & " ms" & @CRLF & _
        "Result: " & $sData & @CRLF & @CRLF)
;------------------------------------------------------------------------



; Functions...


; #FUNCTION# ;===============================================================================
;
; Name...........: _CRC32ForFile
; Description ...: Calculates CRC32 value for the specific file.
; Syntax.........: _CRC32ForFile ($sFile)
; Parameters ....: $sFile - Full path to the file to process.
; Return values .: Success - Returns CRC32 value in form of hex string
;                          - Sets @error to 0
;                  Failure - Returns empty string and sets @error:
;                  |1 - CreateFile function or call to it failed.
;                  |2 - CreateFileMapping function or call to it failed.
;                  |3 - MapViewOfFile function or call to it failed.
;                  |4 - Call to RtlComputeCrc32 function failed.
; Author ........: trancexx
; Remarks .......: The whole file is mapped read-only and the view is passed to RtlComputeCrc32
;                  in pieces of at most 1 GiB, carrying the running CRC from one call to the
;                  next, so the 32-bit length parameter is never overflowed.
;                  A checksum of 0 is a legitimate result and is not reported as an error.
;
;==========================================================================================
Func _CRC32ForFile($sFile)

    Local $a_hCall = DllCall("kernel32.dll", "hwnd", "CreateFileW", _
            "wstr", $sFile, _
            "dword", 0x80000000, _ ; GENERIC_READ
            "dword", 3, _ ; FILE_SHARE_READ|FILE_SHARE_WRITE
            "ptr", 0, _
            "dword", 3, _ ; OPEN_EXISTING
            "dword", 0, _ ; SECURITY_ANONYMOUS
            "ptr", 0)

    If @error Or $a_hCall[0] = -1 Then
        Return SetError(1, 0, "")
    EndIf

    Local $hFile = $a_hCall[0]

    $a_hCall = DllCall("kernel32.dll", "ptr", "CreateFileMappingW", _
            "hwnd", $hFile, _
            "dword", 0, _ ; default security descriptor
            "dword", 2, _ ; PAGE_READONLY
            "dword", 0, _
            "dword", 0, _
            "ptr", 0)

    If @error Or Not $a_hCall[0] Then
        DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFile)
        Return SetError(2, 0, "")
    EndIf

    ; From here on only the mapping object is used, so the file handle can go.
    DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFile)

    Local $hFileMappingObject = $a_hCall[0]

    $a_hCall = DllCall("kernel32.dll", "ptr", "MapViewOfFile", _
            "hwnd", $hFileMappingObject, _
            "dword", 4, _ ; FILE_MAP_READ
            "dword", 0, _
            "dword", 0, _
            "dword", 0) ; 0 = map the whole file

    If @error Or Not $a_hCall[0] Then
        DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFileMappingObject)
        Return SetError(3, 0, "")
    EndIf

    Local $pFile = $a_hCall[0]
    Local $iBufferSize = FileGetSize($sFile)

    ; Largest number of bytes handed to RtlComputeCrc32 in one call (1 GiB).
    Local Const $iMaxPiece = 0x40000000

    Local $iCRC32 = 0 ; running checksum, 0 is the initial value
    Local $iOffset = 0 ; bytes of the view already processed
    Local $iPiece, $a_iCall
    Local $iError = 0

    While $iOffset < $iBufferSize
        $iPiece = $iBufferSize - $iOffset
        If $iPiece > $iMaxPiece Then $iPiece = $iMaxPiece

        $a_iCall = DllCall("ntdll.dll", "dword", "RtlComputeCrc32", _
                "dword", $iCRC32, _ ; checksum of everything before this piece
                "ptr", $pFile + $iOffset, _
                "dword", $iPiece)

        ; Only a failed call is an error; the returned value is the checksum
        ; itself and may legitimately be 0.
        If @error Then
            $iError = 4
            ExitLoop
        EndIf

        $iCRC32 = $a_iCall[0]
        $iOffset += $iPiece
    WEnd

    ; Single cleanup path for both success and failure.
    DllCall("kernel32.dll", "int", "UnmapViewOfFile", "ptr", $pFile)
    DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFileMappingObject)

    If $iError Then
        Return SetError($iError, 0, "")
    EndIf

    Return SetError(0, 0, Hex($iCRC32))

EndFunc   ;==>_CRC32ForFile


; #FUNCTION# ;===============================================================================
;
; Name...........: _MD4ForFile
; Description ...: Calculates MD4 value for the specific file.
; Syntax.........: _MD4ForFile ($sFile)
; Parameters ....: $sFile - Full path to the file to process.
; Return values .: Success - Returns MD4 value in form of hex string
;                          - Sets @error to 0
;                  Failure - Returns empty string and sets @error:
;                  |1 - CreateFile function or call to it failed.
;                  |2 - CreateFileMapping function or call to it failed.
;                  |3 - MapViewOfFile function or call to it failed.
;                  |4 - MD4Init function or call to it failed.
;                  |5 - MD4Update function or call to it failed.
;                  |6 - MD4Final function or call to it failed.
; Author ........: trancexx
; Remarks .......: The whole file is mapped read-only and the view is passed to MD4Update in
;                  pieces of at most 1 GiB, so the 32-bit length parameter is never overflowed.
;
;==========================================================================================
Func _MD4ForFile($sFile)

    Local $a_hCall = DllCall("kernel32.dll", "hwnd", "CreateFileW", _
            "wstr", $sFile, _
            "dword", 0x80000000, _ ; GENERIC_READ
            "dword", 3, _ ; FILE_SHARE_READ|FILE_SHARE_WRITE
            "ptr", 0, _
            "dword", 3, _ ; OPEN_EXISTING
            "dword", 0, _ ; SECURITY_ANONYMOUS
            "ptr", 0)

    If @error Or $a_hCall[0] = -1 Then
        Return SetError(1, 0, "")
    EndIf

    Local $hFile = $a_hCall[0]

    $a_hCall = DllCall("kernel32.dll", "ptr", "CreateFileMappingW", _
            "hwnd", $hFile, _
            "dword", 0, _ ; default security descriptor
            "dword", 2, _ ; PAGE_READONLY
            "dword", 0, _
            "dword", 0, _
            "ptr", 0)

    If @error Or Not $a_hCall[0] Then
        DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFile)
        Return SetError(2, 0, "")
    EndIf

    ; From here on only the mapping object is used, so the file handle can go.
    DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFile)

    Local $hFileMappingObject = $a_hCall[0]

    $a_hCall = DllCall("kernel32.dll", "ptr", "MapViewOfFile", _
            "hwnd", $hFileMappingObject, _
            "dword", 4, _ ; FILE_MAP_READ
            "dword", 0, _
            "dword", 0, _
            "dword", 0) ; 0 = map the whole file

    If @error Or Not $a_hCall[0] Then
        DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFileMappingObject)
        Return SetError(3, 0, "")
    EndIf

    Local $pFile = $a_hCall[0]
    Local $iBufferSize = FileGetSize($sFile)

    ; Hashing context; the finished digest is read from the "digest" member.
    Local $tMD4_CTX = DllStructCreate("dword i[2];" & _
            "dword buf[4];" & _
            "ubyte in[64];" & _
            "ubyte digest[16]")
    Local $pMD4_CTX = DllStructGetPtr($tMD4_CTX)

    ; Largest number of bytes handed to MD4Update in one call (1 GiB).
    Local Const $iMaxPiece = 0x40000000

    Local $iError = 0 ; 0 while everything succeeded, otherwise the @error to return
    Local $iOffset = 0 ; bytes of the view already hashed
    Local $iPiece

    DllCall("advapi32.dll", "none", "MD4Init", "ptr", $pMD4_CTX)
    If @error Then $iError = 4

    While (Not $iError) And ($iOffset < $iBufferSize)
        $iPiece = $iBufferSize - $iOffset
        If $iPiece > $iMaxPiece Then $iPiece = $iMaxPiece

        DllCall("advapi32.dll", "none", "MD4Update", _
                "ptr", $pMD4_CTX, _
                "ptr", $pFile + $iOffset, _
                "dword", $iPiece)

        If @error Then
            $iError = 5
            ExitLoop
        EndIf

        $iOffset += $iPiece
    WEnd

    If Not $iError Then
        DllCall("advapi32.dll", "none", "MD4Final", "ptr", $pMD4_CTX)
        If @error Then $iError = 6
    EndIf

    ; Single cleanup path for both success and failure.
    DllCall("kernel32.dll", "int", "UnmapViewOfFile", "ptr", $pFile)
    DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFileMappingObject)

    If $iError Then
        Return SetError($iError, 0, "")
    EndIf

    Local $sMD4 = Hex(DllStructGetData($tMD4_CTX, "digest"))

    Return SetError(0, 0, $sMD4)

EndFunc   ;==>_MD4ForFile


; #FUNCTION# ;===============================================================================
;
; Name...........: _MD5ForFile
; Description ...: Calculates MD5 value for the specific file.
; Syntax.........: _MD5ForFile ($sFile)
; Parameters ....: $sFile - Full path to the file to process.
; Return values .: Success - Returns MD5 value in form of hex string
;                          - Sets @error to 0
;                  Failure - Returns empty string and sets @error:
;                  |1 - CreateFile function or call to it failed.
;                  |2 - CreateFileMapping function or call to it failed.
;                  |3 - MapViewOfFile function or call to it failed.
;                  |4 - MD5Init function or call to it failed.
;                  |5 - MD5Update function or call to it failed.
;                  |6 - MD5Final function or call to it failed.
; Author ........: trancexx
; Remarks .......: The whole file is mapped read-only and the view is passed to MD5Update in
;                  pieces of at most 1 GiB, so the 32-bit length parameter is never overflowed.
;
;==========================================================================================
Func _MD5ForFile($sFile)

    Local $a_hCall = DllCall("kernel32.dll", "hwnd", "CreateFileW", _
            "wstr", $sFile, _
            "dword", 0x80000000, _ ; GENERIC_READ
            "dword", 3, _ ; FILE_SHARE_READ|FILE_SHARE_WRITE
            "ptr", 0, _
            "dword", 3, _ ; OPEN_EXISTING
            "dword", 0, _ ; SECURITY_ANONYMOUS
            "ptr", 0)

    If @error Or $a_hCall[0] = -1 Then
        Return SetError(1, 0, "")
    EndIf

    Local $hFile = $a_hCall[0]

    $a_hCall = DllCall("kernel32.dll", "ptr", "CreateFileMappingW", _
            "hwnd", $hFile, _
            "dword", 0, _ ; default security descriptor
            "dword", 2, _ ; PAGE_READONLY
            "dword", 0, _
            "dword", 0, _
            "ptr", 0)

    If @error Or Not $a_hCall[0] Then
        DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFile)
        Return SetError(2, 0, "")
    EndIf

    ; From here on only the mapping object is used, so the file handle can go.
    DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFile)

    Local $hFileMappingObject = $a_hCall[0]

    $a_hCall = DllCall("kernel32.dll", "ptr", "MapViewOfFile", _
            "hwnd", $hFileMappingObject, _
            "dword", 4, _ ; FILE_MAP_READ
            "dword", 0, _
            "dword", 0, _
            "dword", 0) ; 0 = map the whole file

    If @error Or Not $a_hCall[0] Then
        DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFileMappingObject)
        Return SetError(3, 0, "")
    EndIf

    Local $pFile = $a_hCall[0]
    Local $iBufferSize = FileGetSize($sFile)

    ; Hashing context; the finished digest is read from the "digest" member.
    Local $tMD5_CTX = DllStructCreate("dword i[2];" & _
            "dword buf[4];" & _
            "ubyte in[64];" & _
            "ubyte digest[16]")
    Local $pMD5_CTX = DllStructGetPtr($tMD5_CTX)

    ; Largest number of bytes handed to MD5Update in one call (1 GiB).
    Local Const $iMaxPiece = 0x40000000

    Local $iError = 0 ; 0 while everything succeeded, otherwise the @error to return
    Local $iOffset = 0 ; bytes of the view already hashed
    Local $iPiece

    DllCall("advapi32.dll", "none", "MD5Init", "ptr", $pMD5_CTX)
    If @error Then $iError = 4

    While (Not $iError) And ($iOffset < $iBufferSize)
        $iPiece = $iBufferSize - $iOffset
        If $iPiece > $iMaxPiece Then $iPiece = $iMaxPiece

        DllCall("advapi32.dll", "none", "MD5Update", _
                "ptr", $pMD5_CTX, _
                "ptr", $pFile + $iOffset, _
                "dword", $iPiece)

        If @error Then
            $iError = 5
            ExitLoop
        EndIf

        $iOffset += $iPiece
    WEnd

    If Not $iError Then
        DllCall("advapi32.dll", "none", "MD5Final", "ptr", $pMD5_CTX)
        If @error Then $iError = 6
    EndIf

    ; Single cleanup path for both success and failure.
    DllCall("kernel32.dll", "int", "UnmapViewOfFile", "ptr", $pFile)
    DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFileMappingObject)

    If $iError Then
        Return SetError($iError, 0, "")
    EndIf

    Local $sMD5 = Hex(DllStructGetData($tMD5_CTX, "digest"))

    Return SetError(0, 0, $sMD5)

EndFunc   ;==>_MD5ForFile


; #FUNCTION# ;===============================================================================
;
; Name...........: _SHA1ForFile
; Description ...: Calculates SHA1 value for the specific file.
; Syntax.........: _SHA1ForFile ($sFile)
; Parameters ....: $sFile - Full path to the file to process.
; Return values .: Success - Returns SHA1 value in form of hex string
;                          - Sets @error to 0
;                  Failure - Returns empty string and sets @error:
;                  |1 - CreateFile function or call to it failed.
;                  |2 - CreateFileMapping function or call to it failed.
;                  |3 - MapViewOfFile function or call to it failed.
;                  |4 - CryptAcquireContext function or call to it failed.
;                  |5 - CryptCreateHash function or call to it failed.
;                  |6 - CryptHashData function or call to it failed.
;                  |7 - CryptGetHashParam function or call to it failed.
; Author ........: trancexx
; Remarks .......: The whole file is mapped read-only and the view is passed to CryptHashData
;                  in pieces of at most 1 GiB, so the 32-bit length parameter is never
;                  overflowed.
;
;==========================================================================================
Func _SHA1ForFile($sFile)

    Local $a_hCall = DllCall("kernel32.dll", "hwnd", "CreateFileW", _
            "wstr", $sFile, _
            "dword", 0x80000000, _ ; GENERIC_READ
            "dword", 3, _ ; FILE_SHARE_READ|FILE_SHARE_WRITE
            "ptr", 0, _
            "dword", 3, _ ; OPEN_EXISTING
            "dword", 0, _ ; SECURITY_ANONYMOUS
            "ptr", 0)

    If @error Or $a_hCall[0] = -1 Then
        Return SetError(1, 0, "")
    EndIf

    Local $hFile = $a_hCall[0]

    $a_hCall = DllCall("kernel32.dll", "ptr", "CreateFileMappingW", _
            "hwnd", $hFile, _
            "dword", 0, _ ; default security descriptor
            "dword", 2, _ ; PAGE_READONLY
            "dword", 0, _
            "dword", 0, _
            "ptr", 0)

    If @error Or Not $a_hCall[0] Then
        DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFile)
        Return SetError(2, 0, "")
    EndIf

    ; From here on only the mapping object is used, so the file handle can go.
    DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFile)

    Local $hFileMappingObject = $a_hCall[0]

    $a_hCall = DllCall("kernel32.dll", "ptr", "MapViewOfFile", _
            "hwnd", $hFileMappingObject, _
            "dword", 4, _ ; FILE_MAP_READ
            "dword", 0, _
            "dword", 0, _
            "dword", 0) ; 0 = map the whole file

    If @error Or Not $a_hCall[0] Then
        DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFileMappingObject)
        Return SetError(3, 0, "")
    EndIf

    Local $pFile = $a_hCall[0]
    Local $iBufferSize = FileGetSize($sFile)

    ; Largest number of bytes handed to CryptHashData in one call (1 GiB).
    Local Const $iMaxPiece = 0x40000000

    Local $iError = 0 ; 0 while everything succeeded, otherwise the @error to return
    Local $hContext = 0 ; provider handle, released at the end when non-zero
    Local $hHashSHA1 = 0 ; hash object handle, destroyed at the end when non-zero
    Local $sSHA1 = ""
    Local $iOffset = 0 ; bytes of the view already hashed
    Local $iPiece

    Local $a_iCall = DllCall("advapi32.dll", "int", "CryptAcquireContext", _
            "ptr*", 0, _
            "ptr", 0, _
            "ptr", 0, _
            "dword", 1, _ ; PROV_RSA_FULL
            "dword", 0xF0000000) ; CRYPT_VERIFYCONTEXT

    If @error Or Not $a_iCall[0] Then
        $iError = 4
    Else
        $hContext = $a_iCall[1]
    EndIf

    If Not $iError Then
        $a_iCall = DllCall("advapi32.dll", "int", "CryptCreateHash", _
                "ptr", $hContext, _
                "dword", 0x00008004, _ ; CALG_SHA1
                "ptr", 0, _ ; nonkeyed
                "dword", 0, _
                "ptr*", 0)

        If @error Or Not $a_iCall[0] Then
            $iError = 5
        Else
            $hHashSHA1 = $a_iCall[5]
        EndIf
    EndIf

    While (Not $iError) And ($iOffset < $iBufferSize)
        $iPiece = $iBufferSize - $iOffset
        If $iPiece > $iMaxPiece Then $iPiece = $iMaxPiece

        $a_iCall = DllCall("advapi32.dll", "int", "CryptHashData", _
                "ptr", $hHashSHA1, _
                "ptr", $pFile + $iOffset, _
                "dword", $iPiece, _
                "dword", 0)

        If @error Or Not $a_iCall[0] Then
            $iError = 6
            ExitLoop
        EndIf

        $iOffset += $iPiece
    WEnd

    If Not $iError Then
        Local $tOutSHA1 = DllStructCreate("byte[20]")

        $a_iCall = DllCall("advapi32.dll", "int", "CryptGetHashParam", _
                "ptr", $hHashSHA1, _
                "dword", 2, _ ; HP_HASHVAL
                "ptr", DllStructGetPtr($tOutSHA1), _
                "dword*", 20, _
                "dword", 0)

        If @error Or Not $a_iCall[0] Then
            $iError = 7
        Else
            $sSHA1 = Hex(DllStructGetData($tOutSHA1, 1))
        EndIf
    EndIf

    ; Single cleanup path for both success and failure; only what was
    ; actually acquired is released.
    DllCall("kernel32.dll", "int", "UnmapViewOfFile", "ptr", $pFile)
    DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFileMappingObject)

    If $hHashSHA1 Then
        DllCall("advapi32.dll", "int", "CryptDestroyHash", "ptr", $hHashSHA1)
    EndIf

    If $hContext Then
        DllCall("advapi32.dll", "int", "CryptReleaseContext", "ptr", $hContext, "dword", 0)
    EndIf

    If $iError Then
        Return SetError($iError, 0, "")
    EndIf

    Return SetError(0, 0, $sSHA1)

EndFunc   ;==>_SHA1ForFile

Results are in form of hex strings, but that is easily changed to fit your needs.

First time is the hardest - you will see what I mean if you run it (file mapping related).

Try it on something big.

Edited by trancexx
4 people like this

♡♡♡

.

eMyvnE

Share this post


Link to post
Share on other sites



#2 ·  Posted (edited)

Nice :party:, will for sure give it a try. Do you know, which OS provide the "advapi32.dll" by standard? Until now, I always utilize the functions by Ward here. Did you already run a benchmark against those functions?

Keep up the great work :)

Edited by KaFu

Share this post


Link to post
Share on other sites

Nice :party:, will for sure give it a try. Do you know, which OS provide the "advapi32.dll" by standard? Until now, I always utilize the functions by Ward here. Did you already run a benchmark against those functions?

Keep up the great work :)

You should have no problem running this on XP+.

It should work for Win2000. If not just change "advapi32.dll" in _MD4ForFile() and _MD5ForFile() to "cryptdll.dll".

Btw, Vista also has these functions available in ntdll.dll.

This is a reaction to Ward's function. I had DEP issues with his functions on several systems (SMF inherited that problem).


♡♡♡

.

eMyvnE

Share this post


Link to post
Share on other sites

#4 ·  Posted (edited)

This is a reaction to Ward's function. I had DEP issues with his functions on several systems (SMF inherited that problem).

I haven't got problems with DEP so I can't test, but shouldn't protecting the memory with the EXECUTE_READWRITE flag with the VirtualProtect function solve that issue?

Edit: Sometimes I forget words.

Edited by monoceres

Broken link? PM me and I'll send you the file!

Share this post


Link to post
Share on other sites

#5 ·  Posted (edited)

This is a reaction to Ward's function. I had DEP issues with his functions on several systems (SMF inherited that problem).

Jes, that just happens because I already resold Vista :)... had to do a research on DEP first to understand your statement :idea:. Maybe I should re-buy a Vista Home Version OEM on eBay for testing only :party:?

Will for sure do some testing! Hmmm, but looking at the code, the main advantage I utilize in SMF with Ward's function is, that I don't calculate the full md5 for large files but only a small portion to identify duplicate files, this gives a major speed improvement (without any accuracy loss I realized, 24k is enough to uniquely identify nearly any file (forget MS files, for those I also use a full md5, just to much framecode)). Can't see a way to just partially calculate md5 values for a file, only the whole file seems possible.

Edited by KaFu

Share this post


Link to post
Share on other sites

I haven't got problems with DEP so I can't test, but shouldn't protecting the memory with the EXECUTE_READWRITE flag with the VirtualProtect function solve that issue?

Edit: Sometimes I forget words.

Yes, yes, of course. I just did a few tests (I used the method of allocating with $PAGE_EXECUTE_READWRITE and moving).

You should drop him a pm or post there.


♡♡♡

.

eMyvnE

Share this post


Link to post
Share on other sites

Jes, that just happens because I already resold Vista :)... had to do a research on DEP first to understand your statement :idea:. Maybe I should re-buy a Vista Home Version OEM on eBay for testing only :party:?

Will for sure do some testing! Hmmm, but looking at the code, the main advantage I utilize in SMF with Ward's function is, that I don't calculate the full md5 for large files but only a small portion to identify duplicate files, this gives a major speed improvement (without any accuracy loss I realized, 24k is enough to uniquely identify nearly any file (forget MS files, for those I also use a full md5, just to much framecode)). Can't see a way to just partially calculate md5 values for a file, only the whole file seems possible.

Last parameter of MapViewOfFile function is determining the number of bytes of a file that you wanna map.

♡♡♡

.

eMyvnE

Share this post


Link to post
Share on other sites

Thanks Zedna.

Thing and beauty of these functions is that you are not creating buffers directly. You are not doing memory job.

If writing some, for example, code that reads some file in low(er) level languages you never allocate memory in size of that file. That is done in chunks. AutoIt (internally) is reading some file in chunks of 4096 (*2) bytes and loops until end is reached. If you would do that in script it would take really long time to read e.g. 100MB.

That's why leaving allocation job on a lower level functions is a good thing to do when scripting. Speed is gained. In fact speed of that hashing functions, written in AutoIt, is measurable with speed of hashing functions written in any language.


♡♡♡

.

eMyvnE

Share this post


Link to post
Share on other sites

trancexx

Thank you very much. For small files speed of hash-calculation is very big.

Share this post


Link to post
Share on other sites

Very Nice ..

Thank You !!!

^_*

Share this post


Link to post
Share on other sites

Very nicely done.

I just recently started lurking the forums again and what you guys come up with never ceases to amaze me.

:)


Don't bother, It's inside your monitor!------GUISetOnEvent should behave more like HotKeySet()

Share this post


Link to post
Share on other sites

#13 ·  Posted (edited)

trancexx, started tinkering with your functions :) , first speed comparison to Ward's UDF looks promising. Now I got some questions:

- You mentioned DEP problems with Ward's UDF. I'm not sure, but is this related to the fact that the ASM code is loaded to memory and utilized to calculate the md5?

- Is there a specific reason why you use CreateFileMappingW()? Or does the _WinAPI_CreateFile() function trigger DEP (don't think so)? I found it quite hard to read only parts of a file with CreateFileMappingW() (as I do in SMF to speed up hashing, in fact create fake hashes, for large files).

Best Regards

#include<WinAPI.au3>

; Computes the MD5 of a file by reading it with ReadFile in 10 MB pieces and
; feeding each piece to advapi32's MD5Update, then prints the digest.
Global $nBytes ; bytes delivered by the most recent _WinAPI_ReadFile call
$Checksum_Filename = @ScriptDir & "\Testfile.exe"

; Hashing context; the finished digest is read from the "digest" member.
Local $tMD5_CTX = DllStructCreate("dword i[2];" & _
        "dword buf[4];" & _
        "ubyte in[64];" & _
        "ubyte digest[16]")

DllCall("advapi32.dll", "none", "MD5Init", "ptr", DllStructGetPtr($tMD5_CTX))

; Creation 2 = open existing file, access 2 = read.
$hFile = _WinAPI_CreateFile($Checksum_Filename, 2, 2)
If Not $hFile Then
    ; Without a valid handle every read below would fail and the printed
    ; digest would be that of an empty input.
    ConsoleWrite('! Unable to open ' & $Checksum_Filename & @CRLF)
    Exit 1
EndIf

$iFileGetSize_Save = FileGetSize($Checksum_Filename)
$iBuffersize = 1024 * 1024 * 10
Local $tBuffer = DllStructCreate("byte[" & $iBuffersize & "]")

For $i = 1 To Ceiling($iFileGetSize_Save / $iBuffersize)
    ; Stop on a failed read or when nothing more was delivered, so stale
    ; buffer contents are never hashed.
    If Not _WinAPI_ReadFile($hFile, DllStructGetPtr($tBuffer), $iBuffersize, $nBytes) Then ExitLoop
    If $nBytes = 0 Then ExitLoop

    DllCall("advapi32.dll", "none", "MD5Update", _
            "ptr", DllStructGetPtr($tMD5_CTX), _
            "ptr", DllStructGetPtr($tBuffer), _
            "dword", $nBytes)
Next

; A file handle is not a window handle, so an IsHWnd() test here would skip
; the close and leak the handle. $hFile is known to be valid at this point.
_WinAPI_CloseHandle($hFile)

DllCall("advapi32.dll", "none", "MD5Final", "ptr", DllStructGetPtr($tMD5_CTX))

ConsoleWrite('MD5 = 0x' & Hex(DllStructGetData($tMD5_CTX, "digest")) & @crlf)
Edited by KaFu

Share this post


Link to post
Share on other sites

#14 ·  Posted (edited)

trancexx, started tinkering with your functions :) , first speed comparison to Ward's UDF looks promising. Now I got some questions:

- You mentioned DEP problems with Ward's UDF. I'm not sure, but is this related to the fact that the ASM code is loaded to memory and utilized to calculate the md5?

- Is there a specific reason why you use CreateFileMappingW()? Or does the _WinAPI_CreateFile() function trigger DEP (don't think so)?

...

DEP issues with Ward's functions are caused by the access protection of the memory he's allocating. Proper way is shown in any recent script written by me on assembly theme. Just read them.

I already explained why CreateFileMapping is used. It has nothing to do with DEP (how could it???).

I found it quite hard to read only parts of a file with CreateFileMappingW() (as I do in SMF to speed up hashing, in fact create fake hashes, for large files).

Hardness here is confusing.

There are only two small things that you need to change to do that with posted functions. For MD5:

; #FUNCTION# ;===============================================================================
;
; Name...........: _MD5ForFirstFileChunk
; Description ...: Calculates MD5 value for the leading bytes of the specific file.
; Syntax.........: _MD5ForFirstFileChunk ($sFile [, $iChunkSize = 0])
; Parameters ....: $sFile - Full path to the file to process.
;                  $iChunkSize - Number of bytes, counted from the start of the file, to hash.
;                                0 (default), a negative value, or a value larger than the
;                                file means the whole file.
; Return values .: Success - Returns MD5 value in form of hex string
;                          - Sets @error to 0
;                  Failure - Returns empty string and sets @error:
;                  |1 - CreateFile function or call to it failed.
;                  |2 - CreateFileMapping function or call to it failed.
;                  |3 - MapViewOfFile function or call to it failed.
;                  |4 - MD5Init function or call to it failed.
;                  |5 - MD5Update function or call to it failed.
;                  |6 - MD5Final function or call to it failed.
; Remarks .......: Only the requested number of bytes is mapped. The view is passed to
;                  MD5Update in pieces of at most 1 GiB, so the 32-bit length parameter is
;                  never overflowed.
;
;==========================================================================================
Func _MD5ForFirstFileChunk($sFile, $iChunkSize = 0)

    ; Asking MapViewOfFile for more bytes than the file holds makes it fail,
    ; so an oversized (or non-positive) request is reduced to the file size.
    Local $iFileSize = FileGetSize($sFile)
    If $iChunkSize <= 0 Or $iChunkSize > $iFileSize Then
        $iChunkSize = $iFileSize
    EndIf

    Local $a_hCall = DllCall("kernel32.dll", "hwnd", "CreateFileW", _
            "wstr", $sFile, _
            "dword", 0x80000000, _ ; GENERIC_READ
            "dword", 1, _ ; FILE_SHARE_READ
            "ptr", 0, _
            "dword", 3, _ ; OPEN_EXISTING
            "dword", 0, _ ; SECURITY_ANONYMOUS
            "ptr", 0)

    If @error Or $a_hCall[0] = -1 Then
        Return SetError(1, 0, "")
    EndIf

    Local $hFile = $a_hCall[0]

    $a_hCall = DllCall("kernel32.dll", "ptr", "CreateFileMappingW", _
            "hwnd", $hFile, _
            "dword", 0, _ ; default security descriptor
            "dword", 2, _ ; PAGE_READONLY
            "dword", 0, _
            "dword", 0, _
            "ptr", 0)

    If @error Or Not $a_hCall[0] Then
        DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFile)
        Return SetError(2, 0, "")
    EndIf

    ; From here on only the mapping object is used, so the file handle can go.
    DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFile)

    Local $hFileMappingObject = $a_hCall[0]

    $a_hCall = DllCall("kernel32.dll", "ptr", "MapViewOfFile", _
            "hwnd", $hFileMappingObject, _
            "dword", 4, _ ; FILE_MAP_READ
            "dword", 0, _
            "dword", 0, _
            "ulong_ptr", $iChunkSize) ; SIZE_T: number of bytes to map from offset 0

    If @error Or Not $a_hCall[0] Then
        DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFileMappingObject)
        Return SetError(3, 0, "")
    EndIf

    Local $pFile = $a_hCall[0]
    Local $iBufferSize = $iChunkSize

    ; Hashing context; the finished digest is read from the "digest" member.
    Local $tMD5_CTX = DllStructCreate("dword i[2];" & _
            "dword buf[4];" & _
            "ubyte in[64];" & _
            "ubyte digest[16]")
    Local $pMD5_CTX = DllStructGetPtr($tMD5_CTX)

    ; Largest number of bytes handed to MD5Update in one call (1 GiB).
    Local Const $iMaxPiece = 0x40000000

    Local $iError = 0 ; 0 while everything succeeded, otherwise the @error to return
    Local $iOffset = 0 ; bytes of the view already hashed
    Local $iPiece

    DllCall("advapi32.dll", "none", "MD5Init", "ptr", $pMD5_CTX)
    If @error Then $iError = 4

    While (Not $iError) And ($iOffset < $iBufferSize)
        $iPiece = $iBufferSize - $iOffset
        If $iPiece > $iMaxPiece Then $iPiece = $iMaxPiece

        DllCall("advapi32.dll", "none", "MD5Update", _
                "ptr", $pMD5_CTX, _
                "ptr", $pFile + $iOffset, _
                "dword", $iPiece)

        If @error Then
            $iError = 5
            ExitLoop
        EndIf

        $iOffset += $iPiece
    WEnd

    If Not $iError Then
        DllCall("advapi32.dll", "none", "MD5Final", "ptr", $pMD5_CTX)
        If @error Then $iError = 6
    EndIf

    ; Single cleanup path for both success and failure.
    DllCall("kernel32.dll", "int", "UnmapViewOfFile", "ptr", $pFile)
    DllCall("kernel32.dll", "int", "CloseHandle", "hwnd", $hFileMappingObject)

    If $iError Then
        Return SetError($iError, 0, "")
    EndIf

    Local $sMD5 = Hex(DllStructGetData($tMD5_CTX, "digest"))

    Return SetError(0, 0, $sMD5)

EndFunc   ;==>_MD5ForFirstFileChunk

MD5 for first 1024 bytes of some file would be:

; Hash only the first 1024 bytes of $sFile.
$sMD5 = _MD5ForFirstFileChunk($sFile, 1024)
Edited by trancexx

♡♡♡

.

eMyvnE

Share this post


Link to post
Share on other sites

#15 ·  Posted (edited)

Hardness here is confusing.

There are only two small things that you need to change to do that with posted functions. For MD5:

Sorry, I didn't explain clearly... Reading the first chunk seems quite easy, but for SMF's duplicate search functionality I

- read&hash the first (customizable) 8K of a file

- read&hash 8K from exactly the middle of the file

- read&hash the last 8K from a file

and create a 'fake' hash for the file adding up these three partial hashes, dramatically increasing performance for large files (with very, very few false positives for smaller ones like MS-Office files with their huge overhead, but those are explicitly excluded from this method and are bound for a full hash).

What's now difficult to me is how to read a defined chunk from the middle of a file with MapViewOfFile(), as MSDN states "That is, the offset must be a multiple of the allocation granularity" (to be obtained with GetSystemInfo()), sounds like you're bound the certain offset steps... utilizing _WinAPI_SetFilePointer() it's easy to define offset to the byte.

But anyway, it seems to work the way I want, it does not collide with DEP.

Additionally, is there an advantage to use CreateFileMappingW() over _WinAPI_ReadFile() I don't see? MSDN also states "A mapped view of a file is not guaranteed to be coherent with a file that is being accessed by the ReadFile or WriteFile function.".

Edited by KaFu

Share this post


Link to post
Share on other sites

Sorry, I didn't explain clearly... Reading the first chunk seems quite easy, but for SMF's duplicate search functionality I

- read&hash the first (customizable) 8K of a file

- read&hash 8K from exactly the middle of the file

- read&hash the last 8K from a file

and create a 'fake' hash for the file adding up these three partial hashes, dramatically increasing performance for large files (with very, very few false positives for smaller ones like MS-Office files with their huge overhead, but those are explicitly excluded from this method and are bound for a full hash).

What's now difficult to me is how to read a defined chunk from the middle of a file with MapViewOfFile(), as MSDN states "That is, the offset must be a multiple of the allocation granularity" (to be obtained with GetSystemInfo()), sounds like you're bound the certain offset steps... utilizing _WinAPI_SetFilePointer() it's easy to define offset to the byte.

But anyway, it seems to work the way I want, it does not collide with DEP.

Additionally, is there an advantage to use CreateFileMappingW() over _WinAPI_ReadFile() I don't see? MSDN also states "A mapped view of a file is not guaranteed to be coherent with a file that is being accessed by the ReadFile or WriteFile function.".

You should just use SetFilePointer to walk through the file.

Yes there are advantages. I explained some of them.

I must quote this additionally

A mapped file and a file that is accessed by using the input and output (I/O) functions (ReadFile and WriteFile) are not necessarily coherent.

How do you know which one is righter?

♡♡♡

.

eMyvnE

Share this post


Link to post
Share on other sites

I think the underlying question he's trying to ask is, will reading a file for hashing purposes with the different methods possibly produce two different hashes? If so, it's not so much a case of being 'right', but being 'consistent' with pre-existing hashing methods. If using a mapped file is faster but produces hashes that don't match most other programs out there, then there's not much point to it.

Share this post


Link to post
Share on other sites

I think the underlying question he's trying to ask is, will reading a file for hashing purposes with the different methods possibly produce two different hashes? If so, it's not so much a case of being 'right', but being 'consistent' with pre-existing hashing methods. If using a mapped file is faster but produces hashes that don't match most other programs out there, then there's not much point to it.

It's faster in my tests, less demanding too and produces the same hashes.

If it was/is anything different then I wouldn't be posting it.

That answers some questions?


♡♡♡

.

eMyvnE

Share this post


Link to post
Share on other sites

Sure does. Any idea what MS means by 'incoherent' then?

Share this post


Link to post
Share on other sites

#20 ·  Posted (edited)

Sure does. Any idea what MS means by 'incoherent' then?

Maybe CreateFileMappingW() somehow considers I/O cached / not yet flushed to disk? Could also be the reason why trancexx says it's faster than FileRead()... Edited by KaFu

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!


Register a new account

Sign in

Already have an account? Sign in here.


Sign In Now