#include-once

; #INDEX# =======================================================================================================================
; Title .........: CUDA UDF
; AutoIt Version : 3.3.14.2
; Language ......: English
; Description ...: AutoIt wrapper for NVIDIA CUDA Driver API.
; Author(s) .....: scintilla4evr
; ===============================================================================================================================

; Handle of the DLL opened by _CUDA_Startup(); 0 until that function has been called.
; The DllCall-based wrappers in this file pass it as the DLL argument.
Global $_g_hCUDADll = 0
; Named status codes for comparing against the values the wrappers return / place in @extended.
; NOTE(review): names and values appear to mirror the CUDA driver API's CUresult codes - confirm against cuda.h.
Global Enum $CUDA_SUCCESS = 0, _
			$CUDA_ERROR_INVALID_VALUE = 1, _
			$CUDA_ERROR_OUT_OF_MEMORY = 2, _
			$CUDA_ERROR_NOT_INITIALIZED = 3, _
			$CUDA_ERROR_NO_DEVICE = 100, _
			$CUDA_ERROR_INVALID_DEVICE = 101, _
			$CUDA_ERROR_INVALID_IMAGE = 200, _
			$CUDA_ERROR_INVALID_CONTEXT = 201, _
			$CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202, _
			$CUDA_ERROR_MAP_FAILED = 205, _
			$CUDA_ERROR_UNMAP_FAILED = 206, _
			$CUDA_ERROR_ARRAY_IS_MAPPED = 207, _
			$CUDA_ERROR_ALREADY_MAPPED = 208, _
			$CUDA_ERROR_NO_BINARY_FOR_GPU = 209, _
			$CUDA_ERROR_ALREADY_ACQUIRED = 210, _
			$CUDA_ERROR_NOT_MAPPED = 211, _
			$CUDA_ERROR_INVALID_SOURCE = 300, _
			$CUDA_ERROR_FILE_NOT_FOUND = 301, _
			$CUDA_ERROR_INVALID_HANDLE = 400, _
			$CUDA_ERROR_NOT_FOUND = 500, _
			$CUDA_ERROR_NOT_READY = 600, _
			$CUDA_ERROR_LAUNCH_FAILED = 700, _
			$CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, _
			$CUDA_ERROR_LAUNCH_TIMEOUT = 702, _
			$CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, _
			$CUDA_ERROR_UNKNOWN = 999

; DllStructCreate() layout of the buffer that _CUDA_DeviceGetProperties() hands to cuDeviceGetProperties.
; All members are declared as 32-bit ints; maxThreadsDim and maxGridSize are 3-element arrays.
Global Const $tagCUDADEVICEPROP = "int maxThreadsPerBlock; int maxThreadsDim[3]; int maxGridSize[3]; int sharedMemPerBlock; int totalConstantMemory; int SIMDWidth; int memPitch; int regsPerBlock; int clockRate; int textureAlign;"

#Region Initialization

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_Startup
; Description ...: Opens the CUDA driver DLL and stores its handle in $_g_hCUDADll for the other _CUDA_* functions.
; Syntax ........: _CUDA_Startup([$sDll = "nvcuda.dll"])
; Parameters ....: $sDll - [optional] Name or path of the DLL to open. Default is "nvcuda.dll".
; Return values .: Success - 1
;                  Failure - 0 and sets @error to 1 (the DLL could not be opened)
; Remarks .......: DllOpen() signals failure by returning -1 and does not set @error, so the returned handle itself is
;                  checked. On failure $_g_hCUDADll is left holding -1.
; ===============================================================================================================================
Func _CUDA_Startup($sDll = "nvcuda.dll")
	$_g_hCUDADll = DllOpen($sDll)
	; Testing @error here would never fire: DllOpen reports failure only through its return value.
	If $_g_hCUDADll = -1 Then Return SetError(1, 0, 0)
	Return 1
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_Shutdown
; Description ...: Closes the DLL handle stored in $_g_hCUDADll.
; Syntax ........: _CUDA_Shutdown()
; Parameters ....: None
; Return values .: None
; Remarks .......: After closing, $_g_hCUDADll is reset to 0 (its value before _CUDA_Startup() was called) so that later
;                  wrapper calls do not reuse a handle that has already been closed.
; ===============================================================================================================================
Func _CUDA_Shutdown()
	DllClose($_g_hCUDADll)
	; Return the global to its declared initial state instead of leaving a stale handle behind.
	$_g_hCUDADll = 0
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_Init
; Description ...: Calls cuInit in the DLL referenced by $_g_hCUDADll.
; Syntax ........: _CUDA_Init([$iFlags = 0])
; Parameters ....: $iFlags - [optional] Value passed to cuInit as its single "uint" argument. Default is 0.
; Return values .: Success - the value returned by cuInit
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_Init($iFlags = 0)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuInit", "uint", $iFlags)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return $aRet[0]
EndFunc

#EndRegion

#Region Device functions

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_DeviceGetCount
; Description ...: Calls cuDeviceGetCount and returns the integer it writes to its output parameter.
; Syntax ........: _CUDA_DeviceGetCount()
; Parameters ....: None
; Return values .: Success - the value written to the "int*" output; @extended is set to cuDeviceGetCount's return value
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_DeviceGetCount()
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuDeviceGetCount", "int*", 0)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Local $iStatus = $aRet[0] ; return value of the DLL function
	Local $iCount = $aRet[1] ; first parameter as it stands after the call
	Return SetExtended($iStatus, $iCount)
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_DeviceGet
; Description ...: Calls cuDeviceGet for the given ordinal and returns the device value it writes to its output parameter.
; Syntax ........: _CUDA_DeviceGet($iOrdinal)
; Parameters ....: $iOrdinal - Integer passed to cuDeviceGet as its second ("int") argument.
; Return values .: Success - the value written to the "int*" output; @extended is set to cuDeviceGet's return value
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_DeviceGet($iOrdinal)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuDeviceGet", "int*", 0, "int", $iOrdinal)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Local $iStatus = $aRet[0] ; return value of the DLL function
	Local $hDevice = $aRet[1] ; first parameter as it stands after the call
	Return SetExtended($iStatus, $hDevice)
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_DeviceGetName
; Description ...: Calls cuDeviceGetName with a 128-char buffer and returns the buffer's contents.
; Syntax ........: _CUDA_DeviceGetName($hDevice)
; Parameters ....: $hDevice - Device value passed to cuDeviceGetName as its third ("int") argument.
; Return values .: Success - contents of the "name" buffer; @extended is set to cuDeviceGetName's return value
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_DeviceGetName($hDevice)
	; The same length is used for the buffer declaration and for the length argument of the call.
	Local Const $iBufLen = 128
	Local $tBuffer = DllStructCreate("char name[" & $iBufLen & "]")
	Local $pBuffer = DllStructGetPtr($tBuffer)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuDeviceGetName", "ptr", $pBuffer, "int", $iBufLen, "int", $hDevice)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return SetExtended($aRet[0], $tBuffer.name)
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_DeviceComputeCapability
; Description ...: Calls cuDeviceComputeCapability and joins its two integer outputs into a "first.second" string.
; Syntax ........: _CUDA_DeviceComputeCapability($hDevice)
; Parameters ....: $hDevice - Device value passed to cuDeviceComputeCapability as its third ("int") argument.
; Return values .: Success - string made of the first output, a dot, and the second output;
;                            @extended is set to cuDeviceComputeCapability's return value
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_DeviceComputeCapability($hDevice)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuDeviceComputeCapability", "int*", 0, "int*", 0, "int", $hDevice)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Local $sCapability = $aRet[1] & "." & $aRet[2]
	Return SetExtended($aRet[0], $sCapability)
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_DeviceTotalMem
; Description ...: Calls cuDeviceTotalMem and returns the unsigned integer it writes to its output parameter.
; Syntax ........: _CUDA_DeviceTotalMem($hDevice)
; Parameters ....: $hDevice - Device value passed to cuDeviceTotalMem as its second ("int") argument.
; Return values .: Success - the value written to the "uint*" output; @extended is set to cuDeviceTotalMem's return value
;                  Failure - -1 and sets @error to the @error reported by DllCall
; Remarks .......: The output is marshalled as a 32-bit "uint*".
; ===============================================================================================================================
Func _CUDA_DeviceTotalMem($hDevice)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuDeviceTotalMem", "uint*", 0, "int", $hDevice)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Local $iStatus = $aRet[0] ; return value of the DLL function
	Local $iBytes = $aRet[1] ; first parameter as it stands after the call
	Return SetExtended($iStatus, $iBytes)
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_DeviceGetProperties
; Description ...: Calls cuDeviceGetProperties with a freshly created $tagCUDADEVICEPROP struct and returns that struct.
; Syntax ........: _CUDA_DeviceGetProperties($hDevice)
; Parameters ....: $hDevice - Device value passed to cuDeviceGetProperties as its second ("int") argument.
; Return values .: Success - the DllStruct passed to the call; @extended is set to cuDeviceGetProperties' return value
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_DeviceGetProperties($hDevice)
	Local $tDevProp = DllStructCreate($tagCUDADEVICEPROP)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuDeviceGetProperties", "struct*", $tDevProp, "int", $hDevice)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return SetExtended($aRet[0], $tDevProp)
EndFunc

#EndRegion

#Region Context

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_CtxCreate
; Description ...: Calls cuCtxCreate and returns the pointer it writes to its output parameter.
; Syntax ........: _CUDA_CtxCreate($hDevice[, $iFlags = 0])
; Parameters ....: $hDevice - Device value passed as the third ("int") argument.
;                  $iFlags  - [optional] Value passed as the second ("int") argument. Default is 0.
; Return values .: Success - the value written to the "ptr*" output; @extended is set to cuCtxCreate's return value
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_CtxCreate($hDevice, $iFlags = 0)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuCtxCreate", "ptr*", 0, "int", $iFlags, "int", $hDevice)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Local $iStatus = $aRet[0] ; return value of the DLL function
	Local $hContext = $aRet[1] ; first parameter as it stands after the call
	Return SetExtended($iStatus, $hContext)
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_CtxDetach
; Description ...: Calls cuCtxDetach with the given context pointer.
; Syntax ........: _CUDA_CtxDetach($hContext)
; Parameters ....: $hContext - Pointer passed to cuCtxDetach as its single "ptr" argument.
; Return values .: Success - the value returned by cuCtxDetach
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_CtxDetach($hContext)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuCtxDetach", "ptr", $hContext)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return $aRet[0]
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_CtxSynchronize
; Description ...: Calls cuCtxSynchronize (no arguments).
; Syntax ........: _CUDA_CtxSynchronize()
; Parameters ....: None
; Return values .: Success - the value returned by cuCtxSynchronize
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_CtxSynchronize()
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuCtxSynchronize")
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return $aRet[0]
EndFunc

#EndRegion

#Region Modules

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_ModuleLoad
; Description ...: Calls cuModuleLoad for the given file name and returns the pointer it writes to its output parameter.
; Syntax ........: _CUDA_ModuleLoad($sFile)
; Parameters ....: $sFile - String passed to cuModuleLoad as its second ("str") argument.
; Return values .: Success - the value written to the "ptr*" output; @extended is set to cuModuleLoad's return value
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_ModuleLoad($sFile)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuModuleLoad", "ptr*", 0, "str", $sFile)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Local $iStatus = $aRet[0] ; return value of the DLL function
	Local $hModule = $aRet[1] ; first parameter as it stands after the call
	Return SetExtended($iStatus, $hModule)
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_ModuleGetFunction
; Description ...: Calls cuModuleGetFunction and returns the pointer it writes to its output parameter.
; Syntax ........: _CUDA_ModuleGetFunction($hModule, $sFuncName)
; Parameters ....: $hModule   - Pointer passed as the second ("ptr") argument.
;                  $sFuncName - String passed as the third ("str") argument.
; Return values .: Success - the value written to the "ptr*" output; @extended is set to cuModuleGetFunction's return value
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_ModuleGetFunction($hModule, $sFuncName)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuModuleGetFunction", "ptr*", 0, "ptr", $hModule, "str", $sFuncName)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Local $iStatus = $aRet[0] ; return value of the DLL function
	Local $hFunction = $aRet[1] ; first parameter as it stands after the call
	Return SetExtended($iStatus, $hFunction)
EndFunc

#EndRegion

#Region Execution

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_Launch
; Description ...: Calls cuLaunchGrid for the given function pointer and grid dimensions.
; Syntax ........: _CUDA_Launch($hFunction[, $iGridX = 1[, $iGridY = 1]])
; Parameters ....: $hFunction - Pointer passed as the first ("ptr") argument.
;                  $iGridX    - [optional] Value passed as the second ("int") argument. Default is 1.
;                  $iGridY    - [optional] Value passed as the third ("int") argument. Default is 1.
; Return values .: Success - the value returned by cuLaunchGrid
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_Launch($hFunction, $iGridX = 1, $iGridY = 1)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuLaunchGrid", "ptr", $hFunction, "int", $iGridX, "int", $iGridY)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return $aRet[0]
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_FuncSetBlockShape
; Description ...: Calls cuFuncSetBlockShape with the given function pointer and three integer dimensions.
; Syntax ........: _CUDA_FuncSetBlockShape($hFunction, $iX, $iY, $iZ)
; Parameters ....: $hFunction - Pointer passed as the first ("ptr") argument.
;                  $iX        - Value passed as the second ("int") argument.
;                  $iY        - Value passed as the third ("int") argument.
;                  $iZ        - Value passed as the fourth ("int") argument.
; Return values .: Success - the value returned by cuFuncSetBlockShape
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_FuncSetBlockShape($hFunction, $iX, $iY, $iZ)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuFuncSetBlockShape", "ptr", $hFunction, "int", $iX, "int", $iY, "int", $iZ)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return $aRet[0]
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_FuncSetSharedSize
; Description ...: Calls cuFuncSetSharedSize with the given function pointer and byte count.
; Syntax ........: _CUDA_FuncSetSharedSize($hFunction, $nBytes)
; Parameters ....: $hFunction - Pointer passed as the first ("ptr") argument.
;                  $nBytes    - Value passed as the second ("uint") argument.
; Return values .: Success - the value returned by cuFuncSetSharedSize
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_FuncSetSharedSize($hFunction, $nBytes)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuFuncSetSharedSize", "ptr", $hFunction, "uint", $nBytes)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return $aRet[0]
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_ParamSetSize
; Description ...: Calls cuParamSetSize with the given function pointer and byte count.
; Syntax ........: _CUDA_ParamSetSize($hFunction, $nBytes)
; Parameters ....: $hFunction - Pointer passed as the first ("ptr") argument.
;                  $nBytes    - Value passed as the second ("uint") argument.
; Return values .: Success - the value returned by cuParamSetSize
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_ParamSetSize($hFunction, $nBytes)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuParamSetSize", "ptr", $hFunction, "uint", $nBytes)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return $aRet[0]
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_ParamSetv
; Description ...: Calls cuParamSetv with the given function pointer, offset, data value and byte count.
; Syntax ........: _CUDA_ParamSetv($hFunction, $iOffset, $pData, $nBytes)
; Parameters ....: $hFunction - Pointer passed as the first ("ptr") argument.
;                  $iOffset   - Value passed as the second ("int") argument.
;                  $pData     - Value passed as the third argument, marshalled as "ptr*".
;                  $nBytes    - Value passed as the fourth ("uint") argument.
; Return values .: Success - the value returned by cuParamSetv
;                  Failure - -1 and sets @error to the @error reported by DllCall
; Remarks .......: Because the third argument uses the "ptr*" type, DllCall hands cuParamSetv the address of a temporary
;                  that holds $pData, not $pData itself.
;                  NOTE(review): this looks intended for passing a pointer value (e.g. a device pointer) by address;
;                  confirm that callers never pass an $nBytes larger than the pointer size.
; ===============================================================================================================================
Func _CUDA_ParamSetv($hFunction, $iOffset, $pData, $nBytes)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuParamSetv", "ptr", $hFunction, "int", $iOffset, "ptr*", $pData, "uint", $nBytes)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return $aRet[0]
EndFunc

;cuParamSeti    (CUfunction hfunc, int offset, unsigned int value);
; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_ParamSeti
; Description ...: Calls cuParamSeti with the given function pointer, offset and unsigned integer value.
; Syntax ........: _CUDA_ParamSeti($hFunction, $iOffset, $iData)
; Parameters ....: $hFunction - Pointer passed as the first ("ptr") argument.
;                  $iOffset   - Value passed as the second ("int") argument.
;                  $iData     - Value passed as the third ("uint") argument.
; Return values .: Success - the value returned by cuParamSeti
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_ParamSeti($hFunction, $iOffset, $iData)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuParamSeti", "ptr", $hFunction, "int", $iOffset, "uint", $iData)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return $aRet[0]
EndFunc

;cuParamSetf    (CUfunction hfunc, int offset, float value);
; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_ParamSetf
; Description ...: Calls cuParamSetf with the given function pointer, offset and floating-point value.
; Syntax ........: _CUDA_ParamSetf($hFunction, $iOffset, $fData)
; Parameters ....: $hFunction - Pointer passed as the first ("ptr") argument.
;                  $iOffset   - Value passed as the second ("int") argument.
;                  $fData     - Value passed as the third argument, marshalled as a 32-bit "float".
; Return values .: Success - the value returned by cuParamSetf
;                  Failure - -1 and sets @error to the @error reported by DllCall
; Remarks .......: The value must be marshalled as "float" to match the C prototype
;                  cuParamSetf(CUfunction hfunc, int offset, float value). Marshalling it as "uint" would convert the
;                  number to an integer and hand the callee an integer bit pattern instead of the float encoding.
; ===============================================================================================================================
Func _CUDA_ParamSetf($hFunction, $iOffset, $fData)
	Local $aCall
	$aCall = DllCall($_g_hCUDADll, "dword", "cuParamSetf", "ptr", $hFunction, "int", $iOffset, "float", $fData)
	If @error Then Return SetError(@error, 0, -1)
	Return $aCall[0]
EndFunc

#EndRegion

#Region Memory

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_MemAlloc
; Description ...: Calls cuMemAlloc for the given byte count and returns the pointer it writes to its output parameter.
; Syntax ........: _CUDA_MemAlloc($nBytes)
; Parameters ....: $nBytes - Value passed to cuMemAlloc as its second ("uint") argument.
; Return values .: Success - the value written to the "ptr*" output; @extended is set to cuMemAlloc's return value
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_MemAlloc($nBytes)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuMemAlloc", "ptr*", 0, "uint", $nBytes)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Local $iStatus = $aRet[0] ; return value of the DLL function
	Local $pDevMem = $aRet[1] ; first parameter as it stands after the call
	Return SetExtended($iStatus, $pDevMem)
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_MemFree
; Description ...: Calls cuMemFree with the given pointer.
; Syntax ........: _CUDA_MemFree($pDevMemPtr)
; Parameters ....: $pDevMemPtr - Pointer passed to cuMemFree as its single "ptr" argument.
; Return values .: Success - the value returned by cuMemFree
;                  Failure - -1 and sets @error to the @error reported by DllCall
; ===============================================================================================================================
Func _CUDA_MemFree($pDevMemPtr)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuMemFree", "ptr", $pDevMemPtr)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return $aRet[0]
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_MemcpyHtoD
; Description ...: Calls cuMemcpyHtoD with the given device pointer, host pointer and byte count.
; Syntax ........: _CUDA_MemcpyHtoD($pHostPtr, $pDevMemPtr, $nBytes)
; Parameters ....: $pHostPtr   - Pointer passed to the DLL function as its SECOND ("ptr") argument.
;                  $pDevMemPtr - Pointer passed to the DLL function as its FIRST ("ptr") argument.
;                  $nBytes     - Value passed as the third ("uint") argument.
; Return values .: Success - the value returned by cuMemcpyHtoD
;                  Failure - -1 and sets @error to the @error reported by DllCall
; Remarks .......: The wrapper takes the host pointer first, whereas the DLL function receives the device pointer first.
; ===============================================================================================================================
Func _CUDA_MemcpyHtoD($pHostPtr, $pDevMemPtr, $nBytes)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuMemcpyHtoD", "ptr", $pDevMemPtr, "ptr", $pHostPtr, "uint", $nBytes)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return $aRet[0]
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _CUDA_MemcpyDtoH
; Description ...: Calls cuMemcpyDtoH with the given host pointer, device pointer and byte count.
; Syntax ........: _CUDA_MemcpyDtoH($pDevMemPtr, $pHostPtr, $nBytes)
; Parameters ....: $pDevMemPtr - Pointer passed to the DLL function as its SECOND ("ptr") argument.
;                  $pHostPtr   - Pointer passed to the DLL function as its FIRST ("ptr") argument.
;                  $nBytes     - Value passed as the third ("uint") argument.
; Return values .: Success - the value returned by cuMemcpyDtoH
;                  Failure - -1 and sets @error to the @error reported by DllCall
; Remarks .......: The wrapper takes the device pointer first, whereas the DLL function receives the host pointer first.
; ===============================================================================================================================
Func _CUDA_MemcpyDtoH($pDevMemPtr, $pHostPtr, $nBytes)
	Local $aRet = DllCall($_g_hCUDADll, "dword", "cuMemcpyDtoH", "ptr", $pHostPtr, "ptr", $pDevMemPtr, "uint", $nBytes)
	If @error Then
		Return SetError(@error, 0, -1)
	EndIf
	Return $aRet[0]
EndFunc

#EndRegion
