mirror of
https://github.com/vxunderground/VXUG-Papers.git
synced 2026-06-13 06:19:24 +00:00
Add files via upload
This commit is contained in:
@@ -0,0 +1,372 @@
|
||||
#include <Windows.h>
|
||||
#pragma warning(disable:6011)
|
||||
|
||||
|
||||
#define CUDACALL __stdcall
|
||||
typedef struct PCUDE_CONTEXT* CUDA_CONTEXT;
|
||||
|
||||
typedef INT(CUDACALL* CUDAMEMORYALLOCATE)(ULONG_PTR, SIZE_T);
|
||||
typedef INT(CUDACALL* CUDAINIT)(INT);
|
||||
typedef INT(CUDACALL* CUDAGETDEVICECOUNT)(PINT);
|
||||
typedef INT(CUDACALL* CUDAGETDEVICE)(PINT, INT);
|
||||
typedef INT(CUDACALL* CUDACREATECONTEXT)(CUDA_CONTEXT*, DWORD, INT);
|
||||
typedef INT(CUDACALL* CUDADESTROYCONTEXT)(CUDA_CONTEXT*);
|
||||
typedef INT(CUDACALL* CUDAMEMORYCOPYTODEVICE)(ULONG_PTR, PVOID, SIZE_T);
|
||||
typedef INT(CUDACALL* CUDAMEMORYCOPYTOHOST)(PVOID, ULONG_PTR, SIZE_T);
|
||||
typedef INT(CUDACALL* CUDAMEMORYFREE)(ULONG_PTR);
|
||||
|
||||
#define CUDA_SUCCESS 0
|
||||
|
||||
typedef struct _NVIDIA_API_TABLE {
|
||||
HMODULE NvidiaLibary;
|
||||
CUDAMEMORYALLOCATE CudaMemoryAllocate;
|
||||
CUDAINIT CudaInit;
|
||||
CUDAGETDEVICECOUNT CudaGetDeviceCount;
|
||||
CUDAGETDEVICE CudaGetDevice;
|
||||
CUDACREATECONTEXT CudaCreateContext;
|
||||
CUDAMEMORYCOPYTODEVICE CudaMemoryCopyToDevice;
|
||||
CUDAMEMORYCOPYTOHOST CudaMemoryCopyToHost;
|
||||
CUDAMEMORYFREE CudaMemoryFree;
|
||||
CUDADESTROYCONTEXT CudaDestroyContext;
|
||||
} NVIDIA_API_TABLE, *PNVIDIA_API_TABLE;
|
||||
|
||||
SIZE_T StringLengthW(LPCWSTR String)
|
||||
{
|
||||
LPCWSTR String2;
|
||||
|
||||
for (String2 = String; *String2; ++String2);
|
||||
|
||||
return (String2 - String);
|
||||
}
|
||||
|
||||
PWCHAR StringLocateCharW(PWCHAR String, INT Character)
|
||||
{
|
||||
do
|
||||
{
|
||||
if (*String == Character)
|
||||
return (PWCHAR)String;
|
||||
|
||||
} while (*String++);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
INT StringCompareStringRegionW(PWCHAR String1, PWCHAR String2, SIZE_T Count)
|
||||
{
|
||||
UCHAR Block1, Block2;
|
||||
while (Count-- > 0)
|
||||
{
|
||||
Block1 = (UCHAR)*String1++;
|
||||
Block2 = (UCHAR)*String2++;
|
||||
|
||||
if (Block1 != Block2)
|
||||
return Block1 - Block2;
|
||||
|
||||
if (Block1 == '\0')
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
PWCHAR StringFindSubstringW(PWCHAR String1, PWCHAR String2)
|
||||
{
|
||||
PWCHAR pPointer = String1;
|
||||
DWORD Length = (DWORD)StringLengthW(String2);
|
||||
|
||||
for (; (pPointer = StringLocateCharW(pPointer, *String2)) != 0; pPointer++)
|
||||
{
|
||||
if (StringCompareStringRegionW(pPointer, String2, Length) == 0)
|
||||
return (PWCHAR)pPointer;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PWCHAR StringCopyW(PWCHAR String1, PWCHAR String2)
|
||||
{
|
||||
PWCHAR p = String1;
|
||||
|
||||
while ((*p++ = *String2++) != 0);
|
||||
|
||||
return String1;
|
||||
}
|
||||
|
||||
PWCHAR StringConcatW(PWCHAR String, PWCHAR String2)
|
||||
{
|
||||
StringCopyW(&String[StringLengthW(String)], String2);
|
||||
|
||||
return String;
|
||||
}
|
||||
|
||||
BOOL IsNvidiaGraphicsCardPresent(VOID)
|
||||
{
|
||||
DISPLAY_DEVICEW DisplayDevice; RtlZeroMemory(&DisplayDevice, sizeof(DISPLAY_DEVICEW));
|
||||
DisplayDevice.cb = sizeof(DISPLAY_DEVICEW);
|
||||
|
||||
DWORD dwDeviceId = ERROR_SUCCESS;
|
||||
|
||||
while (EnumDisplayDevicesW(NULL, dwDeviceId, &DisplayDevice, 0))
|
||||
{
|
||||
if (StringFindSubstringW(DisplayDevice.DeviceString, (PWCHAR)L"NVIDIA") != NULL)
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
BOOL InitNvidiaCudaAPITable(PNVIDIA_API_TABLE Api)
|
||||
{
|
||||
Api->NvidiaLibary = LoadLibraryW(L"nvcuda.dll");
|
||||
if (Api->NvidiaLibary == NULL)
|
||||
return FALSE;
|
||||
|
||||
Api->CudaCreateContext = (CUDACREATECONTEXT)GetProcAddress(Api->NvidiaLibary, "cuCtxCreate_v2");
|
||||
Api->CudaGetDevice = (CUDAGETDEVICE)GetProcAddress(Api->NvidiaLibary, "cuDeviceGet");
|
||||
Api->CudaGetDeviceCount = (CUDAGETDEVICECOUNT)GetProcAddress(Api->NvidiaLibary, "cuDeviceGetCount");
|
||||
Api->CudaInit = (CUDAINIT)GetProcAddress(Api->NvidiaLibary, "cuInit");
|
||||
Api->CudaMemoryAllocate = (CUDAMEMORYALLOCATE)GetProcAddress(Api->NvidiaLibary, "cuMemAlloc_v2");
|
||||
Api->CudaMemoryCopyToDevice = (CUDAMEMORYCOPYTODEVICE)GetProcAddress(Api->NvidiaLibary, "cuMemcpyHtoD_v2");
|
||||
Api->CudaMemoryCopyToHost = (CUDAMEMORYCOPYTOHOST)GetProcAddress(Api->NvidiaLibary, "cuMemcpyDtoH_v2");
|
||||
Api->CudaMemoryFree = (CUDAMEMORYFREE)GetProcAddress(Api->NvidiaLibary, "cuMemFree_v2");
|
||||
Api->CudaDestroyContext = (CUDADESTROYCONTEXT)GetProcAddress(Api->NvidiaLibary, "cuCtxDestroy");
|
||||
|
||||
if (!Api->CudaCreateContext || !Api->CudaGetDevice || !Api->CudaGetDeviceCount || !Api->CudaInit || !Api->CudaDestroyContext)
|
||||
return FALSE;
|
||||
|
||||
if (!Api->CudaMemoryAllocate || !Api->CudaMemoryCopyToDevice || !Api->CudaMemoryCopyToHost || !Api->CudaMemoryFree)
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
ULONG_PTR RtlAllocateGpuMemory(PNVIDIA_API_TABLE Api, DWORD ByteSize)
|
||||
{
|
||||
ULONG_PTR GpuBufferPointer = NULL;
|
||||
|
||||
if (ByteSize == 0)
|
||||
return NULL;
|
||||
|
||||
if (Api->CudaMemoryAllocate((ULONG_PTR )&GpuBufferPointer, ByteSize) != CUDA_SUCCESS)
|
||||
return NULL;
|
||||
|
||||
return GpuBufferPointer;
|
||||
|
||||
}
|
||||
|
||||
INT main(VOID)
|
||||
{
|
||||
/********************************************************************
|
||||
* Variables
|
||||
********************************************************************/
|
||||
|
||||
//Application variables
|
||||
DWORD dwError = ERROR_SUCCESS;
|
||||
BOOL bFlag = FALSE;
|
||||
|
||||
//NVIDIA related variables
|
||||
NVIDIA_API_TABLE Api = { 0 };
|
||||
INT DeviceCount = 0;
|
||||
INT Device = 0;
|
||||
CUDA_CONTEXT Context = NULL;;
|
||||
ULONG_PTR GpuMemory = NULL;
|
||||
|
||||
//Subroutine related variables, unimportant to proof-of-concept
|
||||
WCHAR BinaryPath[MAX_PATH * sizeof(WCHAR)] = { 0 };
|
||||
HANDLE hHandle = INVALID_HANDLE_VALUE;
|
||||
PBYTE DataBuffer = NULL;
|
||||
HANDLE ProcessHeap = GetProcessHeap();
|
||||
DWORD dwRead = 0;
|
||||
|
||||
/********************************************************************
|
||||
* Start
|
||||
*********************************************************************
|
||||
*
|
||||
* IsNvidiaGraphicsCardPresent() invokes the EnumDisplayDevicesW and
|
||||
* performs a string comparison on DISPLAY_DEVICEW member
|
||||
* DeviceString to look for the presence of NVIDIA. If NVIDIA string
|
||||
* is present IsNvidiaGraphicsCardPresent() returns true else false.
|
||||
* If IsNvidiaGraphicsCardPresent() returns false the application
|
||||
* terminates. This proof-of-concept is NVIDIA specific.
|
||||
*
|
||||
* If IsNvidiaGraphicsCardPresent() returns true, we make a
|
||||
* subsequent function call to InitNvidiaCudaAPITable().
|
||||
* InitNvidiaCudaAPITable() populates a NVIDIA_API_TABLE structure
|
||||
* whose members are function pointers to NVIDIA-related APIs omit
|
||||
* the NvidiaLibrary HMODULE member whose value is returned from a
|
||||
* call to LoadLibrary.
|
||||
*
|
||||
* Anyway, enjoy the proof-of-concept.
|
||||
* With love, smelly__vx
|
||||
* vx-underground.org
|
||||
*
|
||||
********************************************************************/
|
||||
|
||||
if (!IsNvidiaGraphicsCardPresent())
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
if (!InitNvidiaCudaAPITable(&Api))
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
/********************************************************************
|
||||
* Unimportant section
|
||||
*********************************************************************
|
||||
*
|
||||
* This section performs trivial tasks unrelated to the core
|
||||
* concept being illustrated in this proof of concept. The code
|
||||
* below assembles a path to the desktop by using
|
||||
* GetEnvironmentVariableW and appending a hardcoded path. The hard
|
||||
* coded path is a binary present on the desktop (must be created
|
||||
* by the user).
|
||||
*
|
||||
* Once creating the desktop path in memory, we invoke CreateFile
|
||||
* and get a handle to file on the desktop. Subsequently this code
|
||||
* gets the file size, allocates it in memory, and reads the content
|
||||
* into member (type PBYTE).
|
||||
*
|
||||
********************************************************************/
|
||||
|
||||
if (GetEnvironmentVariableW(L"USERPROFILE", BinaryPath, (MAX_PATH * sizeof(WCHAR))) == 0)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
if (StringConcatW(BinaryPath, (PWCHAR)L"\\Desktop\\Demo.txt") == NULL)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
hHandle = CreateFileW(BinaryPath, GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
||||
if (hHandle == INVALID_HANDLE_VALUE)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
dwError = GetFileSize(hHandle, NULL);
|
||||
if (dwError == INVALID_FILE_SIZE)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
DataBuffer = (PBYTE)HeapAlloc(ProcessHeap, HEAP_ZERO_MEMORY, dwError);
|
||||
if (DataBuffer == NULL)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
if (!ReadFile(hHandle, DataBuffer, dwError, &dwRead, NULL))
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
dwError = ERROR_SUCCESS;
|
||||
|
||||
/********************************************************************
|
||||
* Unimportant code segment end
|
||||
********************************************************************/
|
||||
|
||||
/********************************************************************
|
||||
* NVIDIA CUDA Code segment
|
||||
*********************************************************************
|
||||
*
|
||||
* The code below copies content from the data file read off the
|
||||
* desktop onto the GPU, frees the heap, then copies the data back
|
||||
*
|
||||
* CUDA_SUCCESS is defined as 0 (zero). Successful CUDA invocations
|
||||
* return 0 (zero).
|
||||
*
|
||||
* CudaInit(0) is required to initialize NVIDIA API interface. The
|
||||
* subsequent function calls, CudaGetDeviceCount, and
|
||||
* CudaGetDevice are generic NVIDIA functions to enumerating and
|
||||
* identifying GPU devices - in the unlikely event more than one
|
||||
* GPU is present.
|
||||
*
|
||||
* The handle returned by CudaGetDevice is used to create a CUDA
|
||||
* context - the state of the application, how two consecutive
|
||||
* API calls are related to each other while utilizing the CUDA
|
||||
* runtime API.
|
||||
*
|
||||
* This code forwards to the NVIDIA driver.
|
||||
*
|
||||
* After a context is successfully created - we allocate memory
|
||||
* on the GPU via our wrapper function, RtlAllocateGpuMemory, which
|
||||
* returns a ULONG_PTR, a pointer to the allocated GPU memory.
|
||||
* The calls that proceed this first copy data to the GPU, and
|
||||
* finally, retrive the data back. This is illustrated by this
|
||||
* proof-of-concept freeing the heap which contained data which was
|
||||
* previously stored in the DataBuffer variable.
|
||||
*
|
||||
********************************************************************/
|
||||
|
||||
if (Api.CudaInit(0) != CUDA_SUCCESS)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
if (Api.CudaGetDeviceCount(&DeviceCount) != CUDA_SUCCESS || DeviceCount == 0)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
if (Api.CudaGetDevice(&Device, DeviceCount - 1) != CUDA_SUCCESS)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
if (Api.CudaCreateContext(&Context, 0, Device) != CUDA_SUCCESS)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
GpuMemory = RtlAllocateGpuMemory(&Api, dwRead);
|
||||
if (GpuMemory == NULL)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
if (Api.CudaMemoryCopyToDevice(GpuMemory, DataBuffer, dwRead) != CUDA_SUCCESS)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
/********************************************************************
|
||||
* Unimportant section
|
||||
*********************************************************************
|
||||
*
|
||||
* Frees the heap, to illustrate the DataBuffer variable is indeed
|
||||
* empty, containing no data. We then reallocate the buffer and
|
||||
* copy the contents from the GPU back onto the hosts heap.
|
||||
*
|
||||
********************************************************************/
|
||||
if (DataBuffer)
|
||||
HeapFree(ProcessHeap, HEAP_ZERO_MEMORY, DataBuffer);
|
||||
|
||||
Sleep(1000);
|
||||
|
||||
DataBuffer = (PBYTE)HeapAlloc(ProcessHeap, HEAP_ZERO_MEMORY, dwRead);
|
||||
if (DataBuffer == NULL)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
/********************************************************************
|
||||
* Unimportant code segment end
|
||||
********************************************************************/
|
||||
|
||||
if (Api.CudaMemoryCopyToHost(DataBuffer, GpuMemory, dwRead) != CUDA_SUCCESS)
|
||||
goto EXIT_ROUTINE;
|
||||
|
||||
/********************************************************************
|
||||
* EXIT_ROUTINE
|
||||
*********************************************************************
|
||||
*
|
||||
* bFlag is a generic variable. Its value by default, FALSE,
|
||||
* indicates whether or not the application has entered the
|
||||
* exit routine normally. If the code has finished execution, then
|
||||
* bFlag will be set to TRUE indicating there has been no issues.
|
||||
* Otherwise, bFlag being FALSE indicates failure and GetLastError
|
||||
* is invoked.
|
||||
*
|
||||
* Regardless of failure, each variable is checked to determine if
|
||||
* it needs to be freed or unloaded.
|
||||
*
|
||||
********************************************************************/
|
||||
|
||||
bFlag = TRUE;
|
||||
|
||||
EXIT_ROUTINE:
|
||||
|
||||
if (!bFlag)
|
||||
dwError = GetLastError();
|
||||
|
||||
if (DataBuffer)
|
||||
HeapFree(ProcessHeap, HEAP_ZERO_MEMORY, DataBuffer);
|
||||
|
||||
if (GpuMemory != NULL)
|
||||
Api.CudaMemoryFree(GpuMemory);
|
||||
|
||||
if (Context != NULL)
|
||||
Api.CudaDestroyContext(&Context);
|
||||
|
||||
if (Api.NvidiaLibary)
|
||||
FreeLibrary(Api.NvidiaLibary);
|
||||
|
||||
if (hHandle)
|
||||
CloseHandle(hHandle);
|
||||
|
||||
return dwError;
|
||||
}
|
||||
Reference in New Issue
Block a user