From c370354528533ee69465f20f7860aad61e5e2ad1 Mon Sep 17 00:00:00 2001
From: vxunderground <57078196+vxunderground@users.noreply.github.com>
Date: Thu, 28 Oct 2021 16:57:31 -0500
Subject: [PATCH] Add files via upload

---
 GpuMemoryAbuse.cpp | 372 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 372 insertions(+)
 create mode 100644 GpuMemoryAbuse.cpp

diff --git a/GpuMemoryAbuse.cpp b/GpuMemoryAbuse.cpp
new file mode 100644
index 0000000..797a522
--- /dev/null
+++ b/GpuMemoryAbuse.cpp
@@ -0,0 +1,372 @@
+#include <Windows.h>
+#pragma warning(disable:6011) // suppresses MSVC code-analysis warning C6011 for the whole file
+
+// Function-pointer types for the nvcuda.dll exports that InitNvidiaCudaAPITable resolves by name.
+#define CUDACALL __stdcall
+typedef struct PCUDE_CONTEXT* CUDA_CONTEXT; // opaque handle; the struct is not defined in the visible source
+
+typedef INT(CUDACALL* CUDAMEMORYALLOCATE)(ULONG_PTR, SIZE_T); // "cuMemAlloc_v2"; callers pass the address that receives the device pointer as the 1st argument
+typedef INT(CUDACALL* CUDAINIT)(INT); // "cuInit"
+typedef INT(CUDACALL* CUDAGETDEVICECOUNT)(PINT); // "cuDeviceGetCount"
+typedef INT(CUDACALL* CUDAGETDEVICE)(PINT, INT); // "cuDeviceGet"
+typedef INT(CUDACALL* CUDACREATECONTEXT)(CUDA_CONTEXT*, DWORD, INT); // "cuCtxCreate_v2"
+typedef INT(CUDACALL* CUDADESTROYCONTEXT)(CUDA_CONTEXT*); // "cuCtxDestroy"; NOTE(review): the CUDA driver API documents this parameter as a context handle passed by value, not a pointer to one - confirm
+typedef INT(CUDACALL* CUDAMEMORYCOPYTODEVICE)(ULONG_PTR, PVOID, SIZE_T); // "cuMemcpyHtoD_v2"; called as (device pointer, host buffer, byte count)
+typedef INT(CUDACALL* CUDAMEMORYCOPYTOHOST)(PVOID, ULONG_PTR, SIZE_T); // "cuMemcpyDtoH_v2"; called as (host buffer, device pointer, byte count)
+typedef INT(CUDACALL* CUDAMEMORYFREE)(ULONG_PTR); // "cuMemFree_v2"
+
+#define CUDA_SUCCESS 0
+
+typedef struct _NVIDIA_API_TABLE { // nvcuda.dll module handle plus the entry points resolved from it
+	HMODULE NvidiaLibary; // result of LoadLibraryW(L"nvcuda.dll"); released with FreeLibrary in main
+	CUDAMEMORYALLOCATE CudaMemoryAllocate; // export "cuMemAlloc_v2"
+	CUDAINIT CudaInit; // export "cuInit"
+	CUDAGETDEVICECOUNT CudaGetDeviceCount; // export "cuDeviceGetCount"
+	CUDAGETDEVICE CudaGetDevice; // export "cuDeviceGet"
+	CUDACREATECONTEXT CudaCreateContext; // export "cuCtxCreate_v2"
+	CUDAMEMORYCOPYTODEVICE CudaMemoryCopyToDevice; // export "cuMemcpyHtoD_v2"
+	CUDAMEMORYCOPYTOHOST CudaMemoryCopyToHost; // export "cuMemcpyDtoH_v2"
+	CUDAMEMORYFREE CudaMemoryFree; // export "cuMemFree_v2"
+	CUDADESTROYCONTEXT CudaDestroyContext; // export "cuCtxDestroy"
+} NVIDIA_API_TABLE, *PNVIDIA_API_TABLE;
+
+/* Counts the WCHARs in String that precede its terminating NUL. */
+SIZE_T StringLengthW(LPCWSTR String)
+{
+	SIZE_T Count = 0;
+	while (String[Count] != 0)
+		Count++;
+	return Count;
+}
+
+/* Returns a pointer to the first WCHAR in String equal to Character, or NULL when the
+ * terminator is reached first; a Character of 0 matches the terminator itself. */
+PWCHAR StringLocateCharW(PWCHAR String, INT Character)
+{
+	PWCHAR Cursor;
+	for (Cursor = String; ; Cursor++)
+	{
+		if (*Cursor == Character) return Cursor;
+		if (*Cursor == 0) return NULL;
+	}
+}
+
+/* Compares at most Count WCHARs of String1 and String2, stopping at a shared terminator.
+ * Returns 0 on a match, else the first differing pair's difference (String1 - String2). */
+INT StringCompareStringRegionW(PWCHAR String1, PWCHAR String2, SIZE_T Count)
+{
+	for (; Count > 0; Count--, String1++, String2++)
+	{
+		/* Compare full-width WCHARs: narrowing to UCHAR would equate characters that
+		 * share a low byte and would mistake e.g. U+0100 for the terminator. */
+		if (*String1 != *String2)
+			return (INT)*String1 - (INT)*String2;
+
+		if (*String1 == L'\0')
+			break;
+	}
+
+	return 0;
+}
+
+/* Returns the first occurrence of String2 within String1 (wcsstr-style), or NULL if absent. */
+PWCHAR StringFindSubstringW(PWCHAR String1, PWCHAR String2)
+{
+	PWCHAR Cursor = String1;
+	SIZE_T NeedleLength = StringLengthW(String2); /* SIZE_T end to end: no DWORD truncation */
+
+	if (NeedleLength == 0)
+		return String1; /* an empty needle matches at the start, as wcsstr does */
+	for (; (Cursor = StringLocateCharW(Cursor, *String2)) != NULL; Cursor++)
+		if (StringCompareStringRegionW(Cursor, String2, NeedleLength) == 0)
+			return Cursor;
+	return NULL;
+}
+
+/* Copies String2, terminator included, into String1 and returns String1. */
+PWCHAR StringCopyW(PWCHAR String1, PWCHAR String2)
+{
+	SIZE_T Index;
+	for (Index = 0; (String1[Index] = String2[Index]) != 0; Index++)
+		;
+	return String1;
+}
+
+/* Appends String2, terminator included, at String's current end; returns String. No bounds check. */
+PWCHAR StringConcatW(PWCHAR String, PWCHAR String2)
+{
+	StringCopyW(String + StringLengthW(String), String2);
+	return String;
+}
+
+/* Returns TRUE if any display device reported by EnumDisplayDevicesW has "NVIDIA" in its DeviceString. */
+BOOL IsNvidiaGraphicsCardPresent(VOID)
+{
+	DISPLAY_DEVICEW DisplayDevice;
+	DWORD dwDeviceId;
+
+	RtlZeroMemory(&DisplayDevice, sizeof(DISPLAY_DEVICEW));
+	DisplayDevice.cb = sizeof(DISPLAY_DEVICEW);
+
+	/* The index must advance on every pass; otherwise device 0 is queried forever when it is not NVIDIA. */
+	for (dwDeviceId = 0; EnumDisplayDevicesW(NULL, dwDeviceId, &DisplayDevice, 0); dwDeviceId++)
+		if (StringFindSubstringW(DisplayDevice.DeviceString, (PWCHAR)L"NVIDIA") != NULL)
+			return TRUE;
+	return FALSE;
+}
+
+/* Loads nvcuda.dll and resolves every CUDA driver export this file uses into Api.
+ * Returns FALSE if the library or any export is missing; never unloads the module. */
+BOOL InitNvidiaCudaAPITable(PNVIDIA_API_TABLE Api)
+{
+	HMODULE Module = LoadLibraryW(L"nvcuda.dll");
+	Api->NvidiaLibary = Module;
+	if (Module == NULL)
+		return FALSE;
+
+	Api->CudaInit = (CUDAINIT)GetProcAddress(Module, "cuInit");
+	Api->CudaGetDeviceCount = (CUDAGETDEVICECOUNT)GetProcAddress(Module, "cuDeviceGetCount");
+	Api->CudaGetDevice = (CUDAGETDEVICE)GetProcAddress(Module, "cuDeviceGet");
+	Api->CudaCreateContext = (CUDACREATECONTEXT)GetProcAddress(Module, "cuCtxCreate_v2");
+	Api->CudaDestroyContext = (CUDADESTROYCONTEXT)GetProcAddress(Module, "cuCtxDestroy");
+	Api->CudaMemoryAllocate = (CUDAMEMORYALLOCATE)GetProcAddress(Module, "cuMemAlloc_v2");
+	Api->CudaMemoryCopyToDevice = (CUDAMEMORYCOPYTODEVICE)GetProcAddress(Module, "cuMemcpyHtoD_v2");
+	Api->CudaMemoryCopyToHost = (CUDAMEMORYCOPYTOHOST)GetProcAddress(Module, "cuMemcpyDtoH_v2");
+	Api->CudaMemoryFree = (CUDAMEMORYFREE)GetProcAddress(Module, "cuMemFree_v2");
+
+	if (Api->CudaInit == NULL || Api->CudaGetDeviceCount == NULL || Api->CudaGetDevice == NULL || Api->CudaCreateContext == NULL || Api->CudaDestroyContext == NULL ||
+		Api->CudaMemoryAllocate == NULL || Api->CudaMemoryCopyToDevice == NULL || Api->CudaMemoryCopyToHost == NULL || Api->CudaMemoryFree == NULL)
+		return FALSE;
+	return TRUE;
+}
+
+/* Asks Api->CudaMemoryAllocate for ByteSize bytes and returns the value it stored as the
+ * device pointer. Returns NULL (0) without calling it when ByteSize is 0, and NULL when
+ * the call returns anything other than CUDA_SUCCESS. */
+ULONG_PTR RtlAllocateGpuMemory(PNVIDIA_API_TABLE Api, DWORD ByteSize)
+{
+	ULONG_PTR DevicePointer = NULL;
+
+	/* The address of DevicePointer travels in the first argument so the callee can fill it in. */
+	if (ByteSize != 0 && Api->CudaMemoryAllocate((ULONG_PTR)&DevicePointer, ByteSize) == CUDA_SUCCESS)
+		return DevicePointer;
+
+	return NULL;
+}
+
+/* Proof-of-concept entry point: loads a file from the desktop, stores its bytes in GPU memory, then copies them back. */
+INT main(VOID)
+{
+	/********************************************************************
+	*					Variables
+	********************************************************************/
+
+	//Application variables
+	DWORD dwError = ERROR_SUCCESS;
+	BOOL bFlag = FALSE;
+
+	//NVIDIA related variables
+	typedef INT(CUDACALL* CUDADESTROYCONTEXTBYVALUE)(CUDA_CONTEXT); //cuCtxDestroy takes the handle by value
+	NVIDIA_API_TABLE Api = { 0 };
+	INT DeviceCount = 0;
+	INT Device = 0;
+	CUDA_CONTEXT Context = NULL;
+	ULONG_PTR GpuMemory = NULL;
+
+	//Subroutine related variables, unimportant to proof-of-concept
+	static const WCHAR DemoFileSuffix[] = L"\\Desktop\\Demo.txt";
+	WCHAR BinaryPath[MAX_PATH * sizeof(WCHAR)] = { 0 };
+	DWORD dwPathLength = 0;
+	HANDLE hHandle = INVALID_HANDLE_VALUE;
+	DWORD dwFileSize = 0;
+	PBYTE DataBuffer = NULL;
+	HANDLE ProcessHeap = GetProcessHeap();
+	DWORD dwRead = 0;
+
+	/********************************************************************
+	*					Start
+	*********************************************************************
+	*	IsNvidiaGraphicsCardPresent() invokes EnumDisplayDevicesW and
+	*	searches the DISPLAY_DEVICEW member DeviceString for the string
+	*	NVIDIA. It returns TRUE if the string is present, else FALSE.
+	*	If IsNvidiaGraphicsCardPresent() returns FALSE the application
+	*	terminates. This proof-of-concept is NVIDIA specific.
+	*
+	*	If IsNvidiaGraphicsCardPresent() returns TRUE, we make a
+	*	subsequent function call to InitNvidiaCudaAPITable().
+	*	InitNvidiaCudaAPITable() populates a NVIDIA_API_TABLE structure
+	*	whose members are function pointers to NVIDIA-related APIs,
+	*	except the NvidiaLibary HMODULE member, whose value is returned
+	*	from a call to LoadLibrary.
+	*
+	*	Anyway, enjoy the proof-of-concept.
+	*	With love, smelly__vx
+	*	vx-underground.org
+	********************************************************************/
+
+	if (!IsNvidiaGraphicsCardPresent())
+		goto EXIT_ROUTINE;
+
+	if (!InitNvidiaCudaAPITable(&Api))
+		goto EXIT_ROUTINE;
+
+	/********************************************************************
+	*					Unimportant section
+	*********************************************************************
+	*	This section performs trivial tasks unrelated to the core
+	*	concept being illustrated in this proof of concept. The code
+	*	below assembles a path to the desktop by using
+	*	GetEnvironmentVariableW and appending a hardcoded path. The hard
+	*	coded path is a file present on the desktop (must be created
+	*	by the user). The combined length is checked first, because
+	*	StringConcatW appends without any bounds checking.
+	*
+	*	Once the desktop path is built in memory, we invoke CreateFile
+	*	and get a handle to the file on the desktop. Subsequently this
+	*	code gets the file size, allocates a buffer of that size, and
+	*	reads the content into DataBuffer (type PBYTE).
+	********************************************************************/
+
+	dwPathLength = GetEnvironmentVariableW(L"USERPROFILE", BinaryPath, sizeof(BinaryPath) / sizeof(WCHAR));
+	//A value that did not fit, or that leaves no room for the suffix and its terminator, is a failure
+	if (dwPathLength == 0 || dwPathLength + sizeof(DemoFileSuffix) / sizeof(WCHAR) > sizeof(BinaryPath) / sizeof(WCHAR))
+		goto EXIT_ROUTINE;
+
+	StringConcatW(BinaryPath, (PWCHAR)DemoFileSuffix);
+
+	hHandle = CreateFileW(BinaryPath, GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+	if (hHandle == INVALID_HANDLE_VALUE)
+		goto EXIT_ROUTINE;
+
+	dwFileSize = GetFileSize(hHandle, NULL);
+	if (dwFileSize == INVALID_FILE_SIZE)
+		goto EXIT_ROUTINE;
+
+	DataBuffer = (PBYTE)HeapAlloc(ProcessHeap, HEAP_ZERO_MEMORY, dwFileSize);
+	if (DataBuffer == NULL)
+		goto EXIT_ROUTINE;
+
+	if (!ReadFile(hHandle, DataBuffer, dwFileSize, &dwRead, NULL))
+		goto EXIT_ROUTINE;
+
+	/********************************************************************
+	*					Unimportant code segment end
+	********************************************************************/
+
+	/********************************************************************
+	*					NVIDIA CUDA Code segment
+	*********************************************************************
+	*	The code below copies content from the data file read off the
+	*	desktop onto the GPU, frees the heap, then copies the data back.
+	*
+	*	CUDA_SUCCESS is defined as 0 (zero). Successful CUDA invocations
+	*	return 0 (zero).
+	*
+	*	CudaInit(0) is required to initialize the NVIDIA API interface.
+	*	The subsequent function calls, CudaGetDeviceCount and
+	*	CudaGetDevice, are generic NVIDIA functions for enumerating and
+	*	identifying GPU devices - in the unlikely event more than one
+	*	GPU is present, the one with the highest ordinal is used.
+	*
+	*	The handle returned by CudaGetDevice is used to create a CUDA
+	*	context - the state of the application, how two consecutive
+	*	API calls are related to each other while utilizing the CUDA
+	*	driver API.
+	*
+	*	This code forwards to the NVIDIA driver.
+	*
+	*	After a context is successfully created - we allocate memory
+	*	on the GPU via our wrapper function, RtlAllocateGpuMemory, which
+	*	returns a ULONG_PTR, a pointer to the allocated GPU memory.
+	*	The calls that follow first copy data to the GPU, and
+	*	finally, retrieve the data back. This is illustrated by this
+	*	proof-of-concept freeing the heap which contained data which was
+	*	previously stored in the DataBuffer variable.
+	********************************************************************/
+
+	if (Api.CudaInit(0) != CUDA_SUCCESS)
+		goto EXIT_ROUTINE;
+
+	if (Api.CudaGetDeviceCount(&DeviceCount) != CUDA_SUCCESS || DeviceCount == 0)
+		goto EXIT_ROUTINE;
+
+	if (Api.CudaGetDevice(&Device, DeviceCount - 1) != CUDA_SUCCESS)
+		goto EXIT_ROUTINE;
+
+	if (Api.CudaCreateContext(&Context, 0, Device) != CUDA_SUCCESS)
+		goto EXIT_ROUTINE;
+
+	GpuMemory = RtlAllocateGpuMemory(&Api, dwRead);
+	if (GpuMemory == NULL)
+		goto EXIT_ROUTINE;
+
+	if (Api.CudaMemoryCopyToDevice(GpuMemory, DataBuffer, dwRead) != CUDA_SUCCESS)
+		goto EXIT_ROUTINE;
+
+	/********************************************************************
+	*					Unimportant section
+	*********************************************************************
+	*	Frees the heap, to illustrate the DataBuffer variable is indeed
+	*	empty, containing no data. We then reallocate the buffer and
+	*	copy the contents from the GPU back onto the host's heap.
+	********************************************************************/
+	if (DataBuffer)
+		HeapFree(ProcessHeap, 0, DataBuffer); //HEAP_ZERO_MEMORY is an allocation flag, not a HeapFree flag
+
+	Sleep(1000);
+
+	DataBuffer = (PBYTE)HeapAlloc(ProcessHeap, HEAP_ZERO_MEMORY, dwRead);
+	if (DataBuffer == NULL)
+		goto EXIT_ROUTINE;
+
+	/********************************************************************
+	*					Unimportant code segment end
+	********************************************************************/
+
+	if (Api.CudaMemoryCopyToHost(DataBuffer, GpuMemory, dwRead) != CUDA_SUCCESS)
+		goto EXIT_ROUTINE;
+
+	/********************************************************************
+	*					EXIT_ROUTINE
+	*********************************************************************
+	*	bFlag is a generic variable. Its value by default, FALSE,
+	*	indicates whether or not the application has entered the
+	*	exit routine normally. If the code has finished execution, then
+	*	bFlag will be set to TRUE indicating there have been no issues.
+	*	Otherwise, bFlag being FALSE indicates failure and GetLastError
+	*	is invoked; if that yields ERROR_SUCCESS, ERROR_GEN_FAILURE is
+	*	returned instead so a failure never exits with code 0.
+	*
+	*	Regardless of failure, each variable is checked to determine if
+	*	it needs to be freed or unloaded.
+	********************************************************************/
+
+	bFlag = TRUE;
+
+EXIT_ROUTINE:
+
+	if (!bFlag)
+	{
+		dwError = GetLastError();
+		if (dwError == ERROR_SUCCESS)
+			dwError = ERROR_GEN_FAILURE; //CUDA calls report failure through their return value only
+	}
+
+	if (DataBuffer)
+		HeapFree(ProcessHeap, 0, DataBuffer);
+
+	if (GpuMemory != NULL)
+		Api.CudaMemoryFree(GpuMemory);
+
+	//Pass the context handle itself; the table's pointer type declares a CUDA_CONTEXT* parameter, hence the cast
+	if (Context != NULL)
+		((CUDADESTROYCONTEXTBYVALUE)Api.CudaDestroyContext)(Context);
+
+	if (Api.NvidiaLibary)
+		FreeLibrary(Api.NvidiaLibary);
+
+	if (hHandle != INVALID_HANDLE_VALUE) //INVALID_HANDLE_VALUE is non-zero, so a plain truth test is not enough
+		CloseHandle(hHandle);
+
+	return dwError;
+}
\ No newline at end of file