mirror of
https://github.com/DualCoder/vgpu_unlock.git
synced 2024-12-22 18:45:26 +00:00
Add modifications to kernel module.
Additional modifications had to be included into the build of nvidia.ko in order for things to work properly.
This commit is contained in:
parent
5cfacdf7b5
commit
6881c417ab
167
README.md
167
README.md
|
@ -5,7 +5,7 @@ Unlock vGPU functionality for consumer grade GPUs.
|
|||
|
||||
## Important!
|
||||
|
||||
This tool is a work in progress. In the current state it does not work.
|
||||
This tool is very untested, use at your own risk.
|
||||
|
||||
|
||||
## Description
|
||||
|
@ -13,34 +13,171 @@ This tool is a work in progress. In the current state it does not work.
|
|||
This tool enables the use of Geforce and Quadro GPUs with the NVIDIA vGPU
|
||||
software. NVIDIA vGPU normally only supports a few Tesla GPUs but since some
|
||||
Geforce and Quadro GPUs share the same physical chip as the Tesla this is only
|
||||
a software limitation for those GPUs. This tool works by intercepting the ioctl
|
||||
syscalls between the userspace nvidia-vgpud and nvidia-vgpu-mgr services and
|
||||
the kernel driver. Doing this allows the script to alter the identification and
|
||||
capabilities that the user space services relies on to determine if the GPU is
|
||||
vGPU capable.
|
||||
a software limitation for those GPUs. This tool aims to remove this limitation.
|
||||
|
||||
|
||||
## Dependencies:
|
||||
|
||||
* This tool requires Python3, the latest version is recommended.
|
||||
* The python package "frida" is required. `pip3 install frida`.
|
||||
* The tool requires the NVIDIA GRID vGPU driver to be properly installed for it
|
||||
to do its job. This special driver is only accessible to NVIDIA enterprise
|
||||
customers. The script has only been tested with 11.3 for "KVM on Linux" and
|
||||
may or may not work on other versions.
|
||||
* The tool requires the NVIDIA GRID vGPU driver.
|
||||
* "dkms" is highly recommended as it simplifies the process of rebuilding the
|
||||
driver alot.
|
||||
|
||||
|
||||
## Installation:
|
||||
|
||||
The NVIDIA vGPU drivers will create an nvidia-vgpud and nvidia-vgpu-mgr
|
||||
systemd service. All we have to do is replace the path
|
||||
/usr/bin/<executable> in /lib/systemd/system/nvidia-vgpud.service and
|
||||
/lib/systemd/system/nvidia-vgpu-mgr.service with the path to the vgpu\_unlock
|
||||
script and pass the original executable path as the first argument.
|
||||
In the following instructions `<path_to_vgpu_unlock>` need to be replaced with
|
||||
the path to this repository on the target system and `<version>` need to be
|
||||
replaced with the version of the NVIDIA GRID vGPU driver.
|
||||
|
||||
Install the NVIDIA GRID vGPU driver, make sure to install it as a dkms module.
|
||||
```
|
||||
./nvidia-installer
|
||||
```
|
||||
|
||||
Modify the line begining with `ExecStart=` in `/lib/systemd/system/nvidia-vgpu.service`
|
||||
and `/lib/systemd/system/nvidia-vgpu-mgr.service` to use `vgpu_unlock` as
|
||||
executable and pass the original executable as the first argument. Ex:
|
||||
```
|
||||
ExecStart=<path_to_vgpu_unlock>/vgpu_unlock /usr/bin/nvidia-vgpud
|
||||
```
|
||||
|
||||
Reload the systemd daemons:
|
||||
```
|
||||
systemctl daemon-reload
|
||||
```
|
||||
|
||||
Modify the file `/usr/src/nvidia-<version>/nvidia/os-interface.c` and add the
|
||||
following line after the lines begining with `#include` at the start of the
|
||||
file.
|
||||
```
|
||||
#include "<path_to_vgpu_unlock>/vgpu_unlock_hooks.c"
|
||||
```
|
||||
|
||||
Modify the file `/usr/src/nvidia-<version>/nvidia/nvidia.Kbuild` and add the
|
||||
following line.
|
||||
```
|
||||
ldflags-y += -T <path_to_vgpu_unlock>/kern.ld
|
||||
```
|
||||
|
||||
Remove the nvidia kernel module using dkms:
|
||||
```
|
||||
dkms remove -m nvidia -v <version> --all
|
||||
```
|
||||
|
||||
Rebuild and reinstall the nvidia kernel module using dkms:
|
||||
```
|
||||
dkms install -m nvidia -v <version>
|
||||
```
|
||||
|
||||
Reboot.
|
||||
|
||||
---
|
||||
**NOTE**
|
||||
|
||||
This script will only work if there exists a vGPU compatible Tesla GPU that
|
||||
uses the same physical chip as the actual GPU being used.
|
||||
|
||||
|
||||
---
|
||||
|
||||
## How it works
|
||||
|
||||
### vGPU supported?
|
||||
|
||||
In order to determine if a certain GPU supports the vGPU functionality the
|
||||
driver looks at the PCI device ID. This identifier together with the PCI vendor
|
||||
ID is unique for each type of PCI device. In order to enable vGPU support we
|
||||
need to tell the driver that the PCI device ID of the installed GPU is one of
|
||||
the device IDs used by a vGPU capable GPU.
|
||||
|
||||
### Userspace script: vgpu\_unlock
|
||||
|
||||
The userspace services nvidia-vgpud and nvidia-vgpu-mgr uses the ioctl syscall
|
||||
to communicate with the kernel module. Specifically they read the PCI device ID
|
||||
and determines if the installed GPU is vGPU capable.
|
||||
|
||||
The python script vgpu\_unlock intercepts all ioctl syscalls between the
|
||||
executable specified as the first argument and the kernel. The script then
|
||||
modifies the kernel responses to indicate a PCI device ID with vGPU support
|
||||
and a vGPU capable GPU.
|
||||
|
||||
### Kernel module hooks: vgpu\_unlock\_hooks.c
|
||||
|
||||
In order to exchange data with the GPU the kernel module maps the physical
|
||||
address space of the PCI bus into its own virtual address space. This is done
|
||||
using the ioremap\* kernel functions. The kernel module then reads and writes
|
||||
data into that mapped address space. This is done using the memcpy kernel
|
||||
function.
|
||||
|
||||
By including the vgpu\_unlock\_hooks.c file into the os-interface.c file we can
|
||||
use C preprocessor macros to replace and intercept calls to the iormeap and
|
||||
memcpy functions. Doing this allows us to maintain a view of what is mapped
|
||||
where and what data that is being accessed.
|
||||
|
||||
### Kernel module linker script: kern.ld
|
||||
|
||||
This is a modified version of the default linker script provided by gcc. The
|
||||
script is modified to place the .rodata section of nv-kernel.o into .data
|
||||
section instead of .rodata, making it writable. The script also provide the
|
||||
symbols `vgpu_unlock_nv_kern_rodata_beg` and `vgpu_unlock_nv_kern_rodata_end`
|
||||
to let us know where that section begins and ends.
|
||||
|
||||
### How it all comes together
|
||||
|
||||
After boot the nvidia-vgpud service queries the kernel for all installed GPUs
|
||||
and checks for vGPU capability. This call is intercepted by the vgpu\_unlock
|
||||
python script and the GPU is made vGPU capable. If a vGPU capable GPU is found
|
||||
then nvidia-vgpu creates an MDEV device and the /sys/class/mdev\_bus directory
|
||||
is created by the system.
|
||||
|
||||
vGPU devices can now be created by echoing UUIDs into the `create` files in the
|
||||
mdev bus representation. This will create additional structures representing
|
||||
the new vGPU device on the MDEV bus. These devices can then be assigned to VMs,
|
||||
and when the VM starts it will open the MDEV device. This causes nvidia-vgpu-mgr
|
||||
to start communicating with the kernel using ioctl. Again these calls are
|
||||
intercepted by the vgpu\_unlock python script and when nvidia-vgpu-mgr asks if
|
||||
the GPU is vGPU capable the answer is changed to yes. After that check it
|
||||
attempts to initialize the vGPU device instance.
|
||||
|
||||
Initialization of the vGPU device is handled by the kernel module and it
|
||||
performs its own check for vGPU capability, this one is a bit more complicated.
|
||||
|
||||
The kernel module maps the physical PCI address range 0xf0000000-0xf1000000 into
|
||||
its virtual address space, it then performs some magical operations which we
|
||||
don't really know what they do. What we do know is that after these operations
|
||||
it accesses a 128 bit value at physical address 0xf0029624, which we call the
|
||||
magic value. The kernel module also accessses a 128 bit value at physical
|
||||
address 0xf0029634, which we call the key value.
|
||||
|
||||
The kernel module then has a couple of lookup tables for the magic value, one
|
||||
for vGPU capable GPUs and one for the others. So the kernel module looks for the
|
||||
magic value in both of these lookup tables, and if it is found that table entry
|
||||
also contains a set of AES-128 encrypted data blocks and a HMAC-SHA256
|
||||
signature.
|
||||
|
||||
The signature is then validated by using the key value mentioned earlier to
|
||||
calculate the HMAC-SHA256 signature over the encrypted data blocks. If the
|
||||
signature is correct, then the blocks are decrypted using AES-128 and the same
|
||||
key.
|
||||
|
||||
Inside of the decrypted data is once again the PCI device ID.
|
||||
|
||||
So in order for the kernel module to accept the GPU as vGPU capable the magic
|
||||
value will have to be in the table of vGPU capable magic values, the key has
|
||||
to generate a valid HMAC-SHA256 signature and the AES-128 decrypted data blocks
|
||||
has to contain a vGPU capable PCI device ID. If any of these checks fail, then
|
||||
the error code 0x56 "Call not supported" is returned.
|
||||
|
||||
In order to make these checks pass the hooks in vgpu\_unlock\_hooks.c will look
|
||||
for a ioremap call that maps the physical address range that contain the magic
|
||||
and key values, recalculate the addresses of those values into the virtual
|
||||
address space of the kernel module, monitor memcpy operations reading at those
|
||||
addresses, and if such an operation occurs, keep a copy of the value until both
|
||||
are known, locate the lookup tables in the .rodata section of nv-kernel.o, find
|
||||
the signature and data bocks, validate the signature, decrypt the blocks, edit
|
||||
the PCI device ID in the decrypted data, reencrypt the blocks, regenerate the
|
||||
signature and insert the magic, blocks and signature into the table of vGPU
|
||||
capable magic values. And that's what they do.
|
||||
|
||||
|
|
162
kern.ld
Normal file
162
kern.ld
Normal file
|
@ -0,0 +1,162 @@
|
|||
/* Script for ld -r: link without relocation */
|
||||
/* Copyright (C) 2014-2018 Free Software Foundation, Inc.
|
||||
Copying and distribution of this script, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
notice and this notice are preserved. */
|
||||
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
|
||||
"elf64-x86-64")
|
||||
OUTPUT_ARCH(i386:x86-64)
|
||||
/* For some reason, the Solaris linker makes bad executables
|
||||
if gld -r is used and the intermediate file has sections starting
|
||||
at non-zero addresses. Could be a Solaris ld bug, could be a GNU ld
|
||||
bug. But for now assigning the zero vmas works. */
|
||||
SECTIONS
|
||||
{
|
||||
/* Read-only sections, merged into text segment: */
|
||||
.interp 0 : { *(.interp) }
|
||||
.note.gnu.build-id : { *(.note.gnu.build-id) }
|
||||
.hash 0 : { *(.hash) }
|
||||
.gnu.hash 0 : { *(.gnu.hash) }
|
||||
.dynsym 0 : { *(.dynsym) }
|
||||
.dynstr 0 : { *(.dynstr) }
|
||||
.gnu.version 0 : { *(.gnu.version) }
|
||||
.gnu.version_d 0: { *(.gnu.version_d) }
|
||||
.gnu.version_r 0: { *(.gnu.version_r) }
|
||||
.rela.init 0 : { *(.rela.init) }
|
||||
.rela.text 0 : { *(.rela.text) }
|
||||
.rela.fini 0 : { *(.rela.fini) }
|
||||
.rela.rodata 0 : { *(.rela.rodata) }
|
||||
.rela.data.rel.ro 0 : { *(.rela.data.rel.ro) }
|
||||
.rela.data 0 : { *(.rela.data) }
|
||||
.rela.tdata 0 : { *(.rela.tdata) }
|
||||
.rela.tbss 0 : { *(.rela.tbss) }
|
||||
.rela.ctors 0 : { *(.rela.ctors) }
|
||||
.rela.dtors 0 : { *(.rela.dtors) }
|
||||
.rela.got 0 : { *(.rela.got) }
|
||||
.rela.bss 0 : { *(.rela.bss) }
|
||||
.rela.ldata 0 : { *(.rela.ldata) }
|
||||
.rela.lbss 0 : { *(.rela.lbss) }
|
||||
.rela.lrodata 0 : { *(.rela.lrodata) }
|
||||
.rela.ifunc 0 : { *(.rela.ifunc) }
|
||||
.rela.plt 0 :
|
||||
{
|
||||
*(.rela.plt)
|
||||
}
|
||||
.init 0 :
|
||||
{
|
||||
KEEP (*(SORT_NONE(.init)))
|
||||
}
|
||||
.plt 0 : { *(.plt) *(.iplt) }
|
||||
.plt.got 0 : { *(.plt.got) }
|
||||
.plt.sec 0 : { *(.plt.sec) }
|
||||
.text 0 :
|
||||
{
|
||||
*(.text .stub)
|
||||
/* .gnu.warning sections are handled specially by elf32.em. */
|
||||
*(.gnu.warning)
|
||||
}
|
||||
.fini 0 :
|
||||
{
|
||||
KEEP (*(SORT_NONE(.fini)))
|
||||
}
|
||||
.rodata 0 : { *(EXCLUDE_FILE (*nv-kernel.o) .rodata) }
|
||||
.rodata1 0 : { *(.rodata1) }
|
||||
.eh_frame_hdr : { *(.eh_frame_hdr) }
|
||||
.eh_frame 0 : ONLY_IF_RO { KEEP (*(.eh_frame)) }
|
||||
.gcc_except_table 0 : ONLY_IF_RO { *(.gcc_except_table
|
||||
.gcc_except_table.*) }
|
||||
.gnu_extab 0 : ONLY_IF_RO { *(.gnu_extab*) }
|
||||
/* These sections are generated by the Sun/Oracle C++ compiler. */
|
||||
.exception_ranges 0 : ONLY_IF_RO { *(.exception_ranges
|
||||
.exception_ranges*) }
|
||||
/* Adjust the address for the data segment. We want to adjust up to
|
||||
the same address within the page on the next page up. */
|
||||
/* Exception handling */
|
||||
.eh_frame 0 : ONLY_IF_RW { KEEP (*(.eh_frame)) }
|
||||
.gnu_extab 0 : ONLY_IF_RW { *(.gnu_extab) }
|
||||
.gcc_except_table 0 : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
|
||||
.exception_ranges 0 : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
|
||||
/* Thread Local Storage sections */
|
||||
.tdata 0 :
|
||||
{
|
||||
*(.tdata)
|
||||
}
|
||||
.tbss 0 : { *(.tbss) }
|
||||
.jcr 0 : { KEEP (*(.jcr)) }
|
||||
.dynamic 0 : { *(.dynamic) }
|
||||
.got 0 : { *(.got) *(.igot) }
|
||||
.got.plt 0 : { *(.got.plt) *(.igot.plt) }
|
||||
.data 0 :
|
||||
{
|
||||
*(.data)
|
||||
vgpu_unlock_nv_kern_rodata_beg = .;
|
||||
*nv-kernel.o(.rodata)
|
||||
vgpu_unlock_nv_kern_rodata_end = .;
|
||||
}
|
||||
.data1 0 : { *(.data1) }
|
||||
.bss 0 :
|
||||
{
|
||||
*(.bss)
|
||||
*(COMMON)
|
||||
/* Align here to ensure that the .bss section occupies space up to
|
||||
_end. Align after .bss to ensure correct alignment even if the
|
||||
.bss section disappears because there are no input sections.
|
||||
FIXME: Why do we need it? When there is no .bss section, we don't
|
||||
pad the .data section. */
|
||||
}
|
||||
.lbss 0 :
|
||||
{
|
||||
*(.dynlbss)
|
||||
*(.lbss)
|
||||
*(LARGE_COMMON)
|
||||
}
|
||||
.lrodata 0 :
|
||||
{
|
||||
*(.lrodata)
|
||||
}
|
||||
.ldata 0 :
|
||||
{
|
||||
*(.ldata)
|
||||
}
|
||||
/* Stabs debugging sections. */
|
||||
.stab 0 : { *(.stab) }
|
||||
.stabstr 0 : { *(.stabstr) }
|
||||
.stab.excl 0 : { *(.stab.excl) }
|
||||
.stab.exclstr 0 : { *(.stab.exclstr) }
|
||||
.stab.index 0 : { *(.stab.index) }
|
||||
.stab.indexstr 0 : { *(.stab.indexstr) }
|
||||
.comment 0 : { *(.comment) }
|
||||
/* DWARF debug sections.
|
||||
Symbols in the DWARF debugging sections are relative to the beginning
|
||||
of the section so we begin them at 0. */
|
||||
/* DWARF 1 */
|
||||
.debug 0 : { *(.debug) }
|
||||
.line 0 : { *(.line) }
|
||||
/* GNU DWARF 1 extensions */
|
||||
.debug_srcinfo 0 : { *(.debug_srcinfo) }
|
||||
.debug_sfnames 0 : { *(.debug_sfnames) }
|
||||
/* DWARF 1.1 and DWARF 2 */
|
||||
.debug_aranges 0 : { *(.debug_aranges) }
|
||||
.debug_pubnames 0 : { *(.debug_pubnames) }
|
||||
/* DWARF 2 */
|
||||
.debug_info 0 : { *(.debug_info) }
|
||||
.debug_abbrev 0 : { *(.debug_abbrev) }
|
||||
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
|
||||
.debug_frame 0 : { *(.debug_frame) }
|
||||
.debug_str 0 : { *(.debug_str) }
|
||||
.debug_loc 0 : { *(.debug_loc) }
|
||||
.debug_macinfo 0 : { *(.debug_macinfo) }
|
||||
/* SGI/MIPS DWARF 2 extensions */
|
||||
.debug_weaknames 0 : { *(.debug_weaknames) }
|
||||
.debug_funcnames 0 : { *(.debug_funcnames) }
|
||||
.debug_typenames 0 : { *(.debug_typenames) }
|
||||
.debug_varnames 0 : { *(.debug_varnames) }
|
||||
/* DWARF 3 */
|
||||
.debug_pubtypes 0 : { *(.debug_pubtypes) }
|
||||
.debug_ranges 0 : { *(.debug_ranges) }
|
||||
/* DWARF Extension. */
|
||||
.debug_macro 0 : { *(.debug_macro) }
|
||||
.debug_addr 0 : { *(.debug_addr) }
|
||||
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
|
||||
}
|
||||
|
959
vgpu_unlock_hooks.c
Normal file
959
vgpu_unlock_hooks.c
Normal file
|
@ -0,0 +1,959 @@
|
|||
/*
|
||||
* vGPU unlock hooks.
|
||||
*
|
||||
* This file is designed to be included into a single translation unit of the
|
||||
* vGPU driver's kernel module. It hooks the nv_ioremap_* functions and memcpy
|
||||
* for that translation unit and applies the vgpu_unlock patch when the magic
|
||||
* and key values has been accessed by the driver.
|
||||
*
|
||||
* Copyright 2021 Jonathan Johansson
|
||||
* This file is part of the "vgpu_unlock" project, and is distributed under the
|
||||
* MIT License. See the LICENSE file for more details.
|
||||
*/
|
||||
|
||||
/*------------------------------------------------------------------------------
|
||||
* Implementation of AES128-ECB.
|
||||
*------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint8_t round_key[176];
|
||||
}
|
||||
vgpu_unlock_aes128_ctx;
|
||||
|
||||
typedef uint8_t vgpu_unlock_aes128_state[4][4];
|
||||
|
||||
#define Nb 4
|
||||
#define Nk 4
|
||||
#define Nr 10
|
||||
#define getSBoxValue(num) (vgpu_unlock_aes128_sbox[(num)])
|
||||
#define getSBoxInvert(num) (vgpu_unlock_aes128_rsbox[(num)])
|
||||
#define Multiply(x, y) \
|
||||
( ((y & 1) * x) ^ \
|
||||
((y>>1 & 1) * vgpu_unlock_aes128_xtime(x)) ^ \
|
||||
((y>>2 & 1) * vgpu_unlock_aes128_xtime(vgpu_unlock_aes128_xtime(x))) ^ \
|
||||
((y>>3 & 1) * vgpu_unlock_aes128_xtime(vgpu_unlock_aes128_xtime(vgpu_unlock_aes128_xtime(x)))) ^ \
|
||||
((y>>4 & 1) * vgpu_unlock_aes128_xtime(vgpu_unlock_aes128_xtime(vgpu_unlock_aes128_xtime(vgpu_unlock_aes128_xtime(x)))))) \
|
||||
|
||||
static const uint8_t vgpu_unlock_aes128_sbox[256] = {
|
||||
//0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
|
||||
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
|
||||
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
|
||||
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
|
||||
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
|
||||
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
|
||||
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
|
||||
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
|
||||
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
|
||||
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
|
||||
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
|
||||
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
|
||||
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
|
||||
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
|
||||
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
|
||||
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 };
|
||||
|
||||
static const uint8_t vgpu_unlock_aes128_rsbox[256] = {
|
||||
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
|
||||
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
|
||||
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
|
||||
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
|
||||
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
|
||||
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
|
||||
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
|
||||
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
|
||||
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
|
||||
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
|
||||
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
|
||||
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
|
||||
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
|
||||
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
|
||||
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
|
||||
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d };
|
||||
|
||||
static const uint8_t vgpu_unlock_aes128_rcon[11] = {
|
||||
0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
|
||||
|
||||
static void vgpu_unlock_aes128_key_expansion(uint8_t *round_key,
|
||||
const uint8_t *Key)
|
||||
{
|
||||
unsigned i, j, k;
|
||||
uint8_t tempa[4];
|
||||
|
||||
for (i = 0; i < Nk; ++i)
|
||||
{
|
||||
round_key[(i * 4) + 0] = Key[(i * 4) + 0];
|
||||
round_key[(i * 4) + 1] = Key[(i * 4) + 1];
|
||||
round_key[(i * 4) + 2] = Key[(i * 4) + 2];
|
||||
round_key[(i * 4) + 3] = Key[(i * 4) + 3];
|
||||
}
|
||||
|
||||
for (i = Nk; i < Nb * (Nr + 1); ++i)
|
||||
{
|
||||
k = (i - 1) * 4;
|
||||
tempa[0] = round_key[k + 0];
|
||||
tempa[1] = round_key[k + 1];
|
||||
tempa[2] = round_key[k + 2];
|
||||
tempa[3] = round_key[k + 3];
|
||||
|
||||
if (i % Nk == 0)
|
||||
{
|
||||
const uint8_t u8tmp = tempa[0];
|
||||
tempa[0] = tempa[1];
|
||||
tempa[1] = tempa[2];
|
||||
tempa[2] = tempa[3];
|
||||
tempa[3] = u8tmp;
|
||||
tempa[0] = getSBoxValue(tempa[0]);
|
||||
tempa[1] = getSBoxValue(tempa[1]);
|
||||
tempa[2] = getSBoxValue(tempa[2]);
|
||||
tempa[3] = getSBoxValue(tempa[3]);
|
||||
tempa[0] = tempa[0] ^ vgpu_unlock_aes128_rcon[i/Nk];
|
||||
}
|
||||
|
||||
j = i * 4; k=(i - Nk) * 4;
|
||||
round_key[j + 0] = round_key[k + 0] ^ tempa[0];
|
||||
round_key[j + 1] = round_key[k + 1] ^ tempa[1];
|
||||
round_key[j + 2] = round_key[k + 2] ^ tempa[2];
|
||||
round_key[j + 3] = round_key[k + 3] ^ tempa[3];
|
||||
}
|
||||
}
|
||||
|
||||
static void vgpu_unlock_aes128_add_round_key(uint8_t round,
|
||||
vgpu_unlock_aes128_state *state,
|
||||
const uint8_t *round_key)
|
||||
{
|
||||
uint8_t i,j;
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
for (j = 0; j < 4; ++j)
|
||||
{
|
||||
(*state)[i][j] ^= round_key[(round * Nb * 4) + (i * Nb) + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void vgpu_unlock_aes128_sub_bytes(vgpu_unlock_aes128_state *state)
|
||||
{
|
||||
uint8_t i, j;
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
for (j = 0; j < 4; ++j)
|
||||
{
|
||||
(*state)[j][i] = getSBoxValue((*state)[j][i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void vgpu_unlock_aes128_shift_rows(vgpu_unlock_aes128_state *state)
|
||||
{
|
||||
uint8_t temp;
|
||||
|
||||
temp = (*state)[0][1];
|
||||
(*state)[0][1] = (*state)[1][1];
|
||||
(*state)[1][1] = (*state)[2][1];
|
||||
(*state)[2][1] = (*state)[3][1];
|
||||
(*state)[3][1] = temp;
|
||||
|
||||
temp = (*state)[0][2];
|
||||
(*state)[0][2] = (*state)[2][2];
|
||||
(*state)[2][2] = temp;
|
||||
|
||||
temp = (*state)[1][2];
|
||||
(*state)[1][2] = (*state)[3][2];
|
||||
(*state)[3][2] = temp;
|
||||
|
||||
temp = (*state)[0][3];
|
||||
(*state)[0][3] = (*state)[3][3];
|
||||
(*state)[3][3] = (*state)[2][3];
|
||||
(*state)[2][3] = (*state)[1][3];
|
||||
(*state)[1][3] = temp;
|
||||
}
|
||||
|
||||
static uint8_t vgpu_unlock_aes128_xtime(uint8_t x)
|
||||
{
|
||||
return ((x<<1) ^ (((x>>7) & 1) * 0x1b));
|
||||
}
|
||||
|
||||
static void vgpu_unlock_aes128_mix_columns(vgpu_unlock_aes128_state *state)
|
||||
{
|
||||
uint8_t i;
|
||||
uint8_t tmp, tm, t;
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
t = (*state)[i][0];
|
||||
tmp = (*state)[i][0] ^ (*state)[i][1] ^ (*state)[i][2] ^ (*state)[i][3];
|
||||
tm = (*state)[i][0] ^ (*state)[i][1];
|
||||
tm = vgpu_unlock_aes128_xtime(tm); (*state)[i][0] ^= tm ^ tmp;
|
||||
tm = (*state)[i][1] ^ (*state)[i][2];
|
||||
tm = vgpu_unlock_aes128_xtime(tm); (*state)[i][1] ^= tm ^ tmp;
|
||||
tm = (*state)[i][2] ^ (*state)[i][3];
|
||||
tm = vgpu_unlock_aes128_xtime(tm); (*state)[i][2] ^= tm ^ tmp;
|
||||
tm = (*state)[i][3] ^ t;
|
||||
tm = vgpu_unlock_aes128_xtime(tm); (*state)[i][3] ^= tm ^ tmp;
|
||||
}
|
||||
}
|
||||
|
||||
static void vgpu_unlock_aes128_inv_mix_columns(vgpu_unlock_aes128_state *state)
|
||||
{
|
||||
int i;
|
||||
uint8_t a, b, c, d;
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
a = (*state)[i][0];
|
||||
b = (*state)[i][1];
|
||||
c = (*state)[i][2];
|
||||
d = (*state)[i][3];
|
||||
|
||||
(*state)[i][0] = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09);
|
||||
(*state)[i][1] = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d);
|
||||
(*state)[i][2] = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b);
|
||||
(*state)[i][3] = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e);
|
||||
}
|
||||
}
|
||||
|
||||
static void vgpu_unlock_aes128_inv_sub_bytes(vgpu_unlock_aes128_state *state)
|
||||
{
|
||||
uint8_t i, j;
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
for (j = 0; j < 4; ++j)
|
||||
{
|
||||
(*state)[j][i] = getSBoxInvert((*state)[j][i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void vgpu_unlock_aes128_inv_shift_rows(vgpu_unlock_aes128_state *state)
|
||||
{
|
||||
uint8_t temp;
|
||||
|
||||
temp = (*state)[3][1];
|
||||
(*state)[3][1] = (*state)[2][1];
|
||||
(*state)[2][1] = (*state)[1][1];
|
||||
(*state)[1][1] = (*state)[0][1];
|
||||
(*state)[0][1] = temp;
|
||||
|
||||
temp = (*state)[0][2];
|
||||
(*state)[0][2] = (*state)[2][2];
|
||||
(*state)[2][2] = temp;
|
||||
|
||||
temp = (*state)[1][2];
|
||||
(*state)[1][2] = (*state)[3][2];
|
||||
(*state)[3][2] = temp;
|
||||
|
||||
temp = (*state)[0][3];
|
||||
(*state)[0][3] = (*state)[1][3];
|
||||
(*state)[1][3] = (*state)[2][3];
|
||||
(*state)[2][3] = (*state)[3][3];
|
||||
(*state)[3][3] = temp;
|
||||
}
|
||||
|
||||
static void vgpu_unlock_aes128_cipher(vgpu_unlock_aes128_state *state,
|
||||
const uint8_t* round_key)
|
||||
{
|
||||
uint8_t round = 0;
|
||||
|
||||
vgpu_unlock_aes128_add_round_key(0, state, round_key);
|
||||
|
||||
for (round = 1; ; ++round)
|
||||
{
|
||||
vgpu_unlock_aes128_sub_bytes(state);
|
||||
vgpu_unlock_aes128_shift_rows(state);
|
||||
|
||||
if (round == Nr)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
vgpu_unlock_aes128_mix_columns(state);
|
||||
vgpu_unlock_aes128_add_round_key(round, state, round_key);
|
||||
}
|
||||
|
||||
vgpu_unlock_aes128_add_round_key(Nr, state, round_key);
|
||||
}
|
||||
|
||||
static void vgpu_unlock_aes128_inv_cipher(vgpu_unlock_aes128_state *state,
|
||||
const uint8_t* round_key)
|
||||
{
|
||||
uint8_t round = 0;
|
||||
|
||||
vgpu_unlock_aes128_add_round_key(Nr, state, round_key);
|
||||
|
||||
for (round = (Nr - 1); ; --round)
|
||||
{
|
||||
vgpu_unlock_aes128_inv_shift_rows(state);
|
||||
vgpu_unlock_aes128_inv_sub_bytes(state);
|
||||
vgpu_unlock_aes128_add_round_key(round, state, round_key);
|
||||
|
||||
if (round == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
vgpu_unlock_aes128_inv_mix_columns(state);
|
||||
}
|
||||
}
|
||||
|
||||
static void vgpu_unlock_aes128_init(vgpu_unlock_aes128_ctx *ctx,
|
||||
const uint8_t *key)
|
||||
{
|
||||
vgpu_unlock_aes128_key_expansion(ctx->round_key, key);
|
||||
}
|
||||
|
||||
static void vgpu_unlock_aes128_encrypt(const vgpu_unlock_aes128_ctx *ctx,
|
||||
uint8_t *buf)
|
||||
{
|
||||
vgpu_unlock_aes128_cipher((vgpu_unlock_aes128_state*)buf,
|
||||
ctx->round_key);
|
||||
}
|
||||
|
||||
static void vgpu_unlock_aes128_decrypt(const vgpu_unlock_aes128_ctx *ctx,
|
||||
uint8_t* buf)
|
||||
{
|
||||
vgpu_unlock_aes128_inv_cipher((vgpu_unlock_aes128_state*)buf,
|
||||
ctx->round_key);
|
||||
}
|
||||
|
||||
#undef Nb
|
||||
#undef Nk
|
||||
#undef Nr
|
||||
#undef getSBoxValue
|
||||
#undef getSBoxInvert
|
||||
#undef Multiply
|
||||
|
||||
/*------------------------------------------------------------------------------
|
||||
* End of AES128-ECB implementation.
|
||||
*------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*------------------------------------------------------------------------------
|
||||
* Implementation of SHA256.
|
||||
* Original author: Brad Conte (brad AT bradconte.com)
|
||||
*------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
uint8_t data[64];
|
||||
uint32_t datalen;
|
||||
uint64_t bitlen;
|
||||
uint32_t state[8];
|
||||
}
|
||||
vgpu_unlock_sha256_ctx;
|
||||
|
||||
#define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b))))
|
||||
#define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b))))
|
||||
|
||||
#define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z)))
|
||||
#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
|
||||
#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22))
|
||||
#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25))
|
||||
#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3))
|
||||
#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10))
|
||||
|
||||
static void vgpu_unlock_sha256_transform(vgpu_unlock_sha256_ctx *ctx,
|
||||
const uint8_t data[])
|
||||
{
|
||||
static const uint32_t k[64] = {
|
||||
0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
|
||||
0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
|
||||
0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
|
||||
0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
|
||||
0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
|
||||
0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
|
||||
0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
|
||||
0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
};
|
||||
|
||||
uint32_t a, b, c, d, e, f, g, h, i, j, t1, t2, m[64];
|
||||
|
||||
for (i = 0, j = 0; i < 16; ++i, j += 4)
|
||||
m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]);
|
||||
for ( ; i < 64; ++i)
|
||||
m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16];
|
||||
|
||||
a = ctx->state[0];
|
||||
b = ctx->state[1];
|
||||
c = ctx->state[2];
|
||||
d = ctx->state[3];
|
||||
e = ctx->state[4];
|
||||
f = ctx->state[5];
|
||||
g = ctx->state[6];
|
||||
h = ctx->state[7];
|
||||
|
||||
for (i = 0; i < 64; ++i) {
|
||||
t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
|
||||
t2 = EP0(a) + MAJ(a,b,c);
|
||||
h = g;
|
||||
g = f;
|
||||
f = e;
|
||||
e = d + t1;
|
||||
d = c;
|
||||
c = b;
|
||||
b = a;
|
||||
a = t1 + t2;
|
||||
}
|
||||
|
||||
ctx->state[0] += a;
|
||||
ctx->state[1] += b;
|
||||
ctx->state[2] += c;
|
||||
ctx->state[3] += d;
|
||||
ctx->state[4] += e;
|
||||
ctx->state[5] += f;
|
||||
ctx->state[6] += g;
|
||||
ctx->state[7] += h;
|
||||
}
|
||||
|
||||
static void vgpu_unlock_sha256_init(vgpu_unlock_sha256_ctx *ctx)
|
||||
{
|
||||
ctx->datalen = 0;
|
||||
ctx->bitlen = 0;
|
||||
ctx->state[0] = 0x6a09e667;
|
||||
ctx->state[1] = 0xbb67ae85;
|
||||
ctx->state[2] = 0x3c6ef372;
|
||||
ctx->state[3] = 0xa54ff53a;
|
||||
ctx->state[4] = 0x510e527f;
|
||||
ctx->state[5] = 0x9b05688c;
|
||||
ctx->state[6] = 0x1f83d9ab;
|
||||
ctx->state[7] = 0x5be0cd19;
|
||||
}
|
||||
|
||||
static void vgpu_unlock_sha256_update(vgpu_unlock_sha256_ctx *ctx,
|
||||
const uint8_t data[],
|
||||
size_t len)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < len; ++i) {
|
||||
ctx->data[ctx->datalen] = data[i];
|
||||
ctx->datalen++;
|
||||
if (ctx->datalen == 64) {
|
||||
vgpu_unlock_sha256_transform(ctx, ctx->data);
|
||||
ctx->bitlen += 512;
|
||||
ctx->datalen = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void vgpu_unlock_sha256_final(vgpu_unlock_sha256_ctx *ctx,
|
||||
uint8_t hash[])
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
i = ctx->datalen;
|
||||
|
||||
/* Pad whatever data is left in the buffer. */
|
||||
if (ctx->datalen < 56) {
|
||||
ctx->data[i++] = 0x80;
|
||||
while (i < 56)
|
||||
ctx->data[i++] = 0x00;
|
||||
}
|
||||
else {
|
||||
ctx->data[i++] = 0x80;
|
||||
while (i < 64)
|
||||
ctx->data[i++] = 0x00;
|
||||
vgpu_unlock_sha256_transform(ctx, ctx->data);
|
||||
memset(ctx->data, 0, 56);
|
||||
}
|
||||
|
||||
/*
|
||||
* Append to the padding the total message's length in bits and
|
||||
* transform.
|
||||
*/
|
||||
ctx->bitlen += ctx->datalen * 8;
|
||||
ctx->data[63] = ctx->bitlen;
|
||||
ctx->data[62] = ctx->bitlen >> 8;
|
||||
ctx->data[61] = ctx->bitlen >> 16;
|
||||
ctx->data[60] = ctx->bitlen >> 24;
|
||||
ctx->data[59] = ctx->bitlen >> 32;
|
||||
ctx->data[58] = ctx->bitlen >> 40;
|
||||
ctx->data[57] = ctx->bitlen >> 48;
|
||||
ctx->data[56] = ctx->bitlen >> 56;
|
||||
vgpu_unlock_sha256_transform(ctx, ctx->data);
|
||||
|
||||
/*
|
||||
* Since this implementation uses little endian byte ordering and SHA
|
||||
* uses big endian, reverse all the bytes when copying the final state
|
||||
* to the output hash.
|
||||
*/
|
||||
for (i = 0; i < 4; ++i) {
|
||||
hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff;
|
||||
hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff;
|
||||
hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff;
|
||||
hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff;
|
||||
hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff;
|
||||
hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff;
|
||||
hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff;
|
||||
hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff;
|
||||
}
|
||||
}
|
||||
|
||||
#undef ROTLEFT
|
||||
#undef ROTRIGHT
|
||||
|
||||
#undef CH
|
||||
#undef MAJ
|
||||
#undef EP0
|
||||
#undef EP1
|
||||
#undef SIG0
|
||||
#undef SIG1
|
||||
|
||||
/*------------------------------------------------------------------------------
|
||||
* End of SHA256 implementation.
|
||||
*------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/*------------------------------------------------------------------------------
|
||||
* Implementation of HMAC-SHA256.
|
||||
*------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
static void vgpu_unlock_hmac_sha256(void* dst,
|
||||
const void *msg,
|
||||
size_t msg_size,
|
||||
const void *key,
|
||||
size_t key_size)
|
||||
{
|
||||
vgpu_unlock_sha256_ctx ctx;
|
||||
uint8_t o_key[96];
|
||||
uint8_t i_key_pad[64];
|
||||
uint8_t i;
|
||||
|
||||
for (i = 0; i < 64; i++)
|
||||
{
|
||||
if (i < key_size)
|
||||
{
|
||||
o_key[i] = ((uint8_t*)key)[i] ^ 0x5c;
|
||||
i_key_pad[i] = ((uint8_t*)key)[i] ^ 0x36;
|
||||
}
|
||||
else
|
||||
{
|
||||
o_key[i] = 0x5c;
|
||||
i_key_pad[i] = 0x36;
|
||||
}
|
||||
}
|
||||
|
||||
vgpu_unlock_sha256_init(&ctx);
|
||||
vgpu_unlock_sha256_update(&ctx, i_key_pad, sizeof(i_key_pad));
|
||||
vgpu_unlock_sha256_update(&ctx, msg, msg_size);
|
||||
vgpu_unlock_sha256_final(&ctx, &o_key[64]);
|
||||
|
||||
vgpu_unlock_sha256_init(&ctx);
|
||||
vgpu_unlock_sha256_update(&ctx, o_key, sizeof(o_key));
|
||||
vgpu_unlock_sha256_final(&ctx, dst);
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------------------
|
||||
* End of HMAC-SHA256 implementation.
|
||||
*------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*------------------------------------------------------------------------------
|
||||
* Implementation of vgpu_unlock hooks.
|
||||
*------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* Debug logs can be enabled here. */
|
||||
#if 0
|
||||
#define LOG(...) printk(__VA_ARGS__)
|
||||
#else
|
||||
#define LOG(...)
|
||||
#endif
|
||||
|
||||
#define VGPU_UNLOCK_MAGIC_PHYS_BEG (0xf0029624)
|
||||
#define VGPU_UNLOCK_MAGIC_PHYS_END (VGPU_UNLOCK_MAGIC_PHYS_BEG + 0x10)
|
||||
#define VGPU_UNLOCK_KEY_PHYS_BEG (0xf0029634)
|
||||
#define VGPU_UNLOCK_KEY_PHYS_END (VGPU_UNLOCK_KEY_PHYS_BEG + 0x10)
|
||||
|
||||
static const uint8_t vgpu_unlock_magic_sacrifice[0x10] = {
|
||||
0x46, 0x4f, 0x39, 0x49, 0x74, 0x91, 0xd7, 0x0f,
|
||||
0xbc, 0x65, 0xc2, 0x70, 0xdd, 0xdd, 0x11, 0x54
|
||||
};
|
||||
|
||||
static bool vgpu_unlock_patch_applied = FALSE;
|
||||
|
||||
static bool vgpu_unlock_magic_mapped = FALSE;
|
||||
static uint64_t vgpu_unlock_magic_beg;
|
||||
static uint64_t vgpu_unlock_magic_end;
|
||||
static uint8_t vgpu_unlock_magic[0x10];
|
||||
static bool vgpu_unlock_magic_found = FALSE;
|
||||
|
||||
static bool vgpu_unlock_key_mapped = FALSE;
|
||||
static uint64_t vgpu_unlock_key_beg;
|
||||
static uint64_t vgpu_unlock_key_end;
|
||||
static uint8_t vgpu_unlock_key[0x10];
|
||||
static bool vgpu_unlock_key_found = FALSE;
|
||||
|
||||
/* These need to be added to the linker script. */
|
||||
extern uint8_t vgpu_unlock_nv_kern_rodata_beg;
|
||||
extern uint8_t vgpu_unlock_nv_kern_rodata_end;
|
||||
|
||||
static uint16_t vgpu_unlock_pci_devid_to_vgpu_capable(uint16_t pci_devid)
|
||||
{
|
||||
switch (pci_devid)
|
||||
{
|
||||
/* GP102 */
|
||||
case 0x1b00: /* TITAN X (Pascal) */
|
||||
case 0x1b02: /* TITAN Xp */
|
||||
case 0x1b06: /* GTX 1080 Ti */
|
||||
case 0x1b30: /* Quadro P6000 */
|
||||
return 0x1b38; /* Tesla P40 */
|
||||
|
||||
/* GP104 */
|
||||
case 0x1b80: /* GTX 1080 */
|
||||
case 0x1b81: /* GTX 1070 */
|
||||
case 0x1b82: /* GTX 1070 Ti */
|
||||
case 0x1b83: /* GTX 1060 6GB */
|
||||
case 0x1b84: /* GTX 1060 3GB */
|
||||
case 0x1bb0: /* Quadro P5000 */
|
||||
return 0x1bb3; /* Tesla P4 */
|
||||
|
||||
/* TU102 */
|
||||
case 0x1e02: /* TITAN RTX */
|
||||
case 0x1e04: /* RTX 2080 Ti */
|
||||
case 0x1e07: /* RTX 2080 Ti */
|
||||
return 0x1e30; /* Quadro RTX 6000 */
|
||||
}
|
||||
|
||||
return pci_devid;
|
||||
}
|
||||
|
||||
/* Our own memcmp that will bypass buffer overflow checks. */
|
||||
static int vgpu_unlock_memcmp(const void *a, const void *b, size_t size)
|
||||
{
|
||||
uint8_t *pa = (uint8_t*)a;
|
||||
uint8_t *pb = (uint8_t*)b;
|
||||
|
||||
while (size--)
|
||||
{
|
||||
if (*pa != *pb)
|
||||
{
|
||||
return *pa - *pb;
|
||||
}
|
||||
|
||||
pa++;
|
||||
pb++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Search for a certain pattern in the .rodata section of nv-kern.o_binary. */
|
||||
static void *vgpu_unlock_find_in_rodata(const void *val, size_t size)
|
||||
{
|
||||
uint8_t *i;
|
||||
|
||||
for (i = (uint8_t*)&vgpu_unlock_nv_kern_rodata_beg;
|
||||
i < (uint8_t*)&vgpu_unlock_nv_kern_rodata_end - size;
|
||||
i++)
|
||||
{
|
||||
if (vgpu_unlock_memcmp(val, i, size) == 0)
|
||||
{
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Check if a value is within a range. */
|
||||
static bool vgpu_unlock_in_range(uint64_t val, uint64_t beg, uint64_t end)
|
||||
{
|
||||
return (val >= beg) && (val <= end);
|
||||
}
|
||||
|
||||
/* Check if range a is completely contained within range b. */
|
||||
static bool vgpu_unlock_range_contained_in(uint64_t a_beg,
|
||||
uint64_t a_end,
|
||||
uint64_t b_beg,
|
||||
uint64_t b_end)
|
||||
{
|
||||
return vgpu_unlock_in_range(a_beg, b_beg, b_end) &&
|
||||
vgpu_unlock_in_range(a_end, b_beg, b_end);
|
||||
}
|
||||
|
||||
static void vgpu_unlock_apply_patch(void)
|
||||
{
|
||||
uint8_t i;
|
||||
void *magic;
|
||||
void **magic_ptr;
|
||||
void **blocks_ptr;
|
||||
void **sign_ptr;
|
||||
uint8_t sign[0x20];
|
||||
uint8_t num_blocks;
|
||||
void *sac_magic;
|
||||
void **sac_magic_ptr;
|
||||
void **sac_blocks_ptr;
|
||||
void **sac_sign_ptr;
|
||||
vgpu_unlock_aes128_ctx aes_ctx;
|
||||
uint16_t *pci_info;
|
||||
|
||||
magic = vgpu_unlock_find_in_rodata(vgpu_unlock_magic,
|
||||
sizeof(vgpu_unlock_magic));
|
||||
if (!magic)
|
||||
{
|
||||
LOG(KERN_ERR "Failed to find magic in .rodata.\n");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
LOG(KERN_WARNING "Magic is at: %px\n", magic);
|
||||
|
||||
magic_ptr = (void**)vgpu_unlock_find_in_rodata(&magic,
|
||||
sizeof(magic));
|
||||
|
||||
if (!magic_ptr)
|
||||
{
|
||||
LOG(KERN_ERR "Failed to find pointer to magic in .rodata.\n");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
blocks_ptr = magic_ptr + 1;
|
||||
sign_ptr = magic_ptr + 2;
|
||||
|
||||
LOG(KERN_WARNING "Pointers found, magic: %px blocks: %px sign: %px\n",
|
||||
magic_ptr, blocks_ptr, sign_ptr);
|
||||
|
||||
if (!vgpu_unlock_in_range((uint64_t)*blocks_ptr,
|
||||
(uint64_t)&vgpu_unlock_nv_kern_rodata_beg,
|
||||
(uint64_t)&vgpu_unlock_nv_kern_rodata_end) ||
|
||||
!vgpu_unlock_in_range((uint64_t)*sign_ptr,
|
||||
(uint64_t)&vgpu_unlock_nv_kern_rodata_beg,
|
||||
(uint64_t)&vgpu_unlock_nv_kern_rodata_end))
|
||||
{
|
||||
LOG(KERN_ERR "Invalid sign or blocks pointer.\n");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
num_blocks = *(uint8_t*)*blocks_ptr;
|
||||
|
||||
vgpu_unlock_hmac_sha256(sign,
|
||||
*blocks_ptr,
|
||||
1 + num_blocks * 0x10,
|
||||
vgpu_unlock_key,
|
||||
sizeof(vgpu_unlock_key));
|
||||
|
||||
LOG(KERN_WARNING "Generate signature is: %32ph\n", sign);
|
||||
|
||||
if (memcmp(sign, *sign_ptr, sizeof(sign)) != 0)
|
||||
{
|
||||
LOG(KERN_ERR "Signatures does not match.\n");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
sac_magic = vgpu_unlock_find_in_rodata(vgpu_unlock_magic_sacrifice,
|
||||
sizeof(vgpu_unlock_magic_sacrifice));
|
||||
|
||||
if (!sac_magic)
|
||||
{
|
||||
LOG(KERN_ERR "Failed to find sacrificial magic.\n");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
LOG(KERN_WARNING "Sacrificial magic is at: %px\n", sac_magic);
|
||||
|
||||
sac_magic_ptr = (void**) vgpu_unlock_find_in_rodata(&sac_magic,
|
||||
sizeof(sac_magic));
|
||||
|
||||
if (!sac_magic_ptr)
|
||||
{
|
||||
LOG(KERN_ERR "Failed to find pointer to sacrificial magic.\n");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
sac_blocks_ptr = sac_magic_ptr + 1;
|
||||
sac_sign_ptr = sac_magic_ptr + 2;
|
||||
|
||||
LOG(KERN_WARNING "Pointers found, sac_magic: %px sac_blocks: %px sac_sign: %px\n",
|
||||
sac_magic_ptr, sac_blocks_ptr, sac_sign_ptr);
|
||||
|
||||
if (!vgpu_unlock_in_range((uint64_t)*sac_blocks_ptr,
|
||||
(uint64_t)&vgpu_unlock_nv_kern_rodata_beg,
|
||||
(uint64_t)&vgpu_unlock_nv_kern_rodata_end) ||
|
||||
!vgpu_unlock_in_range((uint64_t)*sac_sign_ptr,
|
||||
(uint64_t)&vgpu_unlock_nv_kern_rodata_beg,
|
||||
(uint64_t)&vgpu_unlock_nv_kern_rodata_end))
|
||||
{
|
||||
LOG(KERN_ERR "Invalid sacrificial sign or blocks pointer.\n");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
memcpy(sac_magic, vgpu_unlock_magic, sizeof(vgpu_unlock_magic));
|
||||
memcpy(*sac_blocks_ptr, *blocks_ptr, num_blocks * 0x10 + 1);
|
||||
|
||||
vgpu_unlock_aes128_init(&aes_ctx, vgpu_unlock_key);
|
||||
|
||||
for (i = 0; i < num_blocks; i++)
|
||||
{
|
||||
vgpu_unlock_aes128_decrypt(&aes_ctx,
|
||||
(uint8_t*)*sac_blocks_ptr + 1 + i * 0x10);
|
||||
LOG(KERN_WARNING "Decrypted block is: %16ph.\n",
|
||||
(uint8_t*)*sac_blocks_ptr + 1 + i * 0x10);
|
||||
}
|
||||
|
||||
pci_info = (uint16_t*)((uint8_t*)*sac_blocks_ptr + 1);
|
||||
|
||||
pci_info[1] = vgpu_unlock_pci_devid_to_vgpu_capable(pci_info[1]);
|
||||
pci_info[2] = 0;
|
||||
pci_info[3] = 0x11ec;
|
||||
pci_info[4] = 0;
|
||||
|
||||
vgpu_unlock_aes128_init(&aes_ctx, vgpu_unlock_key);
|
||||
|
||||
for (i = 0; i < num_blocks; i++)
|
||||
{
|
||||
vgpu_unlock_aes128_encrypt(&aes_ctx,
|
||||
(uint8_t*)*sac_blocks_ptr + 1 + i * 0x10);
|
||||
}
|
||||
|
||||
vgpu_unlock_hmac_sha256(*sac_sign_ptr,
|
||||
*sac_blocks_ptr,
|
||||
1 + num_blocks * 0x10,
|
||||
vgpu_unlock_key,
|
||||
sizeof(vgpu_unlock_key));
|
||||
|
||||
vgpu_unlock_patch_applied = TRUE;
|
||||
|
||||
LOG(KERN_WARNING "vGPU unlock patch applied.\n");
|
||||
|
||||
return;
|
||||
|
||||
failed:
|
||||
vgpu_unlock_magic_mapped = FALSE;
|
||||
vgpu_unlock_magic_found = FALSE;
|
||||
vgpu_unlock_key_mapped = FALSE;
|
||||
vgpu_unlock_key_found = FALSE;
|
||||
}
|
||||
|
||||
static void *vgpu_unlock_memcpy_hook(void *dst, const void *src, size_t count)
|
||||
{
|
||||
void *result = memcpy(dst, src, count);
|
||||
|
||||
if (!vgpu_unlock_magic_found &&
|
||||
vgpu_unlock_magic_mapped &&
|
||||
vgpu_unlock_range_contained_in(vgpu_unlock_magic_beg,
|
||||
vgpu_unlock_magic_end,
|
||||
(uint64_t)src,
|
||||
(uint64_t)src + count))
|
||||
{
|
||||
memcpy(vgpu_unlock_magic,
|
||||
(void*)vgpu_unlock_magic_beg,
|
||||
sizeof(vgpu_unlock_magic));
|
||||
vgpu_unlock_magic_found = TRUE;
|
||||
|
||||
LOG(KERN_WARNING "Magic found: %16ph\n",
|
||||
vgpu_unlock_magic);
|
||||
}
|
||||
|
||||
if (!vgpu_unlock_key_found &&
|
||||
vgpu_unlock_key_mapped &&
|
||||
vgpu_unlock_range_contained_in(vgpu_unlock_key_beg,
|
||||
vgpu_unlock_key_end,
|
||||
(uint64_t)src,
|
||||
(uint64_t)src + count))
|
||||
{
|
||||
memcpy(vgpu_unlock_key,
|
||||
(void*)vgpu_unlock_key_beg,
|
||||
sizeof(vgpu_unlock_key));
|
||||
vgpu_unlock_key_found = TRUE;
|
||||
|
||||
LOG(KERN_WARNING "Key found: %16ph\n",
|
||||
vgpu_unlock_key);
|
||||
}
|
||||
|
||||
if (!vgpu_unlock_patch_applied &&
|
||||
vgpu_unlock_magic_found &&
|
||||
vgpu_unlock_key_found)
|
||||
{
|
||||
vgpu_unlock_apply_patch();
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Check if the new IO mapping contains the magic or key. */
|
||||
static void vgpu_unlock_check_map(uint64_t phys_addr,
|
||||
size_t size,
|
||||
void *virt_addr)
|
||||
{
|
||||
LOG(KERN_WARNING "Remap called.\n");
|
||||
|
||||
if (virt_addr &&
|
||||
!vgpu_unlock_magic_mapped &&
|
||||
vgpu_unlock_range_contained_in(VGPU_UNLOCK_MAGIC_PHYS_BEG,
|
||||
VGPU_UNLOCK_MAGIC_PHYS_END,
|
||||
phys_addr,
|
||||
phys_addr + size))
|
||||
{
|
||||
uint64_t offset_beg = VGPU_UNLOCK_MAGIC_PHYS_BEG - phys_addr;
|
||||
uint64_t offset_end = VGPU_UNLOCK_MAGIC_PHYS_END - phys_addr;
|
||||
vgpu_unlock_magic_beg = (uint64_t)virt_addr + offset_beg;
|
||||
vgpu_unlock_magic_end = (uint64_t)virt_addr + offset_end;
|
||||
vgpu_unlock_magic_mapped = TRUE;
|
||||
LOG(KERN_WARNING "Magic mapped at: 0x%llX\n",
|
||||
vgpu_unlock_magic_beg);
|
||||
}
|
||||
|
||||
if (virt_addr &&
|
||||
!vgpu_unlock_key_mapped &&
|
||||
vgpu_unlock_range_contained_in(VGPU_UNLOCK_KEY_PHYS_BEG,
|
||||
VGPU_UNLOCK_KEY_PHYS_END,
|
||||
phys_addr,
|
||||
phys_addr + size))
|
||||
{
|
||||
uint64_t offset_beg = VGPU_UNLOCK_KEY_PHYS_BEG - phys_addr;
|
||||
uint64_t offset_end = VGPU_UNLOCK_KEY_PHYS_END - phys_addr;
|
||||
vgpu_unlock_key_beg = (uint64_t)virt_addr + offset_beg;
|
||||
vgpu_unlock_key_end = (uint64_t)virt_addr + offset_end;
|
||||
vgpu_unlock_key_mapped = TRUE;
|
||||
LOG(KERN_WARNING "Key mapped at: 0x%llX\n",
|
||||
vgpu_unlock_key_beg);
|
||||
}
|
||||
}
|
||||
|
||||
static void *vgpu_unlock_nv_ioremap_hook(uint64_t phys,
|
||||
uint64_t size)
|
||||
{
|
||||
void *virt_addr = nv_ioremap(phys, size);
|
||||
vgpu_unlock_check_map(phys, size, virt_addr);
|
||||
return virt_addr;
|
||||
}
|
||||
|
||||
static void *vgpu_unlock_nv_ioremap_nocache_hook(uint64_t phys,
|
||||
uint64_t size)
|
||||
{
|
||||
void *virt_addr = nv_ioremap_nocache(phys, size);
|
||||
vgpu_unlock_check_map(phys, size, virt_addr);
|
||||
return virt_addr;
|
||||
}
|
||||
|
||||
static void *vgpu_unlock_nv_ioremap_cache_hook(uint64_t phys,
|
||||
uint64_t size)
|
||||
{
|
||||
void *virt_addr = nv_ioremap_cache(phys, size);
|
||||
vgpu_unlock_check_map(phys, size, virt_addr);
|
||||
return virt_addr;
|
||||
}
|
||||
|
||||
static void *vgpu_unlock_nv_ioremap_wc_hook(uint64_t phys,
|
||||
uint64_t size)
|
||||
{
|
||||
void *virt_addr = nv_ioremap_wc(phys, size);
|
||||
vgpu_unlock_check_map(phys, size, virt_addr);
|
||||
return virt_addr;
|
||||
}
|
||||
|
||||
#undef LOG
|
||||
|
||||
/* Redirect future callers to our hooks. */
|
||||
#define memcpy vgpu_unlock_memcpy_hook
|
||||
#define nv_ioremap vgpu_unlock_nv_ioremap_hook
|
||||
#define nv_ioremap_nocache vgpu_unlock_nv_ioremap_nocache_hook
|
||||
#define nv_ioremap_cache vgpu_unlock_nv_ioremap_cache_hook
|
||||
#define nv_ioremap_wc vgpu_unlock_nv_ioremap_wc_hook
|
Loading…
Reference in a new issue