One of the most important skills in binary auditing is identifying variables within assembly code. For beginners, such variables are often not as easy to identify as they are when reading C++ code. This time we have a look at many different variables at once. What I can do is give you some hints. The original source code contained: double, long double, unsigned char, signed char, unsigned long int, signed long int, unsigned short int, signed short int, unsigned int, signed int, bool, float, and wchar_t. Can you work out which variable type corresponds to which lines of the assembly code?
.text:00401000 ; int __cdecl main(int argc, const char **argv, const char **envp)
.text:00401000 _main           proc near
.text:00401000
.text:00401000 var_38          = qword ptr -38h
.text:00401000 var_2C          = dword ptr -2Ch
.text:00401000 var_25          = byte ptr -25h
.text:00401000 var_24          = dword ptr -24h
.text:00401000 var_20          = word ptr -20h
.text:00401000 var_1C          = dword ptr -1Ch
.text:00401000 var_18          = word ptr -18h
.text:00401000 var_14          = word ptr -14h
.text:00401000 var_F           = byte ptr -0Fh
.text:00401000 var_E           = byte ptr -0Eh
.text:00401000 var_D           = byte ptr -0Dh
.text:00401000 var_C           = dword ptr -0Ch
.text:00401000 var_8           = qword ptr -8
.text:00401000 argc            = dword ptr  8
.text:00401000 argv            = dword ptr  0Ch
.text:00401000 envp            = dword ptr  10h
.text:00401000
.text:00401000                 push    ebp                     ; set up the stack frame
.text:00401001                 mov     ebp, esp
.text:00401003                 sub     esp, 3Ch                ; reserve 3Ch bytes for the locals
.text:00401006                 mov     [ebp+var_D], 0
.text:0040100A                 mov     [ebp+var_D], 0FFh
.text:0040100E                 mov     [ebp+var_25], 80h
.text:00401012                 mov     [ebp+var_25], 7Fh
.text:00401016                 xor     eax, eax
.text:00401018                 mov     [ebp+var_18], ax
.text:0040101C                 mov     ecx, 0FFFFh
.text:00401021                 mov     [ebp+var_18], cx
.text:00401025                 mov     edx, 0FFFF8000h
.text:0040102A                 mov     [ebp+var_14], dx
.text:0040102E                 mov     eax, 7FFFh
.text:00401033                 mov     [ebp+var_14], ax
.text:00401037                 mov     [ebp+var_1C], 0
.text:0040103E                 mov     [ebp+var_1C], 0FFFFFFFFh
.text:00401045                 mov     [ebp+var_1C], 80000000h
.text:0040104C                 mov     [ebp+var_1C], 7FFFFFFFh
.text:00401053                 mov     [ebp+var_24], 0
.text:0040105A                 mov     [ebp+var_24], 0FFFFFFFFh
.text:00401061                 mov     [ebp+var_2C], 80000000h
.text:00401068                 mov     [ebp+var_2C], 7FFFFFFFh
.text:0040106F                 mov     [ebp+var_F], 1
.text:00401073                 mov     [ebp+var_E], 0
.text:00401077                 fld     ds:__real@40aafae0
.text:0040107D                 fstp    [ebp+var_C]
.text:00401080                 fld     ds:__real@40155f5bff2d92cd
.text:00401086                 fstp    [ebp+var_8]
.text:00401089                 fld     ds:__real@40155f5bff2d92cd
.text:0040108F                 fstp    [ebp+var_38]
.text:00401092                 mov     ecx, 41h
.text:00401097                 mov     [ebp+var_20], cx
.text:0040109B                 xor     eax, eax                ; eax = 0 on return
.text:0040109D                 mov     esp, ebp                ; tear down the stack frame
.text:0040109F                 pop     ebp
.text:004010A0                 retn
.text:004010A0 _main
Are we going to get the solution at some point?
At some point: yes, I will release the original sources
Here is what I have so far; I am not sure about a couple of my answers:
.text:00401000 ; int __cdecl main(int argc, const char **argv, const char **envp)
.text:00401000 _main proc near
.text:00401000
.text:00401000 var_38= qword ptr -38h ; 8-byte FP local, second of the two qword fstp stores -> long double (same size as double in this build)
.text:00401000 var_2C= dword ptr -2Ch ; 4-byte integer; gets 80000000h then 7FFFFFFFh (signed min/max) -> signed long int in the posted source
.text:00401000 var_25= byte ptr -25h ; 1-byte integer; gets 80h then 7Fh (-128 / +127) -> signed char
.text:00401000 var_24= dword ptr -24h ; 4-byte integer; gets 0 then 0FFFFFFFFh (unsigned min/max) -> unsigned long int in the posted source
.text:00401000 var_20= word ptr -20h ; 2-byte integer; gets 41h ('A') -> wchar_t (2 bytes in this build)
.text:00401000 var_1C= dword ptr -1Ch ; 4-byte integer; gets 0, 0FFFFFFFFh, 80000000h, 7FFFFFFFh -> unsigned int in the posted source (it also receives the signed int values)
.text:00401000 var_18= word ptr -18h ; 2-byte integer; gets 0 then 0FFFFh -> unsigned short int
.text:00401000 var_14= word ptr -14h ; 2-byte integer; gets 8000h then 7FFFh (-32,768 / +32,767) -> signed short int
.text:00401000 var_F= byte ptr -0Fh ; 1-byte integer; gets 1 -> bool (true)
.text:00401000 var_E= byte ptr -0Eh ; 1-byte integer; gets 0 -> bool (false)
.text:00401000 var_D= byte ptr -0Dh ; 1-byte integer; gets 0 then 0FFh (0 / 255) -> unsigned char
.text:00401000 var_C= dword ptr -0Ch ; 4-byte FP local (fld/fstp of a 32-bit constant) -> float
.text:00401000 var_8= qword ptr -8 ; 8-byte FP local, first of the two qword fstp stores -> double
.text:00401000 argc= dword ptr 8 ; int argc (see the prototype comment at the top of this listing)
.text:00401000 argv= dword ptr 0Ch ; const char **argv
.text:00401000 envp= dword ptr 10h ; const char **envp
.text:00401000
.text:00401000 push ebp ; save the caller's frame pointer
.text:00401001 mov ebp, esp ; ebp (a 32-bit register) becomes the base for the var_* / arg offsets
.text:00401003 sub esp, 3Ch ; reserve 3Ch (60) bytes for the locals
.text:00401006 mov [ebp+var_D], 0 ; var_D = 0 (byte store; the width comes from the byte ptr slot, not from ebp)
.text:0040100A mov [ebp+var_D], 0FFh ; var_D = 0FFh = 255 as an unsigned byte
.text:0040100E mov [ebp+var_25], 80h ; var_25 = 80h = -128 as a signed byte
.text:00401012 mov [ebp+var_25], 7Fh ; var_25 = 7Fh = +127
.text:00401016 xor eax, eax ; eax = 0 (all 32 bits)
.text:00401018 mov [ebp+var_18], ax ; var_18 = 0; ax is the low 16 bits of the 32-bit eax
.text:0040101C mov ecx, 0FFFFh ; ecx = 0FFFFh = 65,535
.text:00401021 mov [ebp+var_18], cx ; var_18 = 0FFFFh = 65,535; cx is the low 16 bits of ecx
.text:00401025 mov edx, 0FFFF8000h ; edx = 0FFFF8000h, i.e. -32,768 sign-extended to 32 bits (4,294,934,528 if read unsigned)
.text:0040102A mov [ebp+var_14], dx ; var_14 = 8000h = -32,768 as a signed word; dx is the low 16 bits of edx
.text:0040102E mov eax, 7FFFh ; eax = 7FFFh = 32,767
.text:00401033 mov [ebp+var_14], ax ; var_14 = 7FFFh = +32,767 (16-bit store)
.text:00401037 mov [ebp+var_1C], 0 ; var_1C = 0
.text:0040103E mov [ebp+var_1C], 0FFFFFFFFh ; var_1C = 0FFFFFFFFh = 4,294,967,295 unsigned
.text:00401045 mov [ebp+var_1C], 80000000h ; var_1C = 80000000h = -2,147,483,648 if read as signed (2,147,483,648 unsigned)
.text:0040104C mov [ebp+var_1C], 7FFFFFFFh ; var_1C = 7FFFFFFFh = +2,147,483,647
.text:00401053 mov [ebp+var_24], 0 ; var_24 = 0
.text:0040105A mov [ebp+var_24], 0FFFFFFFFh ; var_24 = 0FFFFFFFFh = 4,294,967,295 unsigned
.text:00401061 mov [ebp+var_2C], 80000000h ; var_2C = 80000000h = -2,147,483,648 as a signed dword
.text:00401068 mov [ebp+var_2C], 7FFFFFFFh ; var_2C = 7FFFFFFFh = +2,147,483,647
.text:0040106F mov [ebp+var_F], 1 ; var_F = 1 (true)
.text:00401073 mov [ebp+var_E], 0 ; var_E = 0 (false)
.text:00401077 fld ds:__real@40aafae0 ; load the 32-bit IEEE-754 constant 40AAFAE0h (about 5.3431) onto the FPU stack; it is a float bit pattern, not the integer 1,084,947,168
.text:0040107D fstp [ebp+var_C] ; var_C = that value, stored as a 4-byte float
.text:00401080 fld ds:__real@40155f5bff2d92cd ; load the 64-bit IEEE-754 constant 40155F5BFF2D92CDh (about 5.3431); a double bit pattern, not an integer
.text:00401086 fstp [ebp+var_8] ; var_8 = that value, stored as an 8-byte double
.text:00401089 fld ds:__real@40155f5bff2d92cd ; load the same 64-bit constant again
.text:0040108F fstp [ebp+var_38] ; var_38 = that value, also stored in 8 bytes
.text:00401092 mov ecx, 41h ; ecx = 41h = 65, the code for 'A'
.text:00401097 mov [ebp+var_20], cx ; var_20 = 0041h; a 16-bit store, so the slot is 2 bytes wide
.text:0040109B xor eax, eax ; eax = 0, the value main returns
.text:0040109D mov esp, ebp ; release the locals
.text:0040109F pop ebp ; restore the caller's frame pointer
.text:004010A0 retn ; return to the caller
.text:004010A0 _main
Frankly, there are a lot of ambiguous types, since you don’t really do anything with the variables. Based on the disassembly, I would say you have four 1-byte integer variables and only three 4-byte integer variables, which contradicts the list of types you gave. The integer constants that you use might imply some things, but at the assembly level they don’t tell you anything definite other than the size of the variable.
// Best-guess declarations for the stack slots in the listing. Only the slot
// sizes and the floating-point stores are certain; the signedness of the
// integer types is inferred from the constants stored into each slot.
float var_C;            // dword slot written with fld/fstp
unsigned char var_D;    // byte slot; receives 0 and 0FFh
bool var_E;             // byte slot; receives 0
bool var_F;             // byte slot; receives 1
double var_8;           // qword slot written with fld/fstp
short var_14;           // word slot; receives 8000h and 7FFFh
unsigned short var_18;  // word slot; receives 0 and 0FFFFh
int var_1C;             // dword slot; receives 0, 0FFFFFFFFh, 80000000h and 7FFFFFFFh
wchar_t var_20;         // word slot; receives 41h
unsigned int var_24;    // dword slot; receives 0 and 0FFFFFFFFh
char var_25;            // byte slot; receives 80h and 7Fh
long var_2C;            // dword slot; receives 80000000h and 7FFFFFFFh
long double var_38;     // qword slot written with fld/fstp
We get the Floating Point ones first, those are easy due to FLD/FSTP instructions
referencing the memory values:
8 double, var_38 (or var_8)
8 long double, var_8 (or var_38)
4 float, var_C
Next come the integer types. The byte sizes I expect from the usual x86 compiler defaults
don’t match the sizes I see declared: my list has one more DWORD-sized and one fewer BYTE-sized
value. Even if they did match, the best I could do is guess; you CAN’T know
that something is a char just because you move 0x41 (‘A’) into it. Likewise, I can’t presume a var is
signed just because you move 0xFF (-1) into it; it could just be holding 0xFF in an unsigned manner!
1 unsigned char,
1 signed char,
1 bool,
2 unsigned short int,
2 signed short int,
2 wchar_t,
4 unsigned long int,
4 signed long int,
4 unsigned int,
4 signed int,
So I think the correct answer is, you can tell the floats, but you AT BEST can take a wild guess as
to the types of the other variables.
Here we go with the solution. The original code was:
// Solution source for the exercise: each local is assigned the smallest and
// then the largest value of its type (true/false for the bools, 5.3431243774
// for the floating-point types, 'A' for wchar_t) so that the stores show up
// as distinct constants in the disassembly. The size comments give the sizes
// seen in the posted listing.
int main(int argc, char* argv[])
{
unsigned char myChar; myChar = 0; myChar = 255; // 1 byte; stored as 0 and 0FFh
signed char mySignedChar; mySignedChar = -128; mySignedChar = 127; // 1 byte (not 2); stored as 80h and 7Fh
unsigned short int myShort; myShort = 0; myShort = 65535; // 2 bytes; stored as 0 and 0FFFFh
signed short int mySignedShort; mySignedShort = -32768; mySignedShort = 32767; // 2 bytes; stored as 8000h and 7FFFh
unsigned int myInt; myInt = 0; myInt = 4294967295 ; // 4 bytes
// NOTE(review): both assignments on the next line target myInt, not
// mySignedInt. This matches the listing, where a single dword slot (var_1C)
// receives four stores and only three dword integer locals exist.
signed int mySignedInt; myInt = -2147483648; myInt = 2147483647; // 4 bytes
unsigned long int myLong; myLong=0; myLong=4294967295; // 4 bytes
signed long int mySignedLong; mySignedLong=-2147483648; mySignedLong=2147483647; // 4 bytes
bool myTrue; myTrue = true; // 1 byte
bool myFalse; myFalse = false; // 1 byte
float myFloat; myFloat = 5.3431243774; // 4 bytes
double myDouble; myDouble = 5.3431243774; // 8 bytes
long double myLongDouble; myLongDouble = 5.3431243774; // 8 bytes in this build (qword slot in the listing)
// NOTE(review): the quotes around A on the next line are typographic
// characters as posted; a compilable source needs 'A' (or L'A').
wchar_t myWChar; myWChar = ‘A’; // 2 or 4 bytes depending on the platform; 2 bytes (word slot) in the listing
return 0;
}
I’m working through this with the help of Chris Eagle’s book and IDA 5.5. Where could I get an explanation of how the signs are interpreted in the various lines above?
Dr. Schnieder, an explanation of how the lines in C/C++ translate to the lines of ASM would be helpful.
Duh, two’s complement…never mind, brain not engaged.