Saturday, August 23, 2014

Playing with a simple 64 bit binary (Cheating an assignment)


So, I heard some people asking: "What does the binary that was given to us to evaluate our assignment actually do?" For those who don't know, the assignment was to install ArchLinux on one's laptop. So, I took a look at the binary and came up with the following:
➜  /home/wani/Something  objdump -D eval > eval_dump   
➜  /home/wani/Something  strings eval          
/lib64/ld-linux-x86-64.so.2
libc.so.6
exit
fopen
perror
__isoc99_scanf
fgetc
strlen
getchar
fclose
fprintf
__libc_start_main
__gmon_start__
GLIBC_2.7
GLIBC_2.2.5
UH-H
fffff.
AWAVA
AUATL
[]A\A]A^A_
Full Name: 
Roll No.:
/proc/version
Error while opening the file.
;*3$"
➜  /home/wani/Something  
The interesting strings are: "Full Name:", "Roll No.:" and "/proc/version". Let us analyze a small part of the main section of the binary's dump:
[...]
00000000004007b6 <main>:
  4007b6:       55                      push   %rbp
  4007b7:       48 89 e5                mov    %rsp,%rbp               #Stack Stuff
  4007ba:       48 81 ec f0 0b 00 00    sub    $0xbf0,%rsp
  4007c1:       c7 45 fc 00 00 00 00    movl   $0x0,-0x4(%rbp)
  4007c8:       bf 48 0a 40 00          mov    $0x400a48,%edi          #Argument to printf, address of string: "Full Name:"
  4007cd:       b8 00 00 00 00          mov    $0x0,%eax
  4007d2:       e8 49 fe ff ff          callq  400620 <printf@plt>
  4007d7:       48 8d 85 00 fc ff ff    lea    -0x400(%rbp),%rax
  4007de:       48 89 c6                mov    %rax,%rsi
  4007e1:       bf 54 0a 40 00          mov    $0x400a54,%edi          #Format specifier for scanf: %[^\n]
  4007e6:       b8 00 00 00 00          mov    $0x0,%eax
  4007eb:       e8 b0 fe ff ff          callq  4006a0 <__isoc99_scanf@plt>
  4007f0:       e8 5b fe ff ff          callq  400650 <getchar@plt> 
  4007f5:       bf 5a 0a 40 00          mov    $0x400a5a,%edi          #Argument to printf, address of string: "Roll No.:"
  4007fa:       b8 00 00 00 00          mov    $0x0,%eax
  4007ff:       e8 1c fe ff ff          callq  400620 <printf@plt>
  400804:       48 8d 85 10 f4 ff ff    lea    -0xbf0(%rbp),%rax
  40080b:       48 89 c6                mov    %rax,%rsi
  40080e:       bf 54 0a 40 00          mov    $0x400a54,%edi          #Format specifier for scanf: %[^\n]
  400813:       b8 00 00 00 00          mov    $0x0,%eax
  400818:       e8 83 fe ff ff          callq  4006a0 <__isoc99_scanf@plt>
  40081d:       be 64 0a 40 00          mov    $0x400a64,%esi          #Argument to fopen, address of string: "r"
  400822:       bf 66 0a 40 00          mov    $0x400a66,%edi          #Argument to fopen, address of string: "/proc/version"
  400827:       e8 54 fe ff ff          callq  400680 <fopen@plt>
  40082c:       48 89 45 f0             mov    %rax,-0x10(%rbp)
  400830:       48 83 7d f0 00          cmpq   $0x0,-0x10(%rbp)
  400835:       75 14                   jne    40084b <main+0x95>      #NULL check: if fopen succeeded (non-NULL), goto 40084b; otherwise fall through to perror/exit
  400837:       bf 78 0a 40 00          mov    $0x400a78,%edi
  40083c:       e8 4f fe ff ff          callq  400690 <perror@plt>
  400841:       bf ff ff ff ff          mov    $0xffffffff,%edi
  400846:       e8 65 fe ff ff          callq  4006b0 <exit@plt>
  40084b:       eb 16                   jmp    400863 <main+0xad>
[...]  
Let us confirm whether the addresses are indeed pointing to what I claim they are pointing to. In the same dump file:
[...]
  400a48:       46 75 6c                rex.RX jne 400ab7 <_IO_stdin_used+0x77>
  400a4b:       6c                      insb   (%dx),%es:(%rdi)
  400a4c:       20 4e 61                and    %cl,0x61(%rsi)
  400a4f:       6d                      insl   (%dx),%es:(%rdi)
  400a50:       65 3a 20                cmp    %gs:(%rax),%ah
[...]
  400a59:       00 52 6f                add    %dl,0x6f(%rdx)
  400a5c:       6c                      insb   (%dx),%es:(%rdi)
  400a5d:       6c                      insb   (%dx),%es:(%rdi)
  400a5e:       20 4e 6f                and    %cl,0x6f(%rsi)
  400a61:       2e 3a 00                cmp    %cs:(%rax),%al
[...]
  400a64:       72 00                   jb     400a66 <_IO_stdin_used+0x26>
[...]
  400a66:       2f                      (bad)  
  400a67:       70 72                   jo     400adb <_IO_stdin_used+0x9b>
  400a69:       6f                      outsl  %ds:(%rsi),(%dx)
  400a6a:       63 2f                   movslq (%rdi),%ebp
  400a6c:       76 65                   jbe    400ad3 <_IO_stdin_used+0x93>
  400a6e:       72 73                   jb     400ae3 <_IO_stdin_used+0xa3>
  400a70:       69 6f 6e 00 00 00 00    imul   $0x0,0x6e(%rdi),%ebp
[...]
➜  /home/wani/Something  python
Python 2.7.3 (default, Mar 13 2014, 11:03:55) 
[GCC 4.7.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> hex2chr = lambda(x): chr(int('0x'+str(x), 16))
>>> f = lambda(x): map(hex2chr, x)
>>> ''.join(f(['46', '75', '6c', '6c', '20', '4e', '61', '6d', '65', '3a']))
'Full Name:'
>>> ''.join(f(['52', '6f', '6c', '6c', '20', '4e', '6f', '2e', '3a']))
'Roll No.:'
>>> ''.join(f(['72']))
'r'
>>> ''.join(f(['2f', 70, '72', '6f', '63', '2f', '76', '65', '72', '73', '69', '6f', '6e']))
'/proc/version'
>>> 
So it seems like the program only needs to capture the contents of the file /proc/version, get your roll number and name, do some voodoo-magic with them, and print out another weird binary file.
It is time to start cheating. Google for the contents of the file /proc/version on ArchLinux and save them to a file, say /aroc/version. Now, fire up gdb:
➜  /home/wani/Something  gdb eval
GNU gdb (GDB) 7.4.1-debian
Copyright (C) 2012 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later 
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
For bug reporting instructions, please see:
...
Reading symbols from /home/wani/Something/eval...(no debugging symbols found)...done.
(gdb) break main
Breakpoint 1 at 0x4007ba
(gdb) r
Starting program: /home/wani/Something/eval 

Breakpoint 1, 0x00000000004007ba in main ()
(gdb) info files
Symbols from "/home/wani/Something/eval".
Unix child process:
 Using the running image of child process 11358.
 While running this, GDB does not access memory from...
Local exec file:
 `/home/wani/Something/eval', file type elf64-x86-64.
 Entry point: 0x4006c0
 0x0000000000400200 - 0x000000000040021c is .interp
 0x000000000040021c - 0x000000000040023c is .note.ABI-tag
 0x000000000040023c - 0x0000000000400260 is .note.gnu.build-id
 0x0000000000400260 - 0x000000000040027c is .gnu.hash
 0x0000000000400280 - 0x00000000004003b8 is .dynsym
 0x00000000004003b8 - 0x000000000040043f is .dynstr
 0x0000000000400440 - 0x000000000040045a is .gnu.version
 0x0000000000400460 - 0x0000000000400490 is .gnu.version_r
 0x0000000000400490 - 0x00000000004004a8 is .rela.dyn
 0x00000000004004a8 - 0x00000000004005c8 is .rela.plt
 0x00000000004005c8 - 0x00000000004005e2 is .init
 0x00000000004005f0 - 0x00000000004006c0 is .plt
 0x00000000004006c0 - 0x0000000000400a32 is .text
 0x0000000000400a34 - 0x0000000000400a3d is .fini
 0x0000000000400a40 - 0x0000000000400aa2 is .rodata #readonly stuff is present here. (not exactly readonly for gdb though)
 0x0000000000400aa4 - 0x0000000000400ad8 is .eh_frame_hdr
 0x0000000000400ad8 - 0x0000000000400bcc is .eh_frame
[...]
(gdb) find /b 0x0000000000400a40, 0x0000000000400aa2, 'p', 'r', 'o', 'c'
0x400a67
1 pattern found.
(gdb) p (char *)0x400a67
$1 = 0x400a67 "proc/version"
(gdb) set {char}0x400a67 = 'a'
(gdb) p (char *)0x400a67
$2 = 0x400a67 "aroc/version"
(gdb) c
Continuing.
Full Name: Nehal J Wani
Roll No.:201125005
[Inferior 1 (process 11358) exited normally]
(gdb) quit
➜  /home/wani/Something
So, essentially, we have changed the running program by pausing it, changing the value of a single char in its memory, and fooling it into reading from /aroc/version instead of /proc/version. Since the content will be the same, the voodoo-magic will also produce the same output in the resulting weird binary file. See, I didn't even install ArchLinux and still got full marks :P Now, to satisfy our curiosity about the voodoo-thingy, let us analyze the rest of the binary.
[...]
  40084d:       8b 45 fc                mov    -0x4(%rbp),%eax                  #Looping stuff through file 
  400850:       8d 50 01                lea    0x1(%rax),%edx
  400853:       89 55 fc                mov    %edx,-0x4(%rbp)
  400856:       48 98                   cltq   
  400858:       0f b6 55 ef             movzbl -0x11(%rbp),%edx
  40085c:       88 94 05 30 f4 ff ff    mov    %dl,-0xbd0(%rbp,%rax,1)          #Store contents of entire file to $rbp-0xbd0
  400863:       48 8b 45 f0             mov    -0x10(%rbp),%rax
  400867:       48 89 c7                mov    %rax,%rdi
  40086a:       e8 c1 fd ff ff          callq  400630 <fgetc@plt>
  40086f:       88 45 ef                mov    %al,-0x11(%rbp)
  400872:       80 7d ef ff             cmpb   $0xff,-0x11(%rbp)                #EOF check
  400876:       75 d5                   jne    40084d <main+0x97>               #jump, baby, jump! 
  400878:       83 6d fc 01             subl   $0x1,-0x4(%rbp)
  40087c:       8b 45 fc                mov    -0x4(%rbp),%eax
  40087f:       48 98                   cltq   
  400881:       c6 84 05 30 f4 ff ff    movb   $0x0,-0xbd0(%rbp,%rax,1)
  400888:       00   
  400889:       48 8b 45 f0             mov    -0x10(%rbp),%rax
  40088d:       48 89 c7                mov    %rax,%rdi
  400890:       e8 6b fd ff ff          callq  400600 <fclose@plt>
  400895:       48 8d 85 10 f4 ff ff    lea    -0xbf0(%rbp),%rax                #My Roll Number is stored at $rbp-0xbf0
  40089c:       be 97 0a 40 00          mov    $0x400a97,%esi                   #0x400a97 holds the value "w"
  4008a1:       48 89 c7                mov    %rax,%rdi
  4008a4:       e8 d7 fd ff ff          callq  400680 <fopen@plt>               #Open file by the name: $myrollnumber
  4008a9:       48 89 45 f0             mov    %rax,-0x10(%rbp)
  4008ad:       48 8d 85 00 fc ff ff    lea    -0x400(%rbp),%rax                #My Name is stored at $rbp-0x400
  4008b4:       48 89 c7                mov    %rax,%rdi
  4008b7:       e8 54 fd ff ff          callq  400610 <strlen@plt>
  4008bc:       89 45 e8                mov    %eax,-0x18(%rbp)                 #$rbp-0x18 stores the length of my name 
  4008bf:       c7 45 fc 00 00 00 00    movl   $0x0,-0x4(%rbp)                  #Loop counter
  4008c6:       eb 21                   jmp    4008e9 <main+0x133>              #Start of loop 
  4008c8:       8b 45 fc                mov    -0x4(%rbp),%eax                  #Loop counter
  4008cb:       48 98                   cltq   
  4008cd:       0f b6 84 05 00 fc ff    movzbl -0x400(%rbp,%rax,1),%eax
  4008d4:       ff   
  4008d5:       f7 d0                   not    %eax 
  4008d7:       89 c2                   mov    %eax,%edx                        #name[i] = ~name[i]
  4008d9:       8b 45 fc                mov    -0x4(%rbp),%eax
  4008dc:       48 98                   cltq   
  4008de:       88 94 05 00 fc ff ff    mov    %dl,-0x400(%rbp,%rax,1)
  4008e5:       83 45 fc 01             addl   $0x1,-0x4(%rbp)                  #Increment counter
  4008e9:       8b 45 fc                mov    -0x4(%rbp),%eax
  4008ec:       3b 45 e8                cmp    -0x18(%rbp),%eax
  4008ef:       7c d7                   jl     4008c8 <main+0x112>              #Jump, baby, jump! 
  4008f1:       48 8d 85 10 f4 ff ff    lea    -0xbf0(%rbp),%rax
  4008f8:       48 89 c7                mov    %rax,%rdi
  4008fb:       e8 10 fd ff ff          callq  400610 <strlen@plt>              #Length of $myrollnumber
  400900:       89 45 e8                mov    %eax,-0x18(%rbp)
  400903:       c7 45 fc 00 00 00 00    movl   $0x0,-0x4(%rbp)                  #Initialize loop counter to 0 
  40090a:       eb 21                   jmp    40092d <main+0x177>
  40090c:       8b 45 fc                mov    -0x4(%rbp),%eax
  40090f:       48 98                   cltq   
  400911:       0f b6 84 05 10 f4 ff    movzbl -0xbf0(%rbp,%rax,1),%eax
  400918:       ff   
  400919:       f7 d0                   not    %eax                             #roll[i] = ~roll[i]
  40091b:       89 c2                   mov    %eax,%edx
  40091d:       8b 45 fc                mov    -0x4(%rbp),%eax
  400920:       48 98                   cltq   
  400922:       88 94 05 10 f4 ff ff    mov    %dl,-0xbf0(%rbp,%rax,1)          #Store modified contents
  400929:       83 45 fc 01             addl   $0x1,-0x4(%rbp)
  40092d:       8b 45 fc                mov    -0x4(%rbp),%eax
  400930:       3b 45 e8                cmp    -0x18(%rbp),%eax
  400933:       7c d7                   jl     40090c <main+0x156>              #Jump, baby, jump! 
  400935:       48 8d 85 30 f4 ff ff    lea    -0xbd0(%rbp),%rax
  40093c:       48 89 c7                mov    %rax,%rdi
  40093f:       e8 cc fc ff ff          callq  400610 <strlen@plt>              #Length of entire file /proc/version
  400944:       89 45 e8                mov    %eax,-0x18(%rbp)
  400947:       c7 45 fc 00 00 00 00    movl   $0x0,-0x4(%rbp)                  #Initialize loop counter to 0 
  40094e:       eb 21                   jmp    400971 <main+0x1bb>
  400950:       8b 45 fc                mov    -0x4(%rbp),%eax
  400953:       48 98                   cltq   
  400955:       0f b6 84 05 30 f4 ff    movzbl -0xbd0(%rbp,%rax,1),%eax
  40095c:       ff   
  40095d:       f7 d0                   not    %eax                             #file[i] = ~file[i]
  40095f:       89 c2                   mov    %eax,%edx
  400961:       8b 45 fc                mov    -0x4(%rbp),%eax
  400964:       48 98                   cltq   
  400966:       88 94 05 30 f4 ff ff    mov    %dl,-0xbd0(%rbp,%rax,1)          #Store modified contents
  40096d:       83 45 fc 01             addl   $0x1,-0x4(%rbp)
  400971:       8b 45 fc                mov    -0x4(%rbp),%eax
  400974:       3b 45 e8                cmp    -0x18(%rbp),%eax
  400977:       7c d7                   jl     400950 <main+0x19a>              #Jump, baby, jump! 
  400979:       48 8d b5 30 f4 ff ff    lea    -0xbd0(%rbp),%rsi
  400980:       48 8d 8d 10 f4 ff ff    lea    -0xbf0(%rbp),%rcx
  400987:       48 8d 95 00 fc ff ff    lea    -0x400(%rbp),%rdx
  40098e:       48 8b 45 f0             mov    -0x10(%rbp),%rax
  400992:       49 89 f0                mov    %rsi,%r8
  400995:       be 99 0a 40 00          mov    $0x400a99,%esi                   #Format specifier: "%s\n%s\n%s"
  40099a:       48 89 c7                mov    %rax,%rdi
  40099d:       b8 00 00 00 00          mov    $0x0,%eax
  4009a2:       e8 b9 fc ff ff          callq  400660 <fprintf@plt>             #Print all three modified strings to file $myrollnumber
  4009a7:       48 8b 45 f0             mov    -0x10(%rbp),%rax
  4009ab:       48 89 c7                mov    %rax,%rdi
  4009ae:       e8 4d fc ff ff          callq  400600 <fclose@plt>
  4009b3:       b8 00 00 00 00          mov    $0x0,%eax
  4009b8:       c9                      leaveq 
  4009b9:       c3                      retq   
[...]
So, now the equivalent C program can be easily written.
#include <stdio.h>
#include <string.h>

/* Flip every bit of the first len bytes of buf, in place. */
static void invert_bytes(char *buf, size_t len) {
   for (size_t k = 0; k < len; k++) {
      buf[k] = (char)~buf[k];
   }
}

/*
 * C reconstruction of the `eval` binary disassembled above.
 *
 * Prompts for a name and a roll number, reads /proc/version, bit-inverts
 * all three strings and writes them, newline separated, to a file named
 * after the roll number (the name is taken before inversion).
 *
 * Returns 0 on success, -1 if either file cannot be opened or the output
 * cannot be flushed.
 */
int main() {
   /*
    * In the disassembly roll lives at rbp-0xbf0, file at rbp-0xbd0 and
    * name at rbp-0x400, which leaves 32 bytes for roll and 2000 for file.
    * The size of name is a guess (it must stop short of the locals near
    * rbp). Zero-initialised so an empty input line leaves a valid string.
    */
   char name[1000] = "";
   char roll[32] = "";
   char file[2000];
   size_t n = 0;
   int c;
   FILE *fp;

   /* Prompts match the session shown above: only the first has a space. */
   printf("Full Name: ");
   scanf("%999[^\n]", name);   /* width keeps the read inside name[] */
   getchar();                  /* swallow the newline left by scanf  */
   printf("Roll No.:");
   scanf("%31[^\n]", roll);    /* width keeps the read inside roll[] */

   fp = fopen("/proc/version", "r");
   if (fp == NULL) {
      perror("Error while opening the file.\n");
      return(-1);
   }

   /* Compare the int result with EOF before narrowing it to char. */
   while (n < sizeof file - 1 && (c = fgetc(fp)) != EOF) {
      file[n++] = (char)c;
   }
   fclose(fp);

   /*
    * The binary decrements its counter once after the read loop and
    * stores a 0 byte there, i.e. it drops the last character read
    * (the trailing newline of /proc/version) and terminates the string.
    */
   if (n > 0) {
      n--;
   }
   file[n] = '\0';

   /* Open the output before roll is inverted: roll is the file name. */
   fp = fopen(roll, "w");
   if (fp == NULL) {
      perror(roll);
      return(-1);
   }

   /*
    * Lengths are taken once, before inverting, as the binary does: a
    * 0xFF byte becomes 0x00 and would shorten a re-evaluated strlen().
    */
   invert_bytes(name, strlen(name));
   invert_bytes(roll, strlen(roll));
   invert_bytes(file, n);

   fprintf(fp, "%s\n%s\n%s", name, roll, file);
   if (fclose(fp) != 0) {
      perror("fclose");
      return(-1);
   }

   return(0);
}