做了下空指针公开赛的逆向题,做到三分之一左右遇到知识盲区就做不下去了,我太菜了。。。后面看了下官方wp才解决了知识盲区。。。
一打开套了个UPX壳,直接脱掉。
主逻辑在sub_123413B0
稍微看一下就能发现一些花指令,模式是
1 2 3 4 5 6 call $+9 db jmp $+9 db add dword ptr [esp], 1 retn 
还有
1 2 3 4 5 jmp $+5 db retn db call $-2 
这部分的去花脚本如下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 def  next_instr (addr ):    return  addr+ItemSize(addr) def  patch_nop (begin,end ):    while (end>begin):         PatchByte(begin,0x90 )         begin=begin+1      MakeCode(begin) def  undefine (addr, end ):    while  addr<end:         MakeUnkn(addr, 0 )         addr += 1  def  dejunk (addr, end ):    while  addr < end:         MakeCode(addr)         next  = next_instr(addr)         if  GetMnem(addr) == 'call' :             if  GetOperandValue(addr, 0 ) == addr + 9 :                 print ("call jmp ret: %08x" %addr)                 next  = addr + 15                  patch_nop(addr, next )                 addr = next                  continue              elif  GetOperandValue(addr, 0 ) == addr + 6  and  GetDisasm(addr + 6 ) == "add     esp, 4" :                 print ("call add esp: %08x" %addr)                 next  = addr + 9                  patch_nop(addr, next )                 addr = next                  continue          if  GetMnem(addr) == 'jmp'  and  GetOperandValue(addr, 0 ) == addr + 5  and  Byte(addr+3 ) == 0xC3 :             print ('jmp ret call: %08x' %addr)             next  = addr + 10              patch_nop(addr, next )             addr = next              continue          addr = next  dejunk(0x123413B0 , 0x1234165D ) undefine(0x123413B0 , 0x1234165D ) MakeCode(0x123413B0 ) 
去除后:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 int  sub_123413B0 () {   int  v0;    char  *input;    show(0 );   input = (char  *)malloc (strlen ("npointer{" ) + strlen ("}" ) + 33 );   printf ("Input the correct keys: " );   scanf ("%s" , input);   if  ( !memcmp (input, "npointer{" , strlen ("npointer{" ))     && input[strlen (input) - 1 ] == asc_12343738[0 ]     && (input[strlen (input) - 1 ] = 0 , check((int )&code, 0x30D3 u), v0) )   {     show(1u );     printf ("Congrats!\n" );   }   else    {     printf ("Sorry, the gate remains closed.\n" );   }   return  system("pause" ); } 
逻辑很简单,主要看check函数
check依然加了花,结尾多了一种花指令
去除后的check函数:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 int  __cdecl check (int  code, SIZE_T size, int  flag) {   HANDLE v4;    int  v5;    LPVOID Dst;    Dst = VirtualAlloc(0 , size, 0x3000 u, 0x40 u);   if  ( !Dst )     return  0 ;   memcpy (Dst, (const  void  *)code, size);   v4 = GetCurrentProcess();   FlushInstructionCache(v4, Dst, size);   check_env();   v5 = ((int  (__fastcall *)(_DWORD, int ))Dst)(0 , flag);   VirtualFree(Dst, 0 , 0x8000 u);   return  v5; } 
分配内存,拷贝code。简单看下check_env
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 signed  int  anti_debug () {   unsigned  int  i;    char  *PBuffer;    size_t  PBufferSizeInBytes;    char  VarName[4 ];    int  v5;    __int16 v6;    char  v7;    *(_DWORD *)VarName = 0 ;   v5 = 0 ;   v6 = 0 ;   v7 = 0 ;   for  ( i = 0 ; i < 0xB ; ++i )     VarName[i] = ~byte_1234686C[i];   if  ( dupenv_s(&PBuffer, &PBufferSizeInBytes, VarName) )     return  0 ;   if  ( !PBuffer )     return  0 ;   free (PBuffer);   return  1 ; } 
这里的1234686c取反出来是WINDBG_DIR,一般反调试不会只加个windbg进黑名单(因为IDA和x86dbg用的比较多,至少我是这样的。。。),而且这种反调试非常明显。其实这里算是一个hint吧
所以最重要的部分在code里面,也就是12343798
在跳转到这里之前,还给一些寄存器赋了值:
1 2 3 4 # ebx: length of input # ecx: idx, initialize to zero # edx: input # edi: base_addr 
分析一下12343798开始的代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 .rdata:12343798                 cmp     ecx, ebx .rdata:1234379A                 jge     short loc_123437A3 .rdata:1234379C                 mov     al, [edx+ecx] .rdata:1234379F                 cmp     al, 66h .rdata:123437A1                 jz      short loc_123437A6 .rdata:123437A3 .rdata:123437A3 loc_123437A3:                           ; CODE XREF: .rdata:1234379A↑j .rdata:123437A3                 xor     eax, eax .rdata:123437A5                 retn .rdata:123437A6 ; ----------------------------------------------------------------- .rdata:123437A6 .rdata:123437A6 loc_123437A6:                           ; CODE XREF: .rdata:123437A1↑j .rdata:123437A6                 push    75h .rdata:123437A8                 xor     dword ptr [esp], 46h .rdata:123437AC                 call    loc_123437B2 .rdata:123437AC ; ----------------------------------------------------------------- .rdata:123437B1                 db  8Eh .rdata:123437B2 ; ----------------------------------------------------------------- .rdata:123437B2 .rdata:123437B2 loc_123437B2:                           ; CODE XREF: .rdata:123437AC↑j .rdata:123437B2                 mov     dword ptr [esp], 49Eh .rdata:123437B9                 add     [esp], edi .rdata:123437BC                 inc     ecx .rdata:123437BD                 retf 
首先对比第一位需要为f,接下来的代码比较关键:
1 2 3 4 5 6 7 8 push	75h xor     dword ptr [esp], 46h db call 	$+6 mov 	dword ptr [esp], 49Eh add		[esp], edi inc		ecx retf 
这里其实也是花指令,稍作分析这段代码等价于:
1 2 3 4 5 push	33h push 	49Eh		; 下面这两句相当于push (edi+49eh) add		[esp], edi	 inc		ecx			 retf 
自认为我的汇编基本功还是可以的,最早是看的王爽的《汇编语言》这本书,从8086学过来,所以还认得retf这条指令。这时在16位以下才用得到的,它相当于:
也就是在16位汇编下跳转到cs:ip地址处。
然而这里是32位,据我所知32位以上段寄存器不再被用来指向段了。那么被用来做什么呢,这我倒是没想过。然后就因为这个知识盲区卡住做不下去了QAQ
强行往下看,下面的代码十分的奇怪,基本看不出什么意义
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 .rdata:123437BE                 dec     eax .rdata:123437BF                 cmp     ecx, ebx .rdata:123437C1                 jge     short loc_123437D6 .rdata:123437C3                 mov     al, [si] .rdata:123437C6                 or      bh, [ebx+esi] .rdata:123437C9                 jz      short loc_12343823 .rdata:123437CB                 call    near ptr sub_123437D2 .rdata:123437D0                 jno     short near ptr loc_123437B9+1 .rdata:123437D2 .rdata:123437D2 ; =============== S U B R O U T I N E ======================================= .rdata:123437D2 .rdata:123437D2 .rdata:123437D2 sub_123437D2    proc far                ; CODE XREF: .rdata:123437CB↑p .rdata:123437D2 .rdata:123437D2 var_8           = dword ptr -8 .rdata:123437D2 arg_0           = dword ptr  8 .rdata:123437D2 .rdata:123437D2                 dec     eax .rdata:123437D3                 add     esp, 8 .rdata:123437D6 .rdata:123437D6 loc_123437D6:                           ; CODE XREF: .rdata:123437C1↑j .rdata:123437D6                 dec     eax .rdata:123437D7                 xor     eax, eax .rdata:123437D9                 call    loc_123437DF 
没办法,最后尝试调试。用x86dbg调试到前面都没问题,retf后发现先断到ntdll中,然后莫名来到0x20511这里了(这里VirtualAlloc分配的基地址是0x20000,即源代码中的12343CA9,又是莫名其妙。
我一度认为这里存在代码自修改,因此找了挺久有没有漏掉的地方,然后一无所获就放弃了
等到官方wp放出来才恍然大悟。
其实如果做题的时候搜索一下关键字cs
0x33就会有所收获,可能做题不在状态,或者还是菜。。。
 
原来cs 0x33的作用是把windows进程从32位切换为64位。
我们知道64位windows运行32位程序是在WoW64上的(这是Windows 32-bit on
Windows
64-bit的缩写,所以WoW64才是32位)。WoW64既然是Windows的子系统,就应该可以在WoW64上运行64位的代码。cs
0x33就是干这个的。运行32位代码的时候是cs 0x23,64位则是cs
0x33,只要改变cs为33就能够实现切换,本题中这段代码就是干这个事的。
懂了这点后就能继续往下做了。那么根据逻辑接下来来到base+0x49e处。
这里就很蛋疼了,由于我们使用ida32打开这个程序的,所以暂时没办法在ida中直接看64位代码。我们借助capstone这个逆向神器,直接在ida的python中看看这些代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 from  capstone import  *md_64 = Cs(CS_ARCH_X86, CS_MODE_64) md_32 = Cs(CS_ARCH_X86, CS_MODE_32) def  get_string (addr, len  ):    res = ""      for  i in  range (len ):         res += chr (Byte(addr + i))     return  res      def  d64 (addr, len =20  ):    global  md_64     print ("========================================================" )     base = 0      code_x64 = get_string(addr, 0x200 )     j = 0      for  i in  md_64.disasm(code_x64, addr):         if  j >= len :             break          j += 1          print ("0x%x:\t%s\t%s"  %(i.address, i.mnemonic, i.op_str))     print ("========================================================" )      def  d32 (addr, len =20  ):    global  md_32     print ("========================================================" )     code_x86 = get_string(addr, 0x200 )     j = 0      for  i in  md_32.disasm(code_x86, addr):         if  j >= len :             break          j += 1          print ("0x%x:\t%s\t%s"  %(i.address, i.mnemonic, i.op_str))     print ("========================================================" ) 
然后就能凑活着反编译64位代码看了。
更简单的办法就是,把code复制进新的文件,赋值两份,一份用ida打开,一份用ida64打开,对比着看。这样我们就能用ida的很多操作了。
然后顺便去一下花,简单分析花的特点都是向后call
1-3字节然后,我们只需要把中间的垃圾值nop掉就行了:
复制到新的文件就不用再计算base了,也舒服了一些。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 with  open ("gatesXgame_d.exe" , "rb" ) as  f:    raw = f.read() code = raw[0x2198 :0x2198 +0x30d3 ] i = 0  while  i<len (code)-5 :    if  code[i:i+5 ] == b"\xe8\x01\x00\x00\x00" :         code = code[:i+5 ]+b"\x90"  + code[i+6 :]         i+=6      elif  code[i:i+5 ] == b"\xe8\x02\x00\x00\x00" :         code = code[:i+5 ]+b"\x90" *2  + code[i+7 :]         i+=7      elif  code[i:i+5 ] == b"\xe8\x03\x00\x00\x00" :         code = code[:i+5 ]+b"\x90" *3  + code[i+8 :]         i+=8      else :         i+=1  with  open ("code" , "wb" ) as  f:    f.write(code) 
接下来可以分析代码了。首先地址0的32位代码,判断第一个为f,然后切换到64位,跳到49e,根据第二位为0或4,跳到下一个32位代码块,然后又。。。
每个代码块都会进行一个以上的判断,然后调到下一个代码块(都不对会返回0)。每个代码块之间的切换必定 会切一次换位数。最终要去到返回1的地方。搜一下retn这条指令,在30d0(其实就是最后啦)。
很显然这是个图算法。代码量很大,所以需要写脚本提取出图的内容。由于每次节点的切换都会切换位数,所以大大降低了写脚本的难度。我们只需要根据一些特征提取数据就行了。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 from  capstone import  *from  struct import  *md_64 = Cs(CS_ARCH_X86, CS_MODE_64) md_32 = Cs(CS_ARCH_X86, CS_MODE_32) nodes = {} with  open ("code" , "rb" ) as  f:    code = f.read() def  dis32 (addr ):    global  code, md_32     tmp = code[addr:addr + 10 ]     return  md_32.disasm(tmp, addr).__next__() def  dis64 (addr ):    global  code, md_64     tmp = code[addr:addr + 10 ]     return  md_64.disasm(tmp, addr).__next__() def  exp32 (addr ):    now = dis32(addr)     assert  now.bytes  == b'9\xd9'      addr += now.size     now = dis32(addr)     ret_addr = int (now.op_str, 16 )     ret = dis32(ret_addr)     assert  ret.bytes  == b'1\xc0'           addr += now.size                         now = dis32(addr)     assert  now.bytes  == b'\x8a\x04\n'       node = []     addr += now.size     while  True :         if  addr == ret_addr:             break          now = dis32(addr)         assert  now.mnemonic == 'cmp'          next_ch = chr (int (now.op_str[4 :], 16 ))                  addr += now.size         now = dis32(addr)         jz_addr = int (now.op_str, 16 )         next_addr = unpack("<I" , code[jz_addr+15 :jz_addr+15 +4 ])[0 ]         next  = [next_addr, next_ch]         node.append(next )         while  True :             addr += now.size             now = dis32(addr)             if  now.mnemonic == 'cmp'  or  addr == ret_addr:                 break      return  node def  exp64 (addr ):    now = dis64(addr)     assert  now.bytes  == b'H9\xd9'      addr += now.size     now = dis64(addr)     ret_addr = int (now.op_str, 16 )     ret = dis64(ret_addr)     assert  ret.bytes  == b'H1\xc0'           addr += now.size                         now = dis64(addr)     assert  now.bytes  == b'g\x8a\x04\n'      node = []     addr += now.size     while  True :         if  addr == ret_addr:             break          now = dis64(addr)         assert  now.mnemonic == 'cmp'          next_ch = chr (int (now.op_str[4 :], 16 ))                  addr += now.size         now = dis64(addr)                  jz_addr = int (now.op_str, 16 )         next_addr = unpack("<I" , code[jz_addr+21 :jz_addr+21 +4 ])[0 ]         next  = [next_addr, next_ch]         node.append(next )         while  True :             addr += now.size             now = dis64(addr)             if  now.mnemonic == 'cmp'  or  addr == ret_addr:                 break      return  node def  show (addr, node ):    print ("0x%04x ->" %addr, end=' ' )     for  i in  node:         print ('\'' +i[1 ]+'\'' , "0x%04x" %i[0 ], end=', ' )     print ()      def  get_graph (addr, switch ):    if  addr == 0x30d0 :         return      global  nodes     if  switch == 0 :         node = exp32(addr)     else :         node = exp64(addr)     switch ^= 1      nodes[addr] = node     show(addr, node)          for  i in  node:         if  i[0 ] not  in  nodes.keys():             get_graph(i[0 ], switch) get_graph(0 , 0 ) 
然后我们拿到了所有100个节点,以及每个节点的“指针”
接下来就是图搜索算法了,需要找到一条从0到0x30d0的路径。观察一下图里存在环,所以肯定是要找最短路径。
1 2 3 4 5 6 7 8 9 10 11 def  get_path (addr, flag, path ):    if  addr == 0x30d0 :         print ("found!" )         print ("npointer{" +flag+"}" )         return      node = nodes[addr]     for  i in  node:         if  i[0 ] not  in  path:             get_path(i[0 ], flag+i[1 ], path+[i[0 ]]) get_path(0 , "" , []) 
一度被图搜索代码困扰了挺久,后面想通了其实很简单。。。