#/* # Christian Bennett # # Purpose: # For use in a Make system when generating the at&t assembly syntax file (for gnu software) from some the # Intel standard assembly syntax file. # # # Expandability: # This program is ready to be hooked into a make system. The design for this script # is based on the assumption there is just an assembly file with no distinct header or # footer information. This was not designed to parse through a C++ file and find the inlined # assembly. Quite simply it takes an Intel asm instruction and outputs the AT&T version. If # there is a line it does not recognize, it outputs it as is. # So if there is distinct headers and footers per compiler you may have to modify the linux # output slightly, or modify this code slightly. # # Use: # The IO in this file is done through standard in and standard out. READ: cat the input # and redirect the output to a file. Or do whatever means necessary. So for testing I did something # like: cat sampleInput.asm perl assembly.pl > linuxTranslation.pl # # # KEY: in comments: ws = whitespcae, pws = possible white space (read: there may be ws here). # Here is a hash for identifying registers. Querying the hash will return TRUE if, the query contains a # register. Else will return null. If there are additional registers that may be used, add them in a # similar fasion. #*/ %registers = ( "eax" => 1, "ebx" => 1, "ecx" => 1, "edx" => 1, "esi" => 1, "edi" => 1, "eip" => 1, "esp" => 1, "ebp" => 1, "efl" => 1, ); LINE: while ($line = <>) { if($line eq "\n"){ #goto next line if endline print "\n"; next LINE; } #/*----------------Interpretation of instructions-------------- ### The Different Cases Handled # # Each case takes a string. It then parses the string and grabs arguments and parameters. # It then formats the instruction and args for AT&T assembly syntax. # # Formating preserves c++ style comments. # # 1) instruction = insn reg, int # This case grabs the three words, and makes sure the second is a reg, and the third a integer # # 2) insn reg, reg2 # This case grabs the three words, and check if the 2nd and 3rd parameters are valid registers. # # 3) insn reg, [reg + offet] (where offset is an integer) # This case grabs 5 items: inst, reg, reg, sign, offset. # # 4) insn int # This case grabs the instruction, then grabs an int. # # 5) insn location (e.g. call FOO) # insn reg # # 6) inverse of case 3: insn [reg + offset], reg # # 7) insn reg, [reg2 + FOO * 4] //note FOO may be a register. # # 8) insn [reg + FOO*4], reg2 # # 9) insn reg, [reg2] # # 10) insn [reg], reg2 # # 11) insn reg, [reg2 + reg3] # # 12) insn [reg + reg2], reg3 # # 13) insn reg, [reg2 + reg3 + offset] # # 14) insn [reg + reg2 + offset], reg4 # # 15) insn reg, [reg2 + reg3*offset + someintOffset] # # 16) insn [reg1 + reg2*offset + someintOffset], reg3 # # # #*/ if($line =~ m@(^//.*)@){ #if a line is a comment, print $1, "\n"; #send to the output stream. next LINE; } #/* CASE 1 -- insn reg, int --> insnl %reg, int # This statment is a regexp looking for: a word, whitespace, alphaNum chars, possible space # an integer (digit), possible whitespace, possible comment. # It then to makes sure the second word it absorbed was a register by using the hash feature. # If entered, 'next' gotos the next iteration of the while loop. #*/ if($line =~ m@(\w+)\s+([a-zA-Z]+),\s*(\d+)\s*(/*.*)@ && $registers{$2}){ print $1, "l %", $2, ", ", $3; print " ", $4, "\n"; next LINE; } #/* CASE 2 -- insn reg, reg2 --> insnl %reg2, %reg # This statment is a regexp looking for: a word, whitespace, a word, possible space # alphanum chars, possible whitespace, possible comments. # It then to makes sure the second and third words absorbed were registers by # using the hash feature. # If entered, 'next' gotos the next iteration of the while loop. #*/ if(($line =~ m@(\w+)\s+(\w+),\s*([a-zA-Z]+)\s*(/*.*)@) && $registers{$2} && $registers{$3}){ print $1, "l %", $3, ", %", $2; print " ", $4, "\n"; next LINE; } #/* CASE 3-- insn reg, [reg2+offset] --> insnl offset(%reg2), %reg # This statment is a regexp looking for: a word, whitespace, alphanum chars, possible space # [ char, alphanum chars, possible whitespace, + or -, possible whitespace, ] char, # possible comments. It then to makes sure the second and third words absorbed were # actually registers by using the hash feature. # If entered, 'next' gotos the next iteration of the while loop. #*/ if($line =~ m@(\w+)\s+([a-zA-Z]+),\s*\[([a-zA-Z]+)\s*([\+\-])\s*(\d+)\s*]\s*(/*.*)@ && $registers{$2} && $registers{$3}){ if($4 eq "\+"){ print $1, "l ", $5, "(%", $3, "), %", $2; #if + } else{ print $1, "l -", $5, "(%", $3, "), %", $2; #if - } print " ", $6, "\n"; next LINE; } #/* CASE 4 -- insn int --> insnl $int, or ret int # This statment is a regexp looking for: a word, whitespace, digits, possible space # possible comments. It then to makes sure the second word absorbed was # actually a register by using the hash feature. # Also makes sure there are no '['s in the string. # If entered, 'next' gotos the next iteration of the while loop. #*/ if($line =~ m@(\w+)\s+(\d+)\s*(/*.*)@ && !$registers{$2} && !($line =~ /\[/)){ $temp = $1; if($1 eq "ret"){ print $temp, " \$", $2; } else{ print $temp, "l \$", $2; } print " ", $3, "\n"; next LINE; } #/* CASE 5 insn LOCATION or insn reg --> either insn *%reg or insnl %LOCATION # This statment is a regexp looking for: a word, whitespace, alphanum chars, possible space # possible comments. It tests to exclude strings with '[' in them. # If entered, 'next' gotos the next iteration of the while loop. #*/ if($line =~ m@(\w+)\s+([a-zA-Z]+)\s*(/*.*)@ && !($line =~ /\[/)){ $temp = $1; $temp2 = $2; if((($registers{$2}) && (($temp eq "call") || ($temp eq "jmp")))){ print $temp, " *\%", $temp2; } elsif($registers{$2}){ print $temp, "l \%", $temp2; } else{ print $, " ", $2; } print " ", $3, "\n"; next LINE; } #/* CASE 6 insn [reg + offset], reg2 -- > insnl %reg2, offset(%reg) # This statment is a regexp looking for: a word, whitespace, alphanum chars, possible space # + or - char, possible whitespace, possible whitespace, digits, ws,], ws, alphanum chars, ws, # possible comments. It then to makes sure the second and fifth words absorbed were # actually registers by using the hash feature. # If entered, 'next' gotos the next iteration of the while loop. #*/ if($line =~ m@(\w+)\s+\[([a-zA-Z]+)\s*([+-])\s*(\d+)\s*\]\s*,\s*([a-zA-Z]+)\s*(/*.*)@ && $registers{$2} && $registers{$5}){ if($3 eq "\+"){ print $1, "l ", $5, ", ", $4, "(", $2, ")"; #if + } else{ print $1, "l ", $5, ", -", $4, "(", $2, ")"; #if - } print " ", $6, "\n"; next LINE; } # CASE 7 : insn reg, [reg2 + FOO*int] --> insnl %reg2(FOO*int), %reg # E.G. : add eax, [ecx + FOO*4] --> addl %ecx(FOO*4), %eax if($line =~ m@(\w+)\s+([a-zA-Z]+),\s*\[([a-zA-Z]+)\s*([\+\-])\s*([a-zA-Z]+)\*(\d+)\s*]\s*(/*.*)@ && $registers{$2} && $registers{$3}){ if($registers{$5}){ #if FOO is a register if($4 eq "\+"){ print $1, "l %", $3, "(%", $5, "*", $6, "), %", $2; #if + } else{ print $1, "l %", $3, "(-%", $5, "*", $6, "), %", $2; #if - } } else{ if($4 eq "\+"){ print $1, "l %", $3, "(", $5, "*", $6, "), %", $2; } else{ print $1, "l %", $3, "(-", $5, "*", $6, "), %", $2; } } print " ", $7, "\n"; next LINE; } # case 8: insn [reg1 + FOO*int], reg2 --> insnl %reg2, %reg1(%FOO*int) if foo is a reg # --> insnl %reg2, %reg1(FOO*int) if foo is not a register # This code first checks to see if foo is a register. If foo is a register, it switches on whether # or not there was a plus or minus symbol used. The rest is formatting to make the case mentioned above # have the right output. # if($line =~ m@(\w+)\s+\[([a-zA-Z]+)\s*([+-])\s*(\w+)\s*\*\s*(\d+)\s*\]\s*,\s*(\w+.*)\s*(/*.*)@ && $registers{$2} && $registers{$5}){ if($registers{$5}){ #if FOO is a register if($4 eq "\+"){ print $1, "l %", $6, ", %", $2, "(%", $4, "*", $5, ")"; #if + } else{ print $1, "l %", $6, ", %", $2, "(-%", $4, "*", $5, ")"; #if - } } else{ #if FOO is not a register if($4 eq "\+"){ print $1, "l %", $6, ", %", $2, "(", $4, "*", $5, ")"; } else{ print $1, "l %", $6, ", %", $2, "(-", $4, "*", $5, ")"; } } print " ", $7, "\n"; next LINE; } # case 9: insn reg, [reg2] --> insn (%reg2), %reg # This code looks for and write the input and output described above. # The only error checking is the identical matching of the input (with ws as a variant), and # to make sure the args are valid registers. # The rest is formatting to make the case mentioned above have the right output. if($line =~ m@(\w+)\s+(\w+),\s*\[(\w+)\]\s*(/*.*)@ && $registers{$2} && $registers{$3}){ print $1, "l (%", $3, "), %", $2; print " ", $4, "\n"; next LINE; } # Case 10: insn [reg], reg2 --> insn %reg2, (%reg) # Just the reverse of the previous case. See case 9. if($line =~ m@(\w+)\s+\[(\w+)\],\s*(\w+)\s*(/*.*)@ && $registers{$2} && $registers{$3}){ #*/ print $1, "l %", $3, ", (%", $2, ")"; print " ", $4, "\n"; next LINE; } # case 11: insn reg, [reg2 + reg3] --> insnl (%reg2, %reg3), %reg # # This looks for the input matching the description above. If found it then formats it match the output described. # Error checking is in the formatting and making sure appropriate args are valid registers. # If entered, goto LINE to iterate through the next line. if($line =~ m@(\w+)\s+(\w+),\s*\[\s*([a-zA-Z]+)\s*([/+/-])\s*([a-zA-Z]+)\s*\]\s*(/*.*)@ && $registers{$2} #*/ && $registers{$3} && $registers{$5}){ if($4 eq "\+"){ print $1, "l (%", $3, ",%", $5, "), ", $2; } else{ # does subtraction work?? } print " ", $6, "\n"; next LINE; } # case 12: insn [reg + reg2], reg3 (reverse of case 11) --> insnl %reg3, (%reg, %reg2) # See case 11 for details. if($line =~ m@(\w+)\s+\[\s*([a-zA-Z]+)\s*([+-])\s*([a-zA-Z]+)\s*\],\s*([a-zA-Z]+)\s*(/*.*)@){ if($3 eq "\+"){ print $1, "l %", $5, ", (%", $2, ",%", $4, ")"; } print " ", $6, "\n"; next LINE; } # case 13: insn reg, [reg2 + reg3 + int] --> insnl (%reg2, %reg3, int), %reg # The following code takes the above # # if($line =~ m@(\w+)\s+(\w+),\s*\[\s*([a-zA-Z]+)\s*([/+/-])\s*([a-zA-Z]+)\s*\+\s*(\d+)\s*\]\s*(/*.*)@ #*/ && $registers{$2} && $registers{$3} && $registers{$5}){ if($4 eq "\+"){ print $1, "l (%", $3, ",%", $5, ",", $6, "), ", $2; } else{ # does subtraction work?? } print " ", $7, "\n"; next LINE; } # case 14: insn [reg + reg2 + int], reg3 (reverse of case 13) --> insnl %reg3, (%reg, %reg2, int) # # The regular expression parses for (in this order): word, ws, '[', pws, Chars, pws, + or -, # pws, chars, pws, +, pws, int, pws, ']', ',',pws, chars, pws, possible comments # # Grabs and puts items in temp vars $1-$7 respectively: insn, reg, sign, reg, int, reg, comment # if($line =~ m@(\w+)\s+\[\s*([a-zA-Z]+)\s*([+-])\s*([a-zA-Z]+)\s*\+\s*(\d*)\s*\],\s*([a-zA-Z]+)\s*(/*.*)@){ if($3 eq "\+"){ print $1, "l %", $6, ", (%", $2, ",%", $4, ",", $5, ")"; } print " ", $7, "\n"; next LINE; } # case 15: insn reg, [reg2 + reg3*4 + int] --> insn (%reg2,%reg3*4,int), reg # # The regular expression parses for (in this order): word, ws, word, ',', pws, '[' # pws, chars, pws, + or -, pws, chars, pws, '*', int, pws, +, pws, int, pws, ']', pws, # possible comments. # # Grabs and puts items temp vars $1-$8 respectively: insn, reg, reg2, reg3, int, + or -, int, comment # if($line =~ m@(\w+)\s+(\w+),\s*\[\s*([a-zA-Z]+)\s*([/+/-])\s*([a-zA-Z]+)\s*\*\s*(\d+)\s*\+\s*(\d+)\s*\]\s*(/*.*)@ && $registers{$2} && $registers{$3} && $registers{$5}){ if($4 eq "\+"){ print $1, "l ", $6, "(%", $3, ",%", $5, ",", $7, "), %", $2; } else{ # does subtraction work?? } print " ", $8, "\n"; next LINE; } # case 16: insn [reg + reg2*4 + int], reg3 (reverse of case 12) --> insn %reg3, (%reg,%reg2*4,int) # The regular expression parses for (in this order): word, ws, '[', possible ws, chars, poss ws, # + or -, possible ws, chars, pws, '*', int, pws, '+', pws, int, pws, ']', pws, chars, pws # then looks for possible comments. # # Grabs and puts items temp vars $1-$8 respectively: insn, reg, reg2, int, + or -, int, reg3, comment # # if($line =~ m@(\w+)\s+\[\s*([a-zA-Z]+)\s*([+-])\s*([a-zA-Z]+)\s*\*\s*(\d*)\s*\+\s*(\d*)\s*\],\s*([a-zA-Z]+)\s*(/*.*)@ && $registers{$2} && $registers{$4} && $registers{$7}){ if($3 eq "\+"){ print $1, "l %", $7, ", ", $5, "(%", $2, ",%", $4, ",", $6, ")"; } print " ", $8, "\n"; next LINE; } # ELSE just output the line as is... if($line =~ /\s*(.+)/){ print $1, "\n"; } }