diff --git a/.gitignore b/.gitignore index 431d327..6e92c44 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ # build residue /.vs/* /x86/* +/x64/* /obj32/* /obj64/* /bin/* diff --git a/README.md b/README.md index 1f6d66a..842442d 100644 --- a/README.md +++ b/README.md @@ -1,124 +1,132 @@ -## WARNING: THIS CODE IS VERY RAW AND PROBABLY VERY BUGGY! - -## Introduction - -This is the blc (Binary Lifting Contraption) plugin for IDA Pro. It is the Bastard -love child of Ghidra's decompiler with Ida Pro. - -The plugin integrates Ghidra's decompiler code into an Ida plugin an provides a -basic decompiler capability for all platforms support by both Ida and Ghidra. It -provides a basic source code display that attempts to mimic that of the Hex-Rays -decompiler. It has only been written with Ida 7.x in mind. - -## BUILDING: - -On all platforms you should clone blc into your IDA SDK's plugins sub-directory -so that you end up with `/plugins/blc`. This is because the build files -all use relative paths to find necessary IDA header files and link libraries. - -### Build blc for Linux / OS X: - -Use the include Makefile to build the plugin. You may need to adjust the paths -that get searched to find your IDA installation (`/Applications/IDA Pro N.NN` is -assumed on OSX and `/opt/ida-N.NN` is assumed on Linux, were N.NN is derived from -the name of your IDA SDK directory eg `idasdk73` associates with `7.3` and should -match your IDA version number). This is required to successfully link the plugin. 
- -``` -$ cd /plugins/blc -$ make -``` - -Compiled binaries will end up in `/plugins/blc/bin` - -``` -LINUX - ------------------------------------------- - | ida | ida64 | - ------------------------------------------- -IDA 7.x | | | - plugin | blc.so | blc64.so | - ------------------------------------------- - -OS/X - ------------------------------------------- - | ida | ida64 | - ------------------------------------------- -IDA 7.x | | | | - plugin | blc.dylib | blc64.dylib | - ------------------------------------------- -``` - -Copy the plugin(s) into your `/plugins` directory and blc should be -listed as an available plugin for all architectures supported both Ida -and Ghidra. - -### Build blc for Windows - -Build with Visual Studio C++ 2017 or later using the included solution (`.sln`) -file (`blc.sln`). Two build targets are available depending on which version -of IDA you are using: - -``` - ----------------------------------------- - | ida | ida64 | - ----------------------------------------- -IDA 7.x | Release/x64 | Release64/x64 | - plugin | blc.dll | blc64.dll | - ----------------------------------------- -``` - -Copy the plugin(s) into your `/plugins` directory and blc should be -listed as an available plugin for all architectures supported by both Ida -and Ghidra. - -## INSTALLATION - -Assuming you have installed IDA to ``, install the plugin by copying the -compiled binaries from `/plugins/blc/bin` to `/plugins` (Linux/Windows) -or `/idabin/plugins` (OS X). - -The plugin is dependent on Ghira processor specifications which you will need to -copy over from your own Ghidra installation. Installing Ghidra is a simple matter -of unzipping the latest Ghidra release, for example: https://ghidra-sre.org/ghidra_9.1_PUBLIC_20191023.zip -Within the extracted Ghidra folder, you will find a `Ghidra` subdirectory which, -in turn, contains a `Processors` subdirectory. The decompiler needs access to -files contained under `Ghidra/Processors`. 
By default the plugin looks for the -environment variable `$GHIDRA_DIR` which it expects to point at your Ghidra -installation folder such that `$GHIDRA_DIR/Ghidra/Processors` exists. If -`$GHIDRA_DIR` is not set, then the plugin expects to find `/plugins/Ghidra/Processors` -which you may create with a symlink or by copying the approprate directories -from your Ghidra installation. - -### Pre-built binaries: - -As an alternative to building the plugin yourself, pre-built binaries for -IDA 7.x (Windows, Linux, OS X) are available in the `blc/bins` directory. - -## USING THE PLUGIN - -With the plugin installed, open a binary of interest in IDA. In order for the -plugin to be become available, the binary's architecture must be supported by -both Ida and Ghidra. - -With the cursor placed inside the body of an Ida function, select -`Edit/Plugins/Ghidra Decompiler`. A successful decompilation (which may take a bit -of time, will open a new window containing the C source generated by Ghidra's -decompiler. Within the source window, you may double click on a function name to -decompile tht function. Double clicking on a global data name will navigate you -to that symbol in the Ida disassembly view. The `ESC` key will navigate back to a -previous function, or close the source viewer if there is no previous function. - -The `N` hot key may be used to rename any symbol in the source view. When a symbol -in the source view corresponds to a symbol in the Ida disassembly, the symbol will -also be renamed in the disassembly. - -## POTENTIAL FUTURE WORK - -* Allow user to set data types for symbols in the source view -* Provide IDA derived type information to the decompiler so that it can - do a better job with things like structures and pointer dereferencing -* Better (at least some) support for string literals -* Investigate what settings/info are necessary to get this standalone decompiler - to yield results identical to Ghidra's. Is this symbol information? Type information? 
+## WARNING: THIS CODE IS VERY RAW AND PROBABLY VERY BUGGY! + +## Introduction + +This is the blc (Binary Lifting Contraption) plugin for IDA Pro. It is the Bastard +love child of Ghidra's decompiler with Ida Pro. + +The plugin integrates Ghidra's decompiler code into an Ida plugin and provides a +basic decompiler capability for all platforms supported by both Ida and Ghidra. It +provides a basic source code display that attempts to mimic that of the Hex-Rays +decompiler. It has only been written with Ida 7.x in mind. + +## BUILDING: + +On all platforms you should clone blc into your IDA SDK's plugins sub-directory +so that you end up with `/plugins/blc`. This is because the build files +all use relative paths to find necessary IDA header files and link libraries. + +### Build blc for Linux / OS X: + +Use the included Makefile to build the plugin. You may need to adjust the paths +that get searched to find your IDA installation (`/Applications/IDA Pro N.NN` is +assumed on OSX and `/opt/ida-N.NN` is assumed on Linux, where N.NN is derived from +the name of your IDA SDK directory e.g. `idasdk73` associates with `7.3` and should +match your IDA version number). This is required to successfully link the plugin. + +``` +$ cd /plugins/blc +$ make +``` + +Compiled binaries will end up in `/plugins/blc/bin` + +``` +LINUX + ------------------------------------------- + | ida | ida64 | + ------------------------------------------- +IDA 7.x | | | + plugin | blc.so | blc64.so | + ------------------------------------------- + +OS/X + ------------------------------------------- + | ida | ida64 | + ------------------------------------------- +IDA 7.x | | | | + plugin | blc.dylib | blc64.dylib | + ------------------------------------------- +``` + +Copy the plugin(s) into your `/plugins` directory and blc should be +listed as an available plugin for all architectures supported by both Ida +and Ghidra. 
+ +### Build blc for Windows + +Build with Visual Studio C++ 2019 or later using the included solution (`.sln`) +file (`blc.sln`). Two build targets are available depending on which version +of IDA you are using: + +``` + ----------------------------------------- + | ida | ida64 | + ----------------------------------------- +IDA 7.x | Release/x64 | Release64/x64 | + plugin | blc.dll | blc64.dll | + ----------------------------------------- +``` + +Copy the plugin(s) into your `/plugins` directory and blc should be +listed as an available plugin for all architectures supported by both Ida +and Ghidra. + +## INSTALLATION + +Assuming you have installed IDA to ``, install the plugin by copying the +compiled binaries from `/plugins/blc/bin` to `/plugins` (Linux/Windows) +or `/idabin/plugins` (OS X). + +The plugin is dependent on Ghidra processor specifications which you will need to +copy over from your own Ghidra installation. Installing Ghidra is a simple matter +of unzipping the latest Ghidra release, for example: + +https://ghidra-sre.org/ghidra_9.1.2_PUBLIC_20200212.zip + +Within the extracted Ghidra folder, you will find a `Ghidra` subdirectory which, +in turn, contains a `Processors` subdirectory. The decompiler needs access to +files contained under `Ghidra/Processors`. By default the plugin looks for the +environment variable `$GHIDRA_DIR` which it expects to point at your Ghidra +installation folder such that `$GHIDRA_DIR/Ghidra/Processors` exists. If +`$GHIDRA_DIR` is not set, then the plugin expects to find `/plugins/Ghidra/Processors` +which you may create with a symlink or by copying the appropriate directories +from your Ghidra installation. + +### Pre-built binaries: + +As an alternative to building the plugin yourself, pre-built binaries for +IDA 7.x (Windows, Linux, OS X) are available in the `blc/bins` directory. + +## USING THE PLUGIN + +With the plugin installed, open a binary of interest in IDA. 
In order for the +plugin to become available, the binary's architecture must be supported by +both Ida and Ghidra. + +With the cursor placed inside the body of an IDA function, select +`Edit/Plugins/Ghidra Decompiler`. A successful decompilation (which may take a bit +of time) will open a new window containing the C source generated by Ghidra's +decompiler. Within the source window, you may double click on a function name to +decompile that function. Double clicking on a global data name will navigate you +to that symbol in the IDA disassembly view. The `ESC` key will navigate back to a +previous function, or close the source viewer if there is no previous function. + +The `N` hot key may be used to rename any symbol in the source view. When a symbol +in the source view corresponds to a symbol in the Ida disassembly, the symbol will +also be renamed in the disassembly. + +You can get xrefs of a function by marking it and pressing `X`. To write a comment, +mark the line and press `/` (or `C`). If you have the Hex-Rays decompiler installed, +some keys might conflict and need to be changed for Hex-Rays. A feature to freely +select the hotkey values is planned. + +## POTENTIAL FUTURE WORK + +* Allow user to set data types for symbols in the source view +* Provide IDA derived type information to the decompiler so that it can + do a better job with things like structures and pointer dereferencing +* ~~Better (at least some) support for string literals~~ +* Investigate what settings/info are necessary to get this standalone decompiler + to yield results identical to Ghidra's. Is this symbol information? Type information? arch/platform/compiler settings? 
\ No newline at end of file diff --git a/ast.cc b/ast.cc index 7639fea..01f3ea8 100644 --- a/ast.cc +++ b/ast.cc @@ -1,2185 +1,2265 @@ -/* - Source for the blc IdaPro plugin - Copyright (c) 2019 Chris Eagle - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 Temple - Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -#include -#include -#include -#include - -#include "xml.hh" -#include "ast.hh" -#include "ida_minimal.hh" -#include "plugin.hh" - -using std::map; -using std::set; - -//#define DEBUG_AST 1 - -#ifdef DEBUG_AST -#define dmsg(x, ...) msg(x, __VA_ARGS__) -#else -#define dmsg(x, ...) 
-#endif - -enum ast_tag_t { - ast_tag_null, - ast_tag_syntax, - ast_tag_break, - ast_tag_funcproto, - ast_tag_vardecl, - ast_tag_return_type, - ast_tag_type, - ast_tag_variable, - ast_tag_block, - ast_tag_statement, - ast_tag_funcname, - ast_tag_op, - ast_tag_label -}; - -enum op_keywords_t { - kw_null, - kw_if, - kw_switch, - kw_while, - kw_return, - kw_assign -}; - -static const string WHILE("while"); -static const string DO("do"); -static const string IF("if"); -static const string ELSE("else"); -static const string BREAK("break"); -static const string DEFAULT("default"); -static const string SWITCH("switch"); -static const string CASE("case"); -static const string GOTO("goto"); -static const string RETURN("return"); - -static const string LBRACE("{"); -static const string RBRACE("}"); -static const string LPAREN("("); -static const string RPAREN(")"); -static const string LBRACKET("["); -static const string RBRACKET("]"); -static const string COLON(":"); -static const string SEMICOLON(";"); -static const string COMMA(","); - -enum g_token { - g_null, - g_lbrace, - g_rbrace, - g_lparen, - g_rparen, - g_cond_close, - g_lbracket, - g_rbracket, - g_and, - g_xor, - g_or, - g_not, - g_bnot, - g_logical_and, - g_logical_or, - g_comma, - g_semi, - g_lshift, - g_rshift, - g_lt, - g_lte, - g_gt, - g_gte, - g_assign, - g_eq, - g_ne, - g_plus, - g_minus, - g_mod, - g_div, - g_star, - g_plus_eq, - g_minus_eq, - g_star_eq, - g_div_eq, - g_mod_eq, - g_and_eq, - g_or_eq, - g_xor_eq, - g_lshift_eq, - g_rshift_eq, - g_plusplus, - g_minusminus, - g_qmark, - g_colon, - - g_break, - g_keyword, - g_var, - g_funcname, - g_const, - g_type, - g_eol_cmt, - g_cmt_open, - g_cmt_close, - g_label, - g_symbol, - g_op -}; - -static map tag_map; -static map ops; -static set binary_ops; -static set unary_ops; -static map op_map; -static set reserved; -static map type_map; - -static const string empty_string(""); - -static void block_handler(const Element *el, Block *block); -//static Statement 
*inner_block(const Element *child); -static VarDecl *vardecl_handler(const Element *el); -static Statement *statement_handler(const Element *el); -static Expression *expr_handler(List::const_iterator &it, List::const_iterator &end, bool comma_ok = true); -static Switch *switch_handler(List::const_iterator &it); -static Else *else_handler(List::const_iterator &it); -static DoWhile *do_handler(List::const_iterator &it); - -static const string &getAttributeValue(const Element *el, const char *attr) { - int nattr = el->getNumAttributes(); - - for (int i = 0; i < nattr; i++) { - if (el->getAttributeName(i) == attr) { - return el->getAttributeValue(i); - } - } - return empty_string; -} - -// in some cases Ghidra uses and others , in all cases -// color="funcname" will be present -bool is_funcname_color(const Element *el) { - return getAttributeValue(el, "color") == "funcname"; -} - -bool is_keyword_color(const Element *el) { - return getAttributeValue(el, "color") == "keyword"; -} - -bool is_const_color(const Element *el) { - return getAttributeValue(el, "color") == "const"; -} - -bool is_global_color(const Element *el) { - return getAttributeValue(el, "color") == "global"; -} - -void escape_string(const string &str, string &escaped) { - for (string::const_iterator i = str.cbegin(); i != str.cend(); i++) { - switch (*i) { - case '\n': - escaped += "\\n"; - break; - case '\t': - escaped += "\\t"; - break; - case '\\': - escaped += "\\"; - break; - case '"': - escaped += "\\\""; - break; - default: - if (*i < 0x20 || *i == 0x7f) { - char buf[16]; - snprintf(buf, sizeof(buf), "\\x%02x", *i & 0xff); - escaped += buf; - } - else{ - escaped.push_back(*i); - } - break; - } - } -} - -static const Element *find_child(const Element *el, const char *tag) { - const List &children = el->getChildren(); - for (List::const_iterator it = children.begin(); it != children.end(); it++) { - const Element *child = *it; - if (child->getName() == tag) { - return child; - } - } - return NULL; -} 
- -static const Element *get_child(List::const_iterator &it) { - const Element *child = *it; - dmsg("Processing %s/%s\n", child->getName().c_str(), child->getContent().c_str()); - return child; -} - -const char *debug_print(AstItem *exp) { - string l = exp->line; - exp->line.clear(); - exp->do_print(); - string r = exp->line; - exp->line = l; - return tag_remove(r.c_str()); -} - -AstItem::AstItem() : no_indent(false), no_semi(false), line_begin(1), - line_end(1), col_start(-1), col_end(-1), - color(COLOR_DEFAULT) {}; - -vector *AstItem::cfunc; -string AstItem::line; -size_t AstItem::indent; -size_t AstItem::line_index; - -void AstItem::flush(bool no_indent) { - string spaces; - if (!no_indent) { - spaces.append(indent, ' '); - } - if (cfunc) { - cfunc->push_back(spaces + line); - } -// dmsg("append: %s\n", cfunc->back().c_str()); - line.clear(); - line_index = 0; -} - -void AstItem::append(char ch, bool count) { - line.push_back(ch); - line_index += count ? 1 : 0; -} - -void AstItem::color_on(char tag) { - append(COLOR_ON, false); - append(tag, false); -} - -void AstItem::color_off(char tag) { - append(COLOR_OFF, false); - append(tag, false); -} - -void AstItem::append(const char *v) { - append(string(v)); -} - -void AstItem::append(const string &v) { - line.append(v); - line_index += v.length(); -} - -void AstItem::append_colored(char tag, const char *v) { - append_colored(tag, string(v)); -} - -void AstItem::append_colored(char tag, const string &v) { - color_on(tag); - append(v); - color_off(tag); -} - -void AstItem::print_in() { - if (cfunc) { - line_begin = cfunc->size(); - col_start = line_index; - } -} - -void AstItem::print_out() { - if (cfunc) { - line_end = cfunc->size(); - col_end = line_index; - } -} - -void AstItem::do_print() { - print_in(); - print(); - print_out(); -} - -void brace_print(AstItem &item, bool final_append = true) { - AstItem::append_colored(COLOR_SYMBOL, LBRACE); - AstItem::flush(); - AstItem::indent += 3; - item.do_print(); - 
AstItem::indent -= 3; - AstItem::append_colored(COLOR_SYMBOL, RBRACE); - if (final_append) { - AstItem::flush(); - } -} - -void Statement::print() { - line += ""; -} - -void Type::print() { - print(""); -} - -void Type::print(const string &var) { - bool need_space = true; - print_in(); - if (is_const) { - append_colored(COLOR_KEYWORD, "const "); - } - append_colored(COLOR_KEYWORD, name); - if (ptr) { - if (!is_cast) { - append(' '); - } - color_on(COLOR_SYMBOL); - line.append(ptr, '*'); - line_index += ptr; - color_off(COLOR_SYMBOL); - need_space = false; - } - if (var.length() > 0) { - if (need_space) { - append(' '); - } - append_colored(COLOR_DNAME, var); - } - for (vector::iterator i = dims.begin(); i != dims.end(); i++) { - append_colored(COLOR_SYMBOL, LBRACKET); - if (*i) { - char buf[32]; - snprintf(buf, sizeof(buf), "%u", *i); - append_colored(COLOR_NUMBER, buf); - } - append_colored(COLOR_SYMBOL, RBRACKET); - } - print_out(); -} - -void Type::rename(const string &oldname, const string &newname) { - if (oldname == name) { - name = newname; - } -} - -void Expression::print() { - append(""); -} - -void LiteralExpr::print() { - append(val); -} - -NameExpr::NameExpr(const string &var, bool _global) : name(var), global(_global) { - adjust_thunk_name(name); -} - -void NameExpr::print() { - if (is_extern(name)) { - append_colored(COLOR_IMPNAME, name); - } - else { - append_colored(COLOR_DNAME, name); - } -} - -void NameExpr::rename(const string &oldname, const string &newname) { -// dmsg("NameExpr::rename %s from %s to %s\n", name.c_str(), oldname.c_str(), newname.c_str()); - if (oldname == name) { - name = newname; - } -} - -void FuncNameExpr::print() { - if (is_extern(name)) { - append_colored(COLOR_IMPNAME, name); - } - else if (is_library_func(name)) { - append_colored(COLOR_DEFAULT, name); - } - else { - append_colored(COLOR_DEFAULT, name); - } -} - -void FuncNameExpr::rename(const string &oldname, const string &newname) { - if (oldname == name) { - name = 
newname; - } -} - -void LabelExpr::print() { - append(label); -} - -void LabelExpr::rename(const string &oldname, const string &newname) { - if (oldname == label) { - label = newname; - } -} - -void LabelStatement::print() { - line += label; - append_colored(COLOR_SYMBOL, COLON); -} - -void LabelStatement::rename(const string &oldname, const string &newname) { - if (oldname == label) { - label = newname; - } -} - -void GotoStatement::print() { - append_colored(COLOR_KEYWORD, GOTO); - append(' '); - label->do_print(); -} - -void GotoStatement::rename(const string &oldname, const string &newname) { - label->rename(oldname, newname); -} - -void BreakStatement::print() { - append_colored(COLOR_KEYWORD, BREAK); -} - -ExprStatement::~ExprStatement() { - delete expr; -} - -void ExprStatement::print() { - expr->do_print(); -} - -void ExprStatement::rename(const string &oldname, const string &newname) { - expr->rename(oldname, newname); -} - -CommaExpr::~CommaExpr() { - delete lhs; - delete rhs; -} - -void CommaExpr::print() { - lhs->do_print(); - append_colored(COLOR_SYMBOL, COMMA); - append(' '); - rhs->do_print(); -} - -void CommaExpr::rename(const string &oldname, const string &newname) { - lhs->rename(oldname, newname); - rhs->rename(oldname, newname); -} - -void BinaryExpr::print() { - lhs->do_print(); - append(' '); - append_colored(COLOR_SYMBOL, op); - append(' '); - rhs->do_print(); -} - -void BinaryExpr::rename(const string &oldname, const string &newname) { - lhs->rename(oldname, newname); - rhs->rename(oldname, newname); -} - -void UnaryExpr::print() { - append_colored(COLOR_SYMBOL, op); - expr->do_print(); -} - -void UnaryExpr::rename(const string &oldname, const string &newname) { - expr->rename(oldname, newname); -} - -CastExpr::CastExpr(const string &typ) { - type = new Type(typ); - type->is_cast = true; -} - -CastExpr::~CastExpr() { - delete type; -} - -void CastExpr::print() { - type->do_print(); -} - -void CastExpr::rename(const string &oldname, const 
string &newname) { - type->rename(oldname, newname); -} - -void TypeCast::print() { - type->do_print(); - expr->do_print(); -} - -void TypeCast::rename(const string &oldname, const string &newname) { - type->rename(oldname, newname); - expr->rename(oldname, newname); -} - -void IntegerLiteral::print() { - append(val); -} - -uint64_t IntegerLiteral::get_value() { - return strtoull(val.c_str(), NULL, 0); -} - -void StringLiteral::print() { - append_colored(COLOR_DSTR, "\""); - string escaped; - escape_string(val, escaped); - append_colored(COLOR_DSTR, escaped); - append_colored(COLOR_DSTR, "\""); -} - -void CharExpr::print() { - append_colored(COLOR_SYMBOL, "'"); - append(val); - append_colored(COLOR_SYMBOL, "'"); -} - -void ParenExpr::print() { - append_colored(COLOR_SYMBOL, LPAREN); - if (inner) { - inner->do_print(); - } - append_colored(COLOR_SYMBOL, RPAREN); -} - -void ParenExpr::rename(const string &oldname, const string &newname) { - if (inner) { -// dmsg("ParenExpr::rename from %s to %s\n", oldname.c_str(), newname.c_str()); - inner->rename(oldname, newname); - } -} - -void ArrayExpr::print() { - array->do_print(); - append_colored(COLOR_SYMBOL, LBRACKET); - index->do_print(); - append_colored(COLOR_SYMBOL, RBRACKET); -} - -void ArrayExpr::rename(const string &oldname, const string &newname) { - array->rename(oldname, newname); - index->rename(oldname, newname); -} - -void Block::print() { - for (vector::iterator i = block.begin(); i != block.end(); i++) { - Statement *s = *i; - if (s) { - s->do_print(); - } - else { - dmsg("Attempting to print a NULL Statement\n"); - } - if (!s->no_semi) { - append_colored(COLOR_SYMBOL, SEMICOLON); - } - flush(s->no_indent); - } -} - -Block::~Block() { - for (vector::iterator i = block.begin(); i != block.end(); i++) { - delete *i; - } -} - -void Block::rename(const string &oldname, const string &newname) { - for (vector::iterator i = block.begin(); i != block.end(); i++) { - Statement *s = *i; - if (s) { -// 
dmsg("Block::rename from %s to %s\n", oldname.c_str(), newname.c_str()); - s->rename(oldname, newname); - } - } -} - -void VarDecl::print() { - type->print(var->name); -} - -const string &VarDecl::getName() { - Expression *expr = var; - while (true) { - NameExpr *n = dynamic_cast(expr); - if (n) { - return n->name; - } - UnaryExpr *u = dynamic_cast(expr); - if (u) { - expr = u->expr; - continue; - } - ParenExpr *p = dynamic_cast(expr); - if (p) { - expr = p->inner; - continue; - } - ArrayExpr *a = dynamic_cast(expr); - if (a) { - expr = a->array; - continue; - } - dmsg("VarDecl unexpected expr type\n"); - return empty_string; - } -} - -void VarDecl::rename(const string &oldname, const string &newname) { - type->rename(oldname, newname); - var->rename(oldname, newname); - if (init) { - init->rename(oldname, newname); - } -} - -VarDecl::~VarDecl() { - delete type; - delete var; - delete init; -} - -void Funcproto::print() { - return_type->do_print(); - for (vector::iterator i = keywords.begin(); i != keywords.end(); i++) { - append(' '); - append(*i); - } - append(' '); - append(name); - append_colored(COLOR_SYMBOL, LPAREN); - for (vector::iterator i = parameters.begin(); i != parameters.end(); i++) { - if (i != parameters.begin()) { - append_colored(COLOR_SYMBOL, COMMA); - append(' '); - } - (*i)->do_print(); - } - append_colored(COLOR_SYMBOL, RPAREN); -} - -void Funcproto::rename(const string &oldname, const string &newname) { - return_type->rename(oldname, newname); - if (oldname == name) { - name = newname; - } - for (vector::iterator i = parameters.begin(); i != parameters.end(); i++) { - (*i)->rename(oldname, newname); - } -} - -Funcproto::~Funcproto() { - delete return_type; - for (vector::iterator i = parameters.begin(); i != parameters.end(); i++) { - delete *i; - } -} - -CallExpr::~CallExpr() { - delete func; - delete args; -} - -void CallExpr::print() { - func->do_print(); - args->do_print(); -} - -void CallExpr::rename(const string &oldname, const string 
&newname) { -// dmsg("CallExpr::rename from %s to %s\n", oldname.c_str(), newname.c_str()); - func->rename(oldname, newname); - args->rename(oldname, newname); -} - -void Else::print() { - append_colored(COLOR_KEYWORD, ELSE); - append(' '); - brace_print(block, false); -} - -void Else::rename(const string &oldname, const string &newname) { - block.rename(oldname, newname); -} - -void If::print() { - append_colored(COLOR_KEYWORD, IF); - append(' '); - append_colored(COLOR_SYMBOL, LPAREN); - cond->do_print(); - append_colored(COLOR_SYMBOL, RPAREN); - append(' '); - brace_print(block, _else != NULL); - if (_else) { - _else->do_print(); - } -} - -void If::rename(const string &oldname, const string &newname) { - ConditionalStatement::rename(oldname, newname); - if (_else) { - _else->rename(oldname, newname); - } -} - -void ConditionalStatement::rename(const string &oldname, const string &newname) { -// dmsg("ConditionalStatement::rename from %s to %s\n", oldname.c_str(), newname.c_str()); - cond->rename(oldname, newname); - block.rename(oldname, newname); -} - -void While::print() { - append_colored(COLOR_KEYWORD, WHILE); - append(' '); - append_colored(COLOR_SYMBOL, LPAREN); - cond->do_print(); - append_colored(COLOR_SYMBOL, RPAREN); - append(' '); - brace_print(block, false); -} - -void DoWhile::print() { - append_colored(COLOR_KEYWORD, DO); - append(' '); - brace_print(block, false); - append(' '); - append_colored(COLOR_KEYWORD, WHILE); - append(' '); - append_colored(COLOR_SYMBOL, LPAREN); - cond->do_print(); - append_colored(COLOR_SYMBOL, RPAREN); -} - -void Case::print() { - if (is_default) { - append_colored(COLOR_KEYWORD, DEFAULT); - } - else { - append_colored(COLOR_KEYWORD, CASE); - append(' '); - append(label); - } - append_colored(COLOR_SYMBOL, COLON); - flush(); - indent += 3; - Block::print(); - indent -= 3; -} - -void Switch::print() { - append_colored(COLOR_KEYWORD, SWITCH); - append(' '); - append_colored(COLOR_SYMBOL, LPAREN); - cond->do_print(); - 
append_colored(COLOR_SYMBOL, RPAREN); - append(' '); - append_colored(COLOR_SYMBOL, LBRACE); - flush(); - indent += 3; - for (vector::iterator i = cases.begin(); i != cases.end(); i++) { - (*i)->do_print(); - } - indent -= 3; - append_colored(COLOR_SYMBOL, "}"); -} - -void Switch::rename(const string &oldname, const string &newname) { - cond->rename(oldname, newname); - for (vector::iterator i = cases.begin(); i != cases.end(); i++) { - (*i)->rename(oldname, newname); - } -} - -void Return::print() { - append_colored(COLOR_KEYWORD, RETURN); - if (expr) { - append(' '); - expr->do_print(); - } -} - -void Return::rename(const string &oldname, const string &newname) { - if (expr) { - expr->rename(oldname, newname); - } -} - -AssignExpr::~AssignExpr() { - delete lval; - delete rval; -} - -void AssignExpr::print() { - lval->do_print(); - append(' '); - append_colored(COLOR_SYMBOL, "="); - append(' '); - rval->do_print(); -} - -void AssignExpr::rename(const string &oldname, const string &newname) { -// dmsg("AssignExpr::rename from %s to %s\n", oldname.c_str(), newname.c_str()); - lval->rename(oldname, newname); - rval->rename(oldname, newname); -} - -void Ternary::print() { - expr->do_print(); - append(' '); - append_colored(COLOR_SYMBOL, "?"); - append(' '); - _true->do_print(); - append(' '); - append_colored(COLOR_SYMBOL, COLON); - append(' '); - _false->do_print(); -} - -void Ternary::rename(const string &oldname, const string &newname) { - expr->rename(oldname, newname); - _true->rename(oldname, newname); - _false->rename(oldname, newname); -} - -void Function::print() { - prototype.do_print(); - flush(); - brace_print(block); -} - -void Function::print(vector *cfunc) { - line_index = 0; - AstItem::cfunc = cfunc; - line.clear(); - indent = 0; - - do_print(); - - line_index = 0; - AstItem::cfunc = NULL; - line.clear(); - indent = 0; -} - -void Function::rename(const string &oldname, const string &newname) { - prototype.rename(oldname, newname); - 
block.rename(oldname, newname); -} - -List::const_iterator find_match(List::const_iterator &it, const string &sym, const string &open) { - List::const_iterator res = it; - while ((*res)->getContent() != sym || getAttributeValue(*res, "close") != open) { - res++; - } - return res; -} - -List::const_iterator find(List::const_iterator &it, const string &sym) { - List::const_iterator res = it; - while ((*res)->getContent() != sym) { - res++; - } - return res; -} - -static bool is_const_expr(Expression *e, uint64_t *val) { - IntegerLiteral *num = dynamic_cast(e); - if (num) { - *val = num->get_value(); - dmsg("Found const expression (a) %s\n", num->val.c_str()); - return true; - } - NameExpr *ne = dynamic_cast(e); - if (ne) { - char *endptr; - const char *s = ne->name.c_str(); - *val = strtoull(s, &endptr, 0); - if (endptr != s && *endptr == 0) { - //name was a valid integer literal - dmsg("Found const expression (b) %s\n", s); - return true; - } - //not an int so see if name refers to const data - uint64_t addr; - if (ne->global && address_of(ne->name, &addr) && is_read_only(addr)) { - dmsg("Found const expression (d) %s\n", ne->name.c_str()); - if (get_value(addr, val)) { - return true; - } - } - return false; - } - UnaryExpr *ue = dynamic_cast(e); - if (ue) { - if (is_const_expr(ue->expr, val)) { - uint64_t old = *val; - if (ue->op == "-") { - *val = 0 - *val; - } - else if (ue->op == "~") { - *val = ~*val; - } - else if (ue->op == "!") { - *val = !*val; - } - else { - return false; - } - dmsg("Found const expression (c) %s0x%lx\n", ue->op.c_str(), old); - return true; - } - } - return false; -} - -static Expression *simplify_const(uint64_t cval) { - dmsg("simplify_const for 0x%lx\n", cval); - string val; - if (is_function_start(cval)) { - get_name(val, cval, 0); - return new FuncNameExpr(val); - } - if (get_string(cval, val)) { - dmsg("simplify_const became a string: %s\n", val.c_str()); - return new StringLiteral(val); - } - if (is_named_addr(cval, val)) { - return 
new UnaryExpr("&", new NameExpr(val)); - } - return NULL; -} - -const string &map_type(const string &type_name) { - if (type_map.find(type_name) != type_map.end()) { - return type_map[type_name]; - } - return type_name; -} - -static Type *type_handler(const Element *el) { - //map the type name here - const string &type_name = el->getContent(); - return new Type(map_type(type_name)); -} - -static Return *return_handler(List::const_iterator &it, List::const_iterator &end) { - Return *result = new Return(); - Expression *expr = expr_handler(it, end); - EmptyExpr *ee = dynamic_cast(expr); - if (ee) { - delete ee; - result->expr = NULL; - } - else { - result->expr = expr; - } - return result; -} - -static Statement *statement_handler(const Element *el) { - static int scount = 0; - dmsg("statement_handler in %d\n", scount++); - Statement *result = NULL; - Expression *lhs = NULL; - const List &children = el->getChildren(); - List::const_iterator end = children.end(); - for (List::const_iterator it = children.begin(); it < end; it++) { - const Element *child = get_child(it); - if (is_keyword_color(child) && child->getContent() == BREAK) { - return new BreakStatement(); - } - switch (tag_map[child->getName()]) { - case ast_tag_op: { - //need to consume consecutive children at this level to form a statement - switch (op_map[child->getContent()]) { - case kw_return: - result = return_handler(++it, end); - dmsg("statement_handler out(1) %d\n", --scount); - return result; - default: - dmsg("no op_map match for '%s' in statement\n", child->getContent().c_str()); - lhs = expr_handler(it, end); - dmsg("no op_map match for '%s' in statement result: %s\n", child->getContent().c_str(), debug_print(lhs)); - if (result) { - dmsg("oddly, result is '%s'\n", debug_print(result)); - } - break; - } - break; - } - case ast_tag_label: - result = new LabelStatement(child->getContent()); - dmsg("statement_handler out(2) %d - Label: %s\n", --scount, debug_print(result)); - return result; - case 
ast_tag_syntax: { - if (child->getContent() == GOTO) { - GotoStatement *g = new GotoStatement(); - g->label = expr_handler(++it, end); - dmsg("statement_handler out(3) %d\n", --scount); - return g; - } - else { - dmsg("no syntax match for '%s' in statement\n", child->getContent().c_str()); - dmsg("Trying to build an expression\n"); - Expression *expr = expr_handler(it, end); - if (expr) { - dmsg("statement_handler out(4) %d\n", --scount); - return new ExprStatement(expr); - } - } - break; - } - default: - dmsg("no tag_map match for %s in statement, trying expression\n", child->getName().c_str()); - lhs = expr_handler(it, end); - break; - } - } - if (result == NULL) { - dmsg("statement_handler is returning NULL\n"); - if (lhs != NULL) { - result = new ExprStatement(lhs); - } - else { - dmsg("statement_handler has no result\n"); - } - } - else { - dmsg("returning from statement_handler -> %p\n", result); - - } - dmsg("statement_handler out(0) %d - %s\n", --scount, debug_print(result)); - return result; -} - -static VarDecl *vardecl_handler(const Element *el) { - bool in_dim = false; - const List &children = el->getChildren(); - VarDecl *result = new VarDecl(); - List::const_iterator it; - List::const_iterator end = children.end(); - for (it = children.begin(); result->init == NULL && it < end; it++) { - const Element *child = get_child(it); - switch (tag_map[child->getName()]) { - case ast_tag_type: - result->type = type_handler(child); - break; - case ast_tag_op: - if (child->getContent() == "*" && result->type) { - result->type->ptr++; - } - else if (child->getContent() == "=") { - result->init = expr_handler(++it, end); - } - break; - case ast_tag_variable: - result->var = new NameExpr(child->getContent()); - break; - case ast_tag_syntax: { - const string &content = child->getContent(); - if (content == "[") { - in_dim = true; - } - else if (content == "]") { - in_dim = false; - } - else if (in_dim && is_const_color(child) && result->type) { - 
result->type->dims.push_back(strtoul(child->getContent().c_str(), NULL, 0)); - } - break; - } - default: - break; - } - } - return result; -} - -static void funcproto_handler(const Element *el, Function *f) { - bool have_proto = false; - bool have_name = false; - const List &children = el->getChildren(); - for (List::const_iterator it = children.begin(); it < children.end(); it++) { - const Element *child = get_child(it); - if (have_proto && !have_name && is_keyword_color(child)) { - f->prototype.keywords.push_back(child->getContent()); - continue; - } - switch (tag_map[child->getName()]) { - case ast_tag_return_type: { - f->prototype.return_type = type_handler(find_child(child, "type")); - const List &rchildren = child->getChildren(); - for (List::const_iterator cit = rchildren.begin(); cit != rchildren.end(); cit++) { - const Element *e = *cit; - if (e->getName() == "op" && e->getContent() == "*") { - f->prototype.return_type->ptr++; - } - } - have_proto = true; - break; - } - case ast_tag_syntax: - break; - case ast_tag_vardecl: { - VarDecl *d = vardecl_handler(child); - if (d) { - f->prototype.parameters.push_back(d); - } - else { - //error - } - break; - } - case ast_tag_funcname: - f->prototype.name = child->getContent(); - have_name = true; - break; - default: - break; - } - } -} - -static CastExpr *cast_handler(List::const_iterator &it, List::const_iterator &end) { - dmsg("Entering cast_handler\n"); - const Element *child = get_child(it); - const string &type_name = child->getContent(); - CastExpr *result = new CastExpr(map_type(type_name)); //map type name change here - while (++it < end) { - child = get_child(it); - if (child->getName() == "op") { - if (child->getContent() == "*") { - result->type->ptr++; - } - else { - dmsg("cast_handler unknown op: %s\n", child->getContent().c_str()); - } - } - else if (child->getName() == "syntax") { - if (child->getContent() == RPAREN) { - dmsg("Leaving cast_handler (1) - %p\n", result); - it--; - return result; - } - 
else { - dmsg("cast_handler unknown syntax: %s\n", child->getContent().c_str()); - } - } - else { - dmsg("cast_handler unknown tag: %s\n", child->getName().c_str()); - } - } - dmsg("Leaving cast_handler (2) - %p\n", result); - return result; -} - -Expression *make_name(const string &name, bool global) { - //add checks to see if name is a const, then convert to the const - //or whether name is a static string, then convert to quoted string - return new NameExpr(name, global); -} - -Expression *make_variable(const Element *var) { - Expression *result = NULL; - const string &text = var->getContent(); - - //add checks to see if name is a const, then convert to the const - //or whether name is a static string, then convert to quoted string - - if (is_const_color(var)) { - char *end; - uint64_t val = strtoull(text.c_str(), &end, 0); - if (*end == 0) { - dmsg("numeric literal: %s\n", text.c_str()); - result = new IntegerLiteral(text); - Expression *e = simplify_const(val); - if (e) { - delete result; - result = e; - } - } - else { - dmsg("other literal: %s\n", text.c_str()); - result = new LiteralExpr(text); - } - } - else if (is_global_color(var)) { - uint64_t addr; - if (address_of(text, &addr) && is_read_only(addr) && !is_function_start(addr)) { - //try to dereference this? 
- dmsg("const global: %s\n", text.c_str()); - result = make_name(text, true); - } - else { - result = make_name(text, true); - } - } - else { - result = make_name(text, false); - } - return result; -} - -static Expression *make_unary(const string &op, List::const_iterator &it, List::const_iterator &end) { - UnaryExpr *u = new UnaryExpr(op, expr_handler(it, end, false)); - if (op == "*") { - NameExpr *n = dynamic_cast(u->expr); - if (n) { - dmsg("made unary expr: %s%s\n", op.c_str(), n->name.c_str()); - string new_name; - if (simplify_deref(n->name, new_name)) { - return make_name(new_name, n->global); - delete u; - } - } - } - return u; -} - -static Expression *make_binary(const string &op, Expression *lhs, List::const_iterator &it, List::const_iterator &end) { - uint64_t v1; - uint64_t v2; - BinaryExpr *b = new BinaryExpr(op, lhs, expr_handler(it, end, false)); - if (op == "+") { - if (is_const_expr(b->lhs, &v1) && is_const_expr(b->rhs, &v2)) { - Expression *e = simplify_const(v1 + v2); - if (e) { - delete b; - return e; - } - } - } - else if (op == "-") { - if (is_const_expr(b->lhs, &v1) && is_const_expr(b->rhs, &v2)) { - Expression *e = simplify_const(v1 - v2); - if (e) { - delete b; - return e; - } - } - } - return b; -} - -static Expression *expr_handler(List::const_iterator &it, List::const_iterator &end, bool comma_ok) { - static int ecount = 0; - Expression *result = NULL; - const string *open = NULL; - ParenExpr *p = NULL; - dmsg("expr_handler in %d\n", ecount++); - for (; it < end; it++) { - const Element *child = get_child(it); - if (is_funcname_color(child)) { - dmsg("expr_handler op building CallExpr(1)\n"); - CallExpr *call = new CallExpr(new FuncNameExpr(child->getContent()), expr_handler(++it, end)); - - dmsg("expr_handler op built CallExpr - %s\n", debug_print(call)); - dmsg("expr_handler out(3) %d\n", --ecount); - return call; - } - switch (tag_map[child->getName()]) { - case ast_tag_variable: - result = make_variable(child); - break; - case 
ast_tag_type: { - CastExpr *cast = cast_handler(it, end); - result = cast; - dmsg("expr_handler out(1) %d\n", --ecount); - return result; - } - case ast_tag_label: - result = new LabelExpr(child->getContent()); - dmsg("expr_handler out(2) %d\n", --ecount); - return result; - case ast_tag_funcname: { - dmsg("expr_handler tag building CallExpr(2)\n"); - CallExpr *call = new CallExpr(new FuncNameExpr(child->getContent()), expr_handler(++it, end)); - - dmsg("expr_handler tag built CallExpr - %s\n", debug_print(call)); - dmsg("expr_handler out(3) %d\n", --ecount); - return call; - } - case ast_tag_statement: { - dmsg("expr_handler tag building statement\n"); - Statement *s = statement_handler(child); - ExprStatement *e = dynamic_cast(s); - if (e) { - //take ownership of the sub-expression; - result = e->expr; - e->expr = NULL; - delete e; - } - else { - dmsg("Expected ExprStatement but didn't get one\n"); - delete s; - } - } - case ast_tag_syntax: { - const string &op = child->getContent(); - if (is_const_color(child)) { - dmsg("expr_handler syntax building LiteralExpr for %s\n", op.c_str()); - result = new LiteralExpr(op); - dmsg(" %s\n", debug_print(result)); - } - else if (unary_ops.find(op) != unary_ops.end() && result == NULL) { - dmsg("expr_handler syntax building UnaryExpr for %s\n", op.c_str()); - //result = new UnaryExpr(op, expr_handler(++it, end)); - result = make_unary(op, ++it, end); - dmsg(" %s\n", debug_print(result)); - } - else if (binary_ops.find(op) != binary_ops.end() && result != NULL) { - dmsg("expr_handler syntax building BinaryExpr for %s\n", op.c_str()); - result = make_binary(op, result, ++it, end); -// result = new BinaryExpr(op, result, expr_handler(++it, end)); - dmsg(" %s\n", debug_print(result)); - } - else { - switch (ops[op]) { - case g_null: - dmsg("expr_handler syntax op is g_null for '%s'\n", op.c_str()); - if (op.length() > 0) { - char *end; - uint64_t val = strtoull(op.c_str(), &end, 0); - if (*end == 0) { - result = new 
IntegerLiteral(op); - } - else { - result = new LiteralExpr(op); - } - } - break; - case g_lparen: { - //recurse into expr, we now have a parenthized expression - open = &getAttributeValue(child, "open"); - dmsg("expr_handler syntax building ParenExpr for %s\n", open->c_str()); - p = new ParenExpr(expr_handler(++it, end, true)); - it++; //increment past the close ) - dmsg(" ParenExpr(%s): %s\n", open->c_str(), debug_print(p)); - CastExpr *c = dynamic_cast(p->inner); - if (c) { - result = new TypeCast(p, expr_handler(++it, end)); - dmsg(" TypeCast(%s): %s\n", open->c_str(), debug_print(result)); - } - else { - if (result != NULL) { - //This looks more like a fucntion call then - //test special case for function name - ParenExpr *rp = dynamic_cast(result); - if (rp) { - NameExpr *rn = dynamic_cast(rp->inner); - if (rn) { - Expression *ne = make_name(rn->name, rn->global); - delete rp; - result = ne; - } - } - result = new CallExpr(result, p); - dmsg(" Looks like function call: %s\n", debug_print(result)); - } - else { - result = p; - } - } - break; - } - case g_rparen: { - const string &close = getAttributeValue(child, "close"); - dmsg("expr_handler rolling back rparen at level %d\n", ecount - 1); - it--; - - dmsg("expr_handler terminating on rparen %s\n", close.c_str()); - dmsg("expr_handler out(5) %d - %p - %s\n", --ecount, result, typeid(result).name()); - return result; - } - case g_lbracket: { - //recurse into expr, we now have a parenthized expression - Expression *index = expr_handler(++it, end); - result = new ArrayExpr(result, index); - break; - } - case g_rbracket: { - // this is probably unmatched, so return what we have to caller - dmsg("expr_handler terminating on rbracket\n"); - const string &close = getAttributeValue(child, "close"); - dmsg("expr_handler out(6) %d - %p\n", --ecount, result); - return result; - } - case g_comma: { //never get here ?? 
- Expression *rhs = expr_handler(++it, end); - result = new CommaExpr(result, rhs); - dmsg("expr_handler comma out(7) %d - %p\n", --ecount, result); - return result; - } - case g_semi: { - dmsg("expr_handler terminating on semicolon\n"); - dmsg("expr_handler out(8) %d - %p\n", --ecount, result); - return result; - } - case g_assign: { - dmsg("expr_handler terminating on assign\n"); - dmsg("expr_handler out(9) %d - %p\n", --ecount, result); - return result; - } - } - } - break; - } - case ast_tag_op: { - const string &op = child->getContent(); - if (unary_ops.find(op) != unary_ops.end() && result == NULL) { - dmsg("expr_handler op building UnaryExpr\n"); - //result = new UnaryExpr(op, expr_handler(++it, end)); - result = make_unary(op, ++it, end); - dmsg("expr_handler op built UnaryExpr(%p) for %s\n", result, op.c_str()); - dmsg(" %s\n", debug_print(result)); - } - else if (binary_ops.find(op) != binary_ops.end() && result != NULL) { - dmsg("expr_handler op building BinaryExpr\n"); - result = make_binary(op, result, ++it, end); -// result = new BinaryExpr(op, result, expr_handler(++it, end)); - dmsg("expr_handler op building BinaryExpr(%p) for %s\n", result, op.c_str()); - dmsg(" %s\n", debug_print(result)); - //dmsg("expr_handler out %d(10) - %p\n", --ecount, result); - //return result; - } - else { - switch (ops[op]) { - case g_null: - dmsg("expr_handler op op is g_null\n"); - break; - case g_assign: { - dmsg("expr_handler op building AssignExpr\n"); - Expression *rhs = expr_handler(++it, end); - result = new AssignExpr(result, rhs); - dmsg("expr_handler op built AssignExpr(%p) - %s\n", result, debug_print(result)); - dmsg("expr_handler out(11) %d\n", --ecount); - return result; - } - case g_comma: { //comma always shows up as an op? 
- if (comma_ok) { - Expression *rhs = expr_handler(++it, end); - result = new CommaExpr(result, rhs); - dmsg("expr_handler out(12) %d\n", --ecount); - } - else { - it--; - dmsg("expr_handler out(12.5) %d\n", --ecount); - } - return result; - } - default: - dmsg("expr_handler no case for op/%s\n", op.c_str()); - break; - } - } - break; - } - default: - dmsg("expr_handler unhandled tag_map: %s(%s)\n", child->getName().c_str(), child->getContent().c_str()); - break; - } - //it can be advanced in some of the functions called above - if (it == end) { - break; - } - } - - dmsg("expr_handler out(side loop) %d - %p\n", --ecount, result); - if (result == NULL) { - return new EmptyExpr(); - } - else { - dmsg(" returning: %s\n", debug_print(result)); - } - return result; -} - -static void conditional_common(ConditionalStatement *cs, List::const_iterator &it) { - const Element *child; - - it = find(it, LPAREN); - const string &open = getAttributeValue(*it, "open"); - it++; - - List::const_iterator end = find_match(it, RPAREN, open); - - cs->cond = expr_handler(it, end); - - it = ++end; //resume after condition's close paren - - child = get_child(it); - while (child->getName() != "block" && child->getName() != "statement") { - it++; - child = get_child(it); - } - if (child->getName() == "block") { - block_handler(child, &cs->block); - } - else { //statement - cs->block.push_back(statement_handler(child)); - } -} - -static If *if_handler(List::const_iterator &it) { - If *result = new If(); - dmsg("building new if\n"); - - conditional_common(result, it); - - //don't try to handle else here - //check for else in main handler - - return result; -} - -static While *while_handler(List::const_iterator &it, List::const_iterator &end) { - static int wcount = 0; - While *result = new While(); - dmsg("building new while - %d\n", wcount++); - - //ghidra in its infinite wisdom does not place the body of - //a while inside of a tag, but MAYBE, a block enclose - //everything from 'while(...) 
{...}' - - it = find(it, LPAREN); - const string &open = getAttributeValue(*it, "open"); - it++; - - List::const_iterator cend = find_match(it, RPAREN, open); - - result->cond = expr_handler(it, cend); - - it = ++cend; //resume after condition's close paren - - //let's hope they at least brace everything - it = find(it, LBRACE); - it++; - - //this is basically the same as a block_handler loop - //without the benefit of knowing where the end of the child - //list is - while (it < end) { - //we're not inside a block so we don't have a defined end - //point for child iteration - const Element *child = get_child(it); - if (child->getContent() == "}") { - //this is the only way to know we've reached the end at this level? - break; - } - - switch (tag_map[child->getName()]) { - case ast_tag_label: { - LabelStatement *label = new LabelStatement(child->getContent()); - result->push_back(label); - break; - } - case ast_tag_block: { - dmsg("while_handler::block\n"); - block_handler(child, &result->block); - break; - } - case ast_tag_op: { //this will be a compound statement?? - dmsg("while_handler::op\n"); - //need to consume consecutive children at this level to form a statement - switch (op_map[child->getContent()]) { - case kw_if: { - dmsg("while_handler::kw_if\n"); - If *_if = if_handler(++it); - if (_if) { - result->push_back(_if); - } - else { - } - break; - } - case kw_switch: { - dmsg("while_handler::kw_switch\n"); - Switch *sw = switch_handler(++it); - if (sw) { - result->push_back(sw); - } - else { - } - break; - } - case kw_while: { - //I hope this can never happend without first being in a nested - //otherwise the end iterator being passed in below will be the end of the - //outer while, not the inner while we are about to parse. 
- dmsg("while_handler::kw_while\n"); - While *w = while_handler(++it, end); - if (w) { - result->push_back(w); - } - else { - } - break; - } - case kw_return: -// block->block.push_back(return_handler(it)); - break; - default: - dmsg("while_handler no op_map match for %s\n", child->getContent().c_str()); - break; - } - break; - } - case ast_tag_statement: { - dmsg("while_handler::statement\n"); - Statement *s = statement_handler(child); - if (s) { - result->push_back(s); - } - else { - //error - } - break; - } - case ast_tag_syntax: - if (child->getContent() == ELSE) { - If *_if = dynamic_cast(result->back()); - if (_if) { - dmsg("while_handler appending else to previous if\n"); - Else *_else = else_handler(it); - if (_else) { - _if->_else = _else; - } - else { - } - } - else { - //error, we don't have an if statement to pair with the else - dmsg("Seeing else, bu previous is %s\n", typeid(result->back()).name()); - } - } - else if (child->getContent() == DO) { - dmsg("while_handler::statement\n"); - Statement *s = do_handler(it); - if (s) { - result->push_back(s); - } - else { - //error - } - } - break; - default: - break; - } - it++; - } - dmsg("while_handler out %d\n", --wcount); - return result; -} - -static Case *build_case(List::const_iterator &it, bool is_default = false) { - Case *result = new Case(is_default); - - if (!is_default) { - while (!is_const_color(*it)) { - it++; - } - result->label = (*it)->getContent(); - dmsg("case label is %s\n", result->label.c_str()); - - it++; - } - const Element *child = get_child(it); - while (child->getName() != "block" && child->getName() != "statement") { - it++; - child = get_child(it); - } - if (child->getName() == "block") { - block_handler(child, result); - } - else { //statement - result->block.push_back(statement_handler(child)); - } - it++; - - return result; -} - -static Switch *switch_handler(List::const_iterator &it) { - Switch *result = new Switch(); - const Element *child; - dmsg("building new switch\n"); - 
- it = find(it, LPAREN); - const string &open = getAttributeValue(*it, "open"); - it++; - - List::const_iterator end = find_match(it, RPAREN, open); - - result->cond = expr_handler(it, end); - - it = ++end; //resume after condition's close paren - - while (true) { - child = get_child(it); - if (child->getContent() == "}") { - break; - } - else if (child->getContent() == CASE) { - result->cases.push_back(build_case(it)); - } - else if (child->getContent() == DEFAULT) { - result->cases.push_back(build_case(it, true)); - result->cases.back()->is_default = true; - } - it++; - } - return result; -} - -static DoWhile *do_handler(List::const_iterator &it) { - DoWhile *result = new DoWhile(); - const Element *child; - dmsg("building new do/while\n"); - - child = get_child(it); - while (child->getName() != "block" && child->getName() != "statement") { - it++; - child = get_child(it); - } - if (child->getName() == "block") { - block_handler(child, &result->block); - } - else { //statement - result->block.push_back(statement_handler(child)); - } - - //find the condition - while (true) { - child = get_child(it); - if (child->getContent() == LPAREN) { - break; - } - it++; - } - const string &open = getAttributeValue(child, "open"); - it++; - - List::const_iterator end = find_match(it, RPAREN, open); - - result->cond = expr_handler(it, end); - - it = ++end; //resume after condition's close paren - - return result; -} - -static Else *else_handler(List::const_iterator &it) { - const Element *child = get_child(it); - Else *result = new Else(); - while (child->getName() != "block" && child->getName() != "statement") { - it++; - child = get_child(it); - } - if (child->getName() == "block") { - block_handler(child, &result->block); - } - else { //statement - result->block.push_back(statement_handler(child)); - } - dmsg("built an else - %p\n", result); - return result; -} - -static void block_handler(const Element *el, Block *block) { - static int bcount = 0; - dmsg("block_handler in 
%d\n", bcount++); - const List &children = el->getChildren(); - List::const_iterator it = children.begin(); - List::const_iterator end = children.end(); - while (it < end) { - const Element *child = get_child(it); - switch (tag_map[child->getName()]) { - case ast_tag_label: { - LabelStatement *label = new LabelStatement(child->getContent()); - block->push_back(label); - break; - } - case ast_tag_block: { - dmsg("block_handler::block\n"); - block_handler(child, block); - break; - } - case ast_tag_op: { //this will be a compound statement?? - dmsg("block_handler::op\n"); - //need to consume consecutive children at this level to form a statement - switch (op_map[child->getContent()]) { - case kw_if: { - dmsg("block_handler::kw_if\n"); - If *_if = if_handler(++it); - if (_if) { - block->push_back(_if); - } - else { - } - break; - } - case kw_switch: { - dmsg("block_handler::kw_switch\n"); - Switch *sw = switch_handler(++it); - if (sw) { - block->push_back(sw); - } - else { - } - break; - } - case kw_while: { - dmsg("block_handler::kw_while\n"); - While *w = while_handler(++it, end); - if (w) { - block->push_back(w); - } - else { - } - break; - } - case kw_return: -// block->block.push_back(return_handler(it)); - break; - default: - dmsg("no op_map match for %s\n", child->getContent().c_str()); - break; - } - break; - } - case ast_tag_statement: { - dmsg("block_handler::statement\n"); - Statement *s = statement_handler(child); - if (s) { - block->push_back(s); - } - else { - //error - } - break; - } - case ast_tag_syntax: - if (child->getContent() == ELSE) { - If *_if = dynamic_cast(block->back()); - if (_if) { - dmsg("appending else to previous if\n"); - Else *_else = else_handler(it); - if (_else) { - _if->_else = _else; - } - else { - } - } - else { - //error, we don't have an if statement to pair with the else - dmsg("Seeing else, bu previous is %s\n", typeid(block->back()).name()); - } - } - else if (child->getContent() == DO) { - 
dmsg("block_handler::statement\n"); - Statement *s = do_handler(it); - if (s) { - block->push_back(s); - } - else { - //error - } - } - break; - default: - break; - } - //it can be advance in some of the functions called above - if (it == end) { - break; - } - it++; - } - dmsg("block_handler out %d\n", --bcount); -} - -void init_maps(void) { - static bool maps_are_init = false; - if (!maps_are_init) { - maps_are_init = true; - - tag_map["syntax"] = ast_tag_syntax; - tag_map["break"] = ast_tag_break; - tag_map["funcproto"] = ast_tag_funcproto; - tag_map["vardecl"] = ast_tag_vardecl; - tag_map["return_type"] = ast_tag_return_type; - tag_map["type"] = ast_tag_type; - tag_map["variable"] = ast_tag_variable; - tag_map["block"] = ast_tag_block; - tag_map["statement"] = ast_tag_statement; - tag_map["funcname"] = ast_tag_funcname; - tag_map["op"] = ast_tag_op; - tag_map["label"] = ast_tag_label; - - op_map[IF] = kw_if; - op_map[SWITCH] = kw_switch; - op_map[WHILE] = kw_while; - op_map[RETURN] = kw_return; - op_map["="] = kw_assign; - - ops[LBRACE] = g_lbrace; - ops[RBRACE] = g_rbrace; - ops[LPAREN] = g_lparen; - ops[RPAREN] = g_rparen; - ops[LBRACKET] = g_lbracket; - ops[RBRACKET] = g_rbracket; - ops["&"] = g_and; - ops["|"] = g_or; - ops["^"] = g_xor; - ops["!"] = g_not; - ops["~"] = g_bnot; - ops["||"] = g_logical_or; - ops["&&"] = g_logical_and; - ops[COMMA] = g_comma; - ops[SEMICOLON] = g_semi; - ops["<<"] = g_lshift; - ops[">>"] = g_rshift; - ops["<"] = g_lt; - ops["<="] = g_lte; - ops[">"] = g_gt; - ops[">="] = g_gte; - ops["="] = g_assign; - ops["=="] = g_eq; - ops["!="] = g_ne; - ops["+"] = g_plus; - ops["-"] = g_minus; - ops["%"] = g_mod; - ops["/"] = g_div; - ops["*"] = g_star; - ops["+="] = g_plus_eq; - ops["-="] = g_minus_eq; - ops["*="] = g_star_eq; - ops["/="] = g_div_eq; - ops["%="] = g_mod_eq; - ops["&="] = g_and_eq; - ops["|="] = g_or_eq; - ops["^="] = g_xor_eq; - ops["<<="] = g_lshift_eq; - ops[">>="] = g_rshift_eq; - ops["++"] = g_plusplus; - ops["--"] = 
g_minusminus; - ops["?"] = g_qmark; - ops[COLON] = g_colon; - - binary_ops.insert("&"); - binary_ops.insert("|"); - binary_ops.insert("^"); - binary_ops.insert("||"); - binary_ops.insert("&&"); - binary_ops.insert("<<"); - binary_ops.insert(">>"); - binary_ops.insert("<"); - binary_ops.insert("<="); - binary_ops.insert(">"); - binary_ops.insert(">="); - binary_ops.insert("=="); - binary_ops.insert("!="); - binary_ops.insert("+"); - binary_ops.insert("-"); - binary_ops.insert("%"); - binary_ops.insert("/"); - binary_ops.insert("*"); - - unary_ops.insert("++"); - unary_ops.insert("--"); - unary_ops.insert("++ "); - unary_ops.insert("-- "); - unary_ops.insert("-"); - unary_ops.insert("!"); - unary_ops.insert("~"); - unary_ops.insert("*"); - unary_ops.insert("&"); - - reserved.insert(WHILE); - reserved.insert(DO); - reserved.insert(IF); - reserved.insert(ELSE); - reserved.insert(BREAK); - reserved.insert(DEFAULT); - reserved.insert(SWITCH); - reserved.insert(CASE); - reserved.insert(GOTO); - reserved.insert(RETURN); - reserved.insert("for"); - - reserved.insert("int"); - reserved.insert("bool"); - reserved.insert("char"); - reserved.insert("short"); - reserved.insert("long"); - reserved.insert("signed"); - reserved.insert("unsigned"); - reserved.insert("float"); - reserved.insert("double"); - reserved.insert("void"); - reserved.insert("NULL"); - - reserved.insert("uint8_t"); - reserved.insert("uint16_t"); - reserved.insert("uint32_t"); - reserved.insert("uint64_t"); - reserved.insert("int8_t"); - reserved.insert("int16_t"); - reserved.insert("int32_t"); - reserved.insert("int64_t"); - - type_map["uint1"] = "uint8_t"; - type_map["uint2"] = "uint16_t"; - type_map["uint4"] = "uint32_t"; - type_map["uint8"] = "uint64_t"; - type_map["int1"] = "int8_t"; - type_map["int2"] = "int16_t"; - type_map["int4"] = "int32_t"; - type_map["int8"] = "int64_t"; - type_map["float4"] = "float"; - type_map["float8"] = "double"; - type_map["xunknown1"] = "__uint8"; - type_map["xunknown2"] = 
"__uint16"; - type_map["xunknown4"] = "__uint32"; - type_map["xunknown8"] = "__uint64"; - } -} - -bool is_reserved(const string &word) { - return reserved.find(word) != reserved.end(); -} - -Function *func_from_xml(Element *func, uint64_t addr) { - init_maps(); - int num_decls = 0; - int num_blocks = 0; - if (func->getName() != "function") { - return NULL; - } - Function *result = new Function(addr); - bool have_proto = false; - const List &children = func->getChildren(); - for (List::const_iterator it = children.begin(); it < children.end(); it++) { - const Element *child = get_child(it); - if (!have_proto && child->getName() != "funcproto") { - continue; - } - have_proto = true; - switch (tag_map[child->getName()]) { - case ast_tag_funcproto: - funcproto_handler(child, result); - break; - case ast_tag_syntax: - break; - case ast_tag_vardecl: { - Statement *s = vardecl_handler(child); - if (s) { - result->block.push_back(s); - num_decls++; - } - else { - //error - } - break; - } - case ast_tag_block: - if (num_decls && !num_blocks) { - result->block.push_back(new EmptyStatement()); - } - block_handler(child, &result->block); - num_blocks++; - break; - default: - break; - } - } - return result; -} - -VarDecl *find_decl(Function *ast, const string &sword) { - vector &bk = ast->block.block; - vector &parms = ast->prototype.parameters; - - //Scan function parameters - for (vector::iterator i = parms.begin(); i != parms.end(); i++) { - VarDecl *decl = *i; - if (decl->var->name == sword) { - return decl; - } - } - - //Scan locals - for (vector::iterator i = bk.begin(); i != bk.end(); i++) { - VarDecl *decl = dynamic_cast(*i); - if (decl) { - if (decl->var->name == sword) { - return decl; - } - } - else { - break; - } - } - - return NULL; -} - -VarDecl *find_decl(Function *ast, int col, int line) { - vector &bk = ast->block.block; - vector &parms = ast->prototype.parameters; - - //Scan function parameters - for (vector::iterator i = parms.begin(); i != parms.end(); i++) 
{ - VarDecl *decl = *i; - if (decl->col_start <= col && decl->col_end > col && decl->line_begin == line && decl->line_end == line) { - return decl; - } - } - - //Scan locals - for (vector::iterator i = bk.begin(); i != bk.end(); i++) { - VarDecl *decl = dynamic_cast(*i); - if (decl) { - if (decl->col_start <= col && decl->col_end > col && decl->line_begin == line && decl->line_end == line) { - return decl; - } - } - else { - break; - } - } - - return NULL; -} - +/* + Source for the blc IdaPro plugin + Copyright (c) 2019 Chris Eagle + Copyright (c) 2020 Alexander Pick + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., 59 Temple + Place, Suite 330, Boston, MA 02111-1307 USA + + Changelog: + ---------- + + Changes by Alexander Pick (alx@pwn.su) + + 2020-04-23 - colors and syntax highlightning added + +*/ + +#include +#include +#include +#include + +#include "xml.hh" +#include "ast.hh" +#include "ida_minimal.hh" +#include "plugin.hh" + +using std::map; +using std::set; + +//#define DEBUG_AST 1 + +#ifdef DEBUG_AST +#define dmsg(x, ...) msg(x, __VA_ARGS__) +#else +#define dmsg(x, ...) 
+#endif + +enum ast_tag_t { + ast_tag_null, + ast_tag_syntax, + ast_tag_break, + ast_tag_funcproto, + ast_tag_vardecl, + ast_tag_return_type, + ast_tag_type, + ast_tag_variable, + ast_tag_block, + ast_tag_statement, + ast_tag_funcname, + ast_tag_op, + ast_tag_label +}; + +enum op_keywords_t { + kw_null, + kw_if, + kw_switch, + kw_while, + kw_return, + kw_assign +}; + +static const string WHILE("while"); +static const string DO("do"); +static const string IF("if"); +static const string ELSE("else"); +static const string BREAK("break"); +static const string DEFAULT("default"); +static const string SWITCH("switch"); +static const string CASE("case"); +static const string GOTO("goto"); +static const string RETURN("return"); + +static const string LBRACE("{"); +static const string RBRACE("}"); +static const string LPAREN("("); +static const string RPAREN(")"); +static const string LBRACKET("["); +static const string RBRACKET("]"); +static const string COLON(":"); +static const string SEMICOLON(";"); +static const string COMMA(","); + +enum g_token { + g_null, + g_lbrace, + g_rbrace, + g_lparen, + g_rparen, + g_cond_close, + g_lbracket, + g_rbracket, + g_and, + g_xor, + g_or, + g_not, + g_bnot, + g_logical_and, + g_logical_or, + g_comma, + g_semi, + g_lshift, + g_rshift, + g_lt, + g_lte, + g_gt, + g_gte, + g_assign, + g_eq, + g_ne, + g_plus, + g_minus, + g_mod, + g_div, + g_star, + g_plus_eq, + g_minus_eq, + g_star_eq, + g_div_eq, + g_mod_eq, + g_and_eq, + g_or_eq, + g_xor_eq, + g_lshift_eq, + g_rshift_eq, + g_plusplus, + g_minusminus, + g_qmark, + g_colon, + + g_break, + g_keyword, + g_var, + g_funcname, + g_const, + g_type, + g_eol_cmt, + g_cmt_open, + g_cmt_close, + g_label, + g_symbol, + g_op +}; + +static map tag_map; +static map ops; +static set binary_ops; +static set unary_ops; +static map op_map; +static set reserved; +static map type_map; + +static const string empty_string(""); + +static void block_handler(const Element* el, Block* block); +//static Statement 
*inner_block(const Element *child); +static VarDecl* vardecl_handler(const Element* el); +static Statement* statement_handler(const Element* el); +static Expression* expr_handler(List::const_iterator& it, List::const_iterator& end, bool comma_ok = true); +static Switch* switch_handler(List::const_iterator& it); +static Else* else_handler(List::const_iterator& it); +static DoWhile* do_handler(List::const_iterator& it); + +static const string& getAttributeValue(const Element* el, const char* attr) { + int nattr = el->getNumAttributes(); + + for (int i = 0; i < nattr; i++) { + if (el->getAttributeName(i) == attr) { + return el->getAttributeValue(i); + } + } + return empty_string; +} + +// in some cases Ghidra uses and others , in all cases +// color="funcname" will be present +bool is_funcname_color(const Element* el) { + return getAttributeValue(el, "color") == "funcname"; +} + +bool is_keyword_color(const Element* el) { + return getAttributeValue(el, "color") == "keyword"; +} + +bool is_const_color(const Element* el) { + return getAttributeValue(el, "color") == "const"; +} + +bool is_global_color(const Element* el) { + return getAttributeValue(el, "color") == "global"; +} + +void escape_string(const string& str, string& escaped) { + for (string::const_iterator i = str.cbegin(); i != str.cend(); i++) { + switch (*i) { + case '\n': + escaped += "\\n"; + break; + case '\t': + escaped += "\\t"; + break; + case '\\': + escaped += "\\"; + break; + case '"': + escaped += "\\\""; + break; + default: + if (*i < 0x20 || *i == 0x7f) { + char buf[16]; + snprintf(buf, sizeof(buf), "\\x%02x", *i & 0xff); + escaped += buf; + } + else { + escaped.push_back(*i); + } + break; + } + } +} + +static const Element* find_child(const Element* el, const char* tag) { + const List& children = el->getChildren(); + for (List::const_iterator it = children.begin(); it != children.end(); it++) { + const Element* child = *it; + if (child->getName() == tag) { + return child; + } + } + return NULL; 
+} + +static const Element* get_child(List::const_iterator& it) { + const Element* child = *it; + dmsg("Processing %s/%s\n", child->getName().c_str(), child->getContent().c_str()); + return child; +} + +const char* debug_print(AstItem* exp) { + string l = exp->line; + exp->line.clear(); + exp->do_print(); + string r = exp->line; + exp->line = l; + return tag_remove(r.c_str()); +} + +AstItem::AstItem() : no_indent(false), no_semi(false), line_begin(1), +line_end(1), col_start(-1), col_end(-1), +color(COLOR_DEFAULT) {}; + +vector* AstItem::cfunc; +string AstItem::line; +size_t AstItem::indent; +size_t AstItem::line_index; + +void AstItem::flush(bool no_indent) { + string spaces; + if (!no_indent) { + spaces.append(indent, ' '); + } + if (cfunc) { + cfunc->push_back(spaces + line); + } + // dmsg("append: %s\n", cfunc->back().c_str()); + line.clear(); + line_index = 0; +} + +void AstItem::append(char ch, bool count) { + line.push_back(ch); + line_index += count ? 1 : 0; +} + +void AstItem::color_on(char tag) { + append(COLOR_ON, false); + append(tag, false); +} + +void AstItem::color_off(char tag) { + append(COLOR_OFF, false); + append(tag, false); +} + +void AstItem::append(const char* v) { + append(string(v)); +} + +void AstItem::append(const string& v) { + line.append(v); + line_index += v.length(); +} + +void AstItem::append_colored(char tag, const char* v) { + append_colored(tag, string(v)); +} + +void AstItem::append_colored(char tag, const string& v) { + color_on(tag); + append(v); + color_off(tag); +} + +void AstItem::print_in() { + if (cfunc) { + line_begin = cfunc->size(); + col_start = line_index; + } +} + +void AstItem::print_out() { + if (cfunc) { + line_end = cfunc->size(); + col_end = line_index; + } +} + +void AstItem::do_print() { + print_in(); + print(); + print_out(); +} + +void brace_print(AstItem& item, bool final_append = true) { + AstItem::append_colored(COLOR_SYMBOL, LBRACE); + AstItem::flush(); + AstItem::indent += 3; + item.do_print(); + 
AstItem::indent -= 3; + AstItem::append_colored(COLOR_SYMBOL, RBRACE); + if (final_append) { + AstItem::flush(); + } +} + +void Statement::print() { + line += ""; +} + +void Type::print() { + print(""); +} + +// type definitions +void Type::print(const string& var) { + bool need_space = true; + print_in(); + if (is_const) { + append_colored(COLOR_DNUM, "const "); // constant + } + append_colored(COLOR_SYMBOL, name); + if (ptr) { + if (!is_cast) { + append(' '); + } + line.append(ptr, '*'); + line_index += ptr; + need_space = false; + } + if (var.length() > 0) { + if (need_space) { + append(' '); + } + append_colored(COLOR_LIBNAME, var); //declarations in args and on top color + } + for (vector::iterator i = dims.begin(); i != dims.end(); i++) { + append_colored(COLOR_SYMBOL, LBRACKET); + if (*i) { + char buf[32]; + snprintf(buf, sizeof(buf), "%u", *i); + append_colored(COLOR_NUMBER, buf); + } + append_colored(COLOR_SYMBOL, RBRACKET); + } + print_out(); +} + +void Type::rename(const string& oldname, const string& newname) { + if (oldname == name) { + name = newname; + } +} + +void Expression::print() { + dmsg("Expression::print\n"); + append_colored(COLOR_LIBNAME, ""); +} + +//i.e. array indexes +void LiteralExpr::print() { + dmsg("LiteralExpr::print %s\n", val.c_str()); + append_colored(COLOR_DNUM, val); +} + +NameExpr::NameExpr(const string& var, bool _global) : name(var), global(_global) { + dmsg("NameExpr::NameExpr %s\n", name); + adjust_thunk_name(name); +} + +//process color for things like vars, params etc. 
+void NameExpr::print() { + dmsg("NameExpr::print %s\n", name.c_str()); + if (is_extern(name)) { + append_colored(COLOR_IMPNAME, name); + dmsg("COLOR_IMPNAME\n"); + } + else if (is_library_func(name)) { + append_colored(COLOR_MACRO, name); + dmsg("COLOR_MACRO\n"); + } + //for binaries with string names segments like macho + else if (is_string(name)) { + append_colored(COLOR_MACRO, name); + dmsg("COLOR_MACRO\n"); + //Todo: get string an display + } + else { + + //TODO: maybe use StringLiteral here? + string str = get_string(name); + + if (str != "") { + + append_colored(COLOR_DSTR, "\""); + string escaped; + escape_string(str, escaped); + append_colored(COLOR_DSTR, escaped); + append_colored(COLOR_DSTR, "\""); + + } + else { + append_colored(COLOR_LIBNAME, name); + } + + } +} + +void NameExpr::rename(const string& oldname, const string& newname) { + // dmsg("NameExpr::rename %s from %s to %s\n", name.c_str(), oldname.c_str(), newname.c_str()); + if (oldname == name) { + name = newname; + } +} + +// process function name colors +void FuncNameExpr::print() { + dmsg("FuncNameExpr::print %s\n", name.c_str()); + if (is_extern(name)) { + append_colored(COLOR_IMPNAME, name); + dmsg("FuncNameExpr COLOR_IMPNAME\n"); + } + else if (is_library_func(name)) { + append_colored(COLOR_MACRO, name); + dmsg("FuncNameExpr COLOR_MACRO\n"); + } + else { + append_colored(COLOR_KEYWORD, name); + dmsg("FuncNameExpr COLOR_KEYWORD\n"); + } +} + +void FuncNameExpr::rename(const string& oldname, const string& newname) { + if (oldname == name) { + name = newname; + } +} + +void LabelExpr::print() { + dmsg("LabelExpr::print %s\n", label.c_str()); + append(label); +} + +void LabelExpr::rename(const string& oldname, const string& newname) { + if (oldname == label) { + label = newname; + } +} + +void LabelStatement::print() { + dmsg("LabelStatement::print\n"); + line += label; + append_colored(COLOR_SYMBOL, COLON); +} + +void LabelStatement::rename(const string& oldname, const string& newname) { + 
if (oldname == label) { + label = newname; + } +} + +void GotoStatement::print() { + // dmsg("GotoStatement::print\n"); + append_colored(COLOR_KEYWORD, GOTO); + append(' '); + label->do_print(); +} + +void GotoStatement::rename(const string& oldname, const string& newname) { + label->rename(oldname, newname); +} + +void BreakStatement::print() { + dmsg("BreakStatement::print\n"); + append_colored(COLOR_KEYWORD, BREAK); +} + +ExprStatement::~ExprStatement() { + delete expr; +} + +void ExprStatement::print() { + dmsg("ExprStatement::print %s\n"); + expr->do_print(); +} + +void ExprStatement::rename(const string& oldname, const string& newname) { + expr->rename(oldname, newname); +} + +CommaExpr::~CommaExpr() { + delete lhs; + delete rhs; +} + +void CommaExpr::print() { + // dmsg("CommaExpr::print\n"); + lhs->do_print(); + append_colored(COLOR_SYMBOL, COMMA); + append(' '); + rhs->do_print(); +} + +void CommaExpr::rename(const string& oldname, const string& newname) { + lhs->rename(oldname, newname); + rhs->rename(oldname, newname); +} + +void BinaryExpr::print() { + dmsg("BinaryExpr::print %s\n", op.c_str()); + lhs->do_print(); + append(' '); + append_colored(COLOR_SYMBOL, op); + append(' '); + rhs->do_print(); +} + +void BinaryExpr::rename(const string& oldname, const string& newname) { + lhs->rename(oldname, newname); + rhs->rename(oldname, newname); +} + +void UnaryExpr::print() { + dmsg("UnaryExpr::print\n"); + append_colored(COLOR_SYMBOL, op); + expr->do_print(); +} + +void UnaryExpr::rename(const string& oldname, const string& newname) { + expr->rename(oldname, newname); +} + +CastExpr::CastExpr(const string& typ) { + type = new Type(typ); + type->is_cast = true; +} + +CastExpr::~CastExpr() { + delete type; +} + +void CastExpr::print() { + dmsg("CastExpr::print\n"); + type->do_print(); +} + +void CastExpr::rename(const string& oldname, const string& newname) { + type->rename(oldname, newname); +} + +void TypeCast::print() { + dmsg("TypeCast::print %s\n"); + 
type->do_print(); + expr->do_print(); +} + +void TypeCast::rename(const string& oldname, const string& newname) { + type->rename(oldname, newname); + expr->rename(oldname, newname); +} + +void IntegerLiteral::print() { + dmsg("IntegerLiteral::print %s\n", val.c_str()); + append_colored(COLOR_DNUM, val); +} + +uint64_t IntegerLiteral::get_value() { + return strtoull(val.c_str(), NULL, 0); +} + +void StringLiteral::print() { + dmsg("StringLiteral::print %s\n", val.c_str()); + append_colored(COLOR_DSTR, "\""); + string escaped; + escape_string(val, escaped); + append_colored(COLOR_DSTR, escaped); + append_colored(COLOR_DSTR, "\""); +} + +void CharExpr::print() { + dmsg("CharExpr::print %s\n", val.c_str()); + + append_colored(COLOR_CHAR, "'"); + append_colored(COLOR_CHAR, val); + append_colored(COLOR_CHAR, "'"); +} + +void ParenExpr::print() { + dmsg("ParenExpr::print\n"); + append_colored(COLOR_SYMBOL, LPAREN); + if (inner) { + inner->do_print(); + } + append_colored(COLOR_SYMBOL, RPAREN); +} + +void ParenExpr::rename(const string& oldname, const string& newname) { + if (inner) { + // dmsg("ParenExpr::rename from %s to %s\n", oldname.c_str(), newname.c_str()); + inner->rename(oldname, newname); + } +} + +void ArrayExpr::print() { + dmsg("ArrayExpr::print %s\n"); + array->do_print(); + append_colored(COLOR_SYMBOL, LBRACKET); + index->do_print(); + append_colored(COLOR_SYMBOL, RBRACKET); +} + +void ArrayExpr::rename(const string& oldname, const string& newname) { + array->rename(oldname, newname); + index->rename(oldname, newname); +} + +void Block::print() { + for (vector::iterator i = block.begin(); i != block.end(); i++) { + Statement* s = *i; + if (s) { + s->do_print(); + } + else { + dmsg("Attempting to print a NULL Statement\n"); + } + if (!s->no_semi) { + append_colored(COLOR_SYMBOL, SEMICOLON); + } + flush(s->no_indent); + } +} + +Block::~Block() { + for (vector::iterator i = block.begin(); i != block.end(); i++) { + delete* i; + } +} + +void Block::rename(const 
string& oldname, const string& newname) { + for (vector::iterator i = block.begin(); i != block.end(); i++) { + Statement* s = *i; + if (s) { + // dmsg("Block::rename from %s to %s\n", oldname.c_str(), newname.c_str()); + s->rename(oldname, newname); + } + } +} + +void VarDecl::print() { + dmsg("VarDecl::print %s\n", var->name.c_str()); + type->print(var->name); +} + +const string& VarDecl::getName() { + Expression* expr = var; + while (true) { + NameExpr* n = dynamic_cast(expr); + if (n) { + return n->name; + } + UnaryExpr* u = dynamic_cast(expr); + if (u) { + expr = u->expr; + continue; + } + ParenExpr* p = dynamic_cast(expr); + if (p) { + expr = p->inner; + continue; + } + ArrayExpr* a = dynamic_cast(expr); + if (a) { + expr = a->array; + continue; + } + dmsg("VarDecl unexpected expr type\n"); + return empty_string; + } +} + +void VarDecl::rename(const string& oldname, const string& newname) { + type->rename(oldname, newname); + var->rename(oldname, newname); + if (init) { + init->rename(oldname, newname); + } +} + +VarDecl::~VarDecl() { + delete type; + delete var; + delete init; +} + +void Funcproto::print() { + dmsg("Funcproto::print %s\n", name.c_str()); + + return_type->do_print(); + for (vector::iterator i = keywords.begin(); i != keywords.end(); i++) { + append(' '); + append(*i); + } + append(' '); + append_colored(COLOR_SYMBOL, name); + append_colored(COLOR_SYMBOL, LPAREN); + for (vector::iterator i = parameters.begin(); i != parameters.end(); i++) { + if (i != parameters.begin()) { + append_colored(COLOR_SYMBOL, COMMA); + append(' '); + } + (*i)->do_print(); + } + append_colored(COLOR_SYMBOL, RPAREN); +} + +void Funcproto::rename(const string& oldname, const string& newname) { + return_type->rename(oldname, newname); + if (oldname == name) { + name = newname; + } + for (vector::iterator i = parameters.begin(); i != parameters.end(); i++) { + (*i)->rename(oldname, newname); + } +} + +Funcproto::~Funcproto() { + delete return_type; + for 
(vector::iterator i = parameters.begin(); i != parameters.end(); i++) { + delete* i; + } +} + +CallExpr::~CallExpr() { + delete func; + delete args; +} + +void CallExpr::print() { + dmsg("CallExpr::print\n"); + func->do_print(); + args->do_print(); +} + +void CallExpr::rename(const string& oldname, const string& newname) { + // dmsg("CallExpr::rename from %s to %s\n", oldname.c_str(), newname.c_str()); + func->rename(oldname, newname); + args->rename(oldname, newname); +} + +void Else::print() { + append_colored(COLOR_KEYWORD, ELSE); + append(' '); + brace_print(block, false); +} + +void Else::rename(const string& oldname, const string& newname) { + block.rename(oldname, newname); +} + +void If::print() { + append_colored(COLOR_KEYWORD, IF); + append(' '); + append_colored(COLOR_SYMBOL, LPAREN); + cond->do_print(); + append_colored(COLOR_SYMBOL, RPAREN); + append(' '); + brace_print(block, _else != NULL); + if (_else) { + _else->do_print(); + } +} + +void If::rename(const string& oldname, const string& newname) { + ConditionalStatement::rename(oldname, newname); + if (_else) { + _else->rename(oldname, newname); + } +} + +void ConditionalStatement::rename(const string& oldname, const string& newname) { + // dmsg("ConditionalStatement::rename from %s to %s\n", oldname.c_str(), newname.c_str()); + cond->rename(oldname, newname); + block.rename(oldname, newname); +} + +void While::print() { + append_colored(COLOR_KEYWORD, WHILE); + append(' '); + append_colored(COLOR_SYMBOL, LPAREN); + cond->do_print(); + append_colored(COLOR_SYMBOL, RPAREN); + append(' '); + brace_print(block, false); +} + +void DoWhile::print() { + append_colored(COLOR_KEYWORD, DO); + append(' '); + brace_print(block, false); + append(' '); + append_colored(COLOR_KEYWORD, WHILE); + append(' '); + append_colored(COLOR_SYMBOL, LPAREN); + cond->do_print(); + append_colored(COLOR_SYMBOL, RPAREN); +} + +void Case::print() { + if (is_default) { + append_colored(COLOR_KEYWORD, DEFAULT); + } + else { + 
append_colored(COLOR_KEYWORD, CASE); + append(' '); + append(label); + } + append_colored(COLOR_SYMBOL, COLON); + flush(); + indent += 3; + Block::print(); + indent -= 3; +} + +void Switch::print() { + append_colored(COLOR_KEYWORD, SWITCH); + append(' '); + append_colored(COLOR_SYMBOL, LPAREN); + cond->do_print(); + append_colored(COLOR_SYMBOL, RPAREN); + append(' '); + append_colored(COLOR_SYMBOL, LBRACE); + flush(); + indent += 3; + for (vector::iterator i = cases.begin(); i != cases.end(); i++) { + (*i)->do_print(); + } + indent -= 3; + append_colored(COLOR_SYMBOL, "}"); +} + +void Switch::rename(const string& oldname, const string& newname) { + cond->rename(oldname, newname); + for (vector::iterator i = cases.begin(); i != cases.end(); i++) { + (*i)->rename(oldname, newname); + } +} + +void Return::print() { + append_colored(COLOR_KEYWORD, RETURN); + if (expr) { + append(' '); + expr->do_print(); + } +} + +void Return::rename(const string& oldname, const string& newname) { + if (expr) { + expr->rename(oldname, newname); + } +} + +AssignExpr::~AssignExpr() { + delete lval; + delete rval; +} + +void AssignExpr::print() { + dmsg("AssignExpr::print %s\n"); + + lval->do_print(); + append(' '); + append_colored(COLOR_SYMBOL, "="); + append(' '); + rval->do_print(); +} + +void AssignExpr::rename(const string& oldname, const string& newname) { + // dmsg("AssignExpr::rename from %s to %s\n", oldname.c_str(), newname.c_str()); + lval->rename(oldname, newname); + rval->rename(oldname, newname); +} + +void Ternary::print() { + dmsg("Ternary::print\n"); + + expr->do_print(); + append(' '); + append_colored(COLOR_SYMBOL, "?"); + append(' '); + _true->do_print(); + append(' '); + append_colored(COLOR_SYMBOL, COLON); + append(' '); + _false->do_print(); +} + +void Ternary::rename(const string& oldname, const string& newname) { + expr->rename(oldname, newname); + _true->rename(oldname, newname); + _false->rename(oldname, newname); +} + +void Function::print() { + 
dmsg("Function::print\n"); + + prototype.do_print(); + flush(); + brace_print(block); +} + +void Function::print(vector* cfunc) { + + dmsg("Function::print cfunc \n"); + line_index = 0; + AstItem::cfunc = cfunc; + line.clear(); + indent = 0; + + do_print(); + + line_index = 0; + AstItem::cfunc = NULL; + line.clear(); + indent = 0; +} + +void Function::rename(const string& oldname, const string& newname) { + prototype.rename(oldname, newname); + block.rename(oldname, newname); +} + +List::const_iterator find_match(List::const_iterator& it, const string& sym, const string& open) { + List::const_iterator res = it; + while ((*res)->getContent() != sym || getAttributeValue(*res, "close") != open) { + res++; + } + return res; +} + +List::const_iterator find(List::const_iterator& it, const string& sym) { + List::const_iterator res = it; + while ((*res)->getContent() != sym) { + res++; + } + return res; +} + +static bool is_const_expr(Expression* e, uint64_t* val) { + IntegerLiteral* num = dynamic_cast(e); + if (num) { + *val = num->get_value(); + dmsg("Found const expression (a) %s\n", num->val.c_str()); + return true; + } + NameExpr* ne = dynamic_cast(e); + if (ne) { + char* endptr; + const char* s = ne->name.c_str(); + *val = strtoull(s, &endptr, 0); + if (endptr != s && *endptr == 0) { + //name was a valid integer literal + dmsg("Found const expression (b) %s\n", s); + return true; + } + //not an int so see if name refers to const data + uint64_t addr; + if (ne->global && address_of(ne->name, &addr) && is_read_only(addr)) { + dmsg("Found const expression (d) %s\n", ne->name.c_str()); + if (get_value(addr, val)) { + return true; + } + } + return false; + } + UnaryExpr* ue = dynamic_cast(e); + if (ue) { + if (is_const_expr(ue->expr, val)) { + uint64_t old = *val; + if (ue->op == "-") { + *val = 0 - *val; + } + else if (ue->op == "~") { + *val = ~*val; + } + else if (ue->op == "!") { + *val = !*val; + } + else { + return false; + } + dmsg("Found const expression (c) 
%s0x%lx\n", ue->op.c_str(), old); + return true; + } + } + return false; +} + +static Expression* simplify_const(uint64_t cval) { + dmsg("simplify_const for 0x%lx\n", cval); + string val; + if (is_function_start(cval)) { + get_name(val, cval, 0); + return new FuncNameExpr(val); + } + if (get_string(cval, val)) { + dmsg("simplify_const became a string: %s\n", val.c_str()); + return new StringLiteral(val); + } + if (is_named_addr(cval, val)) { + return new UnaryExpr("&", new NameExpr(val)); + } + return NULL; +} + +const string& map_type(const string& type_name) { + if (type_map.find(type_name) != type_map.end()) { + return type_map[type_name]; + } + return type_name; +} + +static Type* type_handler(const Element* el) { + //map the type name here + const string& type_name = el->getContent(); + return new Type(map_type(type_name)); +} + +static Return* return_handler(List::const_iterator& it, List::const_iterator& end) { + Return* result = new Return(); + Expression* expr = expr_handler(it, end); + EmptyExpr* ee = dynamic_cast(expr); + if (ee) { + delete ee; + result->expr = NULL; + } + else { + result->expr = expr; + } + return result; +} + +static Statement* statement_handler(const Element* el) { + static int scount = 0; + dmsg("statement_handler in %d\n", scount++); + Statement* result = NULL; + Expression* lhs = NULL; + const List& children = el->getChildren(); + List::const_iterator end = children.end(); + for (List::const_iterator it = children.begin(); it < end; it++) { + const Element* child = get_child(it); + if (is_keyword_color(child) && child->getContent() == BREAK) { + return new BreakStatement(); + } + switch (tag_map[child->getName()]) { + case ast_tag_op: { + //need to consume consecutive children at this level to form a statement + switch (op_map[child->getContent()]) { + case kw_return: + result = return_handler(++it, end); + dmsg("statement_handler out(1) %d\n", --scount); + return result; + default: + dmsg("no op_map match for '%s' in statement\n", 
child->getContent().c_str()); + lhs = expr_handler(it, end); + dmsg("no op_map match for '%s' in statement result: %s\n", child->getContent().c_str(), debug_print(lhs)); + if (result) { + dmsg("oddly, result is '%s'\n", debug_print(result)); + } + break; + } + break; + } + case ast_tag_label: + result = new LabelStatement(child->getContent()); + dmsg("statement_handler out(2) %d - Label: %s\n", --scount, debug_print(result)); + return result; + case ast_tag_syntax: { + if (child->getContent() == GOTO) { + GotoStatement* g = new GotoStatement(); + g->label = expr_handler(++it, end); + dmsg("statement_handler out(3) %d\n", --scount); + return g; + } + else { + dmsg("no syntax match for '%s' in statement\n", child->getContent().c_str()); + dmsg("Trying to build an expression\n"); + Expression* expr = expr_handler(it, end); + if (expr) { + dmsg("statement_handler out(4) %d\n", --scount); + return new ExprStatement(expr); + } + } + break; + } + default: + dmsg("no tag_map match for %s in statement, trying expression\n", child->getName().c_str()); + + lhs = expr_handler(it, end); + break; + } + } + if (result == NULL) { + dmsg("statement_handler is returning NULL\n"); + if (lhs != NULL) { + result = new ExprStatement(lhs); + } + else { + dmsg("statement_handler has no result\n"); + } + } + else { + dmsg("returning from statement_handler -> %p\n", result); + + } + dmsg("statement_handler out(0) %d - %s\n", --scount, debug_print(result)); + return result; +} + +static VarDecl* vardecl_handler(const Element* el) { + bool in_dim = false; + const List& children = el->getChildren(); + VarDecl* result = new VarDecl(); + List::const_iterator it; + List::const_iterator end = children.end(); + for (it = children.begin(); result->init == NULL && it < end; it++) { + const Element* child = get_child(it); + switch (tag_map[child->getName()]) { + case ast_tag_type: + result->type = type_handler(child); + break; + case ast_tag_op: + if (child->getContent() == "*" && result->type) { + 
result->type->ptr++; + } + else if (child->getContent() == "=") { + result->init = expr_handler(++it, end); + } + break; + case ast_tag_variable: + result->var = new NameExpr(child->getContent()); + break; + case ast_tag_syntax: { + const string& content = child->getContent(); + if (content == "[") { + in_dim = true; + } + else if (content == "]") { + in_dim = false; + } + else if (in_dim && is_const_color(child) && result->type) { + result->type->dims.push_back(strtoul(child->getContent().c_str(), NULL, 0)); + } + break; + } + default: + break; + } + } + return result; +} + +static void funcproto_handler(const Element* el, Function* f) { + bool have_proto = false; + bool have_name = false; + const List& children = el->getChildren(); + for (List::const_iterator it = children.begin(); it < children.end(); it++) { + const Element* child = get_child(it); + if (have_proto && !have_name && is_keyword_color(child)) { + f->prototype.keywords.push_back(child->getContent()); + continue; + } + switch (tag_map[child->getName()]) { + case ast_tag_return_type: { + f->prototype.return_type = type_handler(find_child(child, "type")); + const List& rchildren = child->getChildren(); + for (List::const_iterator cit = rchildren.begin(); cit != rchildren.end(); cit++) { + const Element* e = *cit; + if (e->getName() == "op" && e->getContent() == "*") { + f->prototype.return_type->ptr++; + } + } + have_proto = true; + break; + } + case ast_tag_syntax: + break; + case ast_tag_vardecl: { + VarDecl* d = vardecl_handler(child); + if (d) { + f->prototype.parameters.push_back(d); + } + else { + //error + } + break; + } + case ast_tag_funcname: + f->prototype.name = child->getContent(); + have_name = true; + break; + default: + break; + } + } +} + +static CastExpr* cast_handler(List::const_iterator& it, List::const_iterator& end) { + dmsg("Entering cast_handler\n"); + const Element* child = get_child(it); + const string& type_name = child->getContent(); + CastExpr* result = new 
CastExpr(map_type(type_name)); //map type name change here + while (++it < end) { + child = get_child(it); + if (child->getName() == "op") { + if (child->getContent() == "*") { + result->type->ptr++; + } + else { + dmsg("cast_handler unknown op: %s\n", child->getContent().c_str()); + } + } + else if (child->getName() == "syntax") { + if (child->getContent() == RPAREN) { + dmsg("Leaving cast_handler (1) - %p\n", result); + it--; + return result; + } + else { + dmsg("cast_handler unknown syntax: %s\n", child->getContent().c_str()); + } + } + else { + dmsg("cast_handler unknown tag: %s\n", child->getName().c_str()); + } + } + dmsg("Leaving cast_handler (2) - %p\n", result); + return result; +} + +Expression* make_name(const string& name, bool global) { + //add checks to see if name is a const, then convert to the const + //or whether name is a static string, then convert to quoted string + return new NameExpr(name, global); +} + +Expression* make_variable(const Element* var) { + Expression* result = NULL; + const string& text = var->getContent(); + + //add checks to see if name is a const, then convert to the const + //or whether name is a static string, then convert to quoted string + + if (is_const_color(var)) { + char* end; + uint64_t val = strtoull(text.c_str(), &end, 0); + if (*end == 0) { + dmsg("numeric literal: %s\n", text.c_str()); + result = new IntegerLiteral(text); + Expression* e = simplify_const(val); + if (e) { + delete result; + result = e; + } + } + else { + dmsg("other literal: %s\n", text.c_str()); + result = new LiteralExpr(text); + } + } + else if (is_global_color(var)) { + uint64_t addr; + if (address_of(text, &addr) && is_read_only(addr) && !is_function_start(addr)) { + //try to dereference this? 
+ dmsg("const global: %s\n", text.c_str()); + result = make_name(text, true); + } + else { + result = make_name(text, true); + } + } + else { + result = make_name(text, false); + } + return result; +} + +static Expression* make_unary(const string& op, List::const_iterator& it, List::const_iterator& end) { + UnaryExpr* u = new UnaryExpr(op, expr_handler(it, end, false)); + if (op == "*") { + NameExpr* n = dynamic_cast(u->expr); + if (n) { + dmsg("made unary expr: %s%s\n", op.c_str(), n->name.c_str()); + string new_name; + if (simplify_deref(n->name, new_name)) { + return make_name(new_name, n->global); + delete u; + } + } + } + return u; +} + +static Expression* make_binary(const string& op, Expression* lhs, List::const_iterator& it, List::const_iterator& end) { + uint64_t v1; + uint64_t v2; + BinaryExpr* b = new BinaryExpr(op, lhs, expr_handler(it, end, false)); + if (op == "+") { + if (is_const_expr(b->lhs, &v1) && is_const_expr(b->rhs, &v2)) { + Expression* e = simplify_const(v1 + v2); + if (e) { + delete b; + return e; + } + } + } + else if (op == "-") { + if (is_const_expr(b->lhs, &v1) && is_const_expr(b->rhs, &v2)) { + Expression* e = simplify_const(v1 - v2); + if (e) { + delete b; + return e; + } + } + } + return b; +} + +static Expression* expr_handler(List::const_iterator& it, List::const_iterator& end, bool comma_ok) { + static int ecount = 0; + Expression* result = NULL; + const string* open = NULL; + ParenExpr* p = NULL; + dmsg("expr_handler in %d\n", ecount++); + for (; it < end; it++) { + const Element* child = get_child(it); + if (is_funcname_color(child)) { + dmsg("expr_handler op building CallExpr(%s)\n", child->getContent()); + + FuncNameExpr* fne = new FuncNameExpr(child->getContent()); + CallExpr* call = new CallExpr(fne, expr_handler(++it, end)); + + dmsg("expr_handler fne name %s\n", fne->name); + dmsg("expr_handler op built CallExpr - %s\n", debug_print(call)); + dmsg("expr_handler out(3) %d\n", --ecount); + + return call; + } + switch 
(tag_map[child->getName()]) { + case ast_tag_variable: + result = make_variable(child); + break; + case ast_tag_type: { + CastExpr* cast = cast_handler(it, end); + result = cast; + dmsg("expr_handler out(1) %d\n", --ecount); + return result; + } + case ast_tag_label: + result = new LabelExpr(child->getContent()); + dmsg("expr_handler out(2) %d\n", --ecount); + return result; + case ast_tag_funcname: { + dmsg("expr_handler tag building CallExpr(2)\n"); + CallExpr* call = new CallExpr(new FuncNameExpr(child->getContent()), expr_handler(++it, end)); + + dmsg("expr_handler tag built CallExpr - %s\n", debug_print(call)); + dmsg("expr_handler out(3) %d\n", --ecount); + return call; + } + case ast_tag_statement: { + dmsg("expr_handler tag building statement\n"); + Statement* s = statement_handler(child); + ExprStatement* e = dynamic_cast(s); + if (e) { + //take ownership of the sub-expression; + result = e->expr; + e->expr = NULL; + delete e; + } + else { + dmsg("Expected ExprStatement but didn't get one\n"); + delete s; + } + } + case ast_tag_syntax: { + const string& op = child->getContent(); + if (is_const_color(child)) { + dmsg("expr_handler syntax building LiteralExpr for %s\n", op.c_str()); + result = new LiteralExpr(op); + dmsg(" %s\n", debug_print(result)); + } + else if (unary_ops.find(op) != unary_ops.end() && result == NULL) { + dmsg("expr_handler syntax building UnaryExpr for %s\n", op.c_str()); + //result = new UnaryExpr(op, expr_handler(++it, end)); + result = make_unary(op, ++it, end); + dmsg(" %s\n", debug_print(result)); + } + else if (binary_ops.find(op) != binary_ops.end() && result != NULL) { + dmsg("expr_handler syntax building BinaryExpr for %s\n", op.c_str()); + result = make_binary(op, result, ++it, end); + // result = new BinaryExpr(op, result, expr_handler(++it, end)); + dmsg(" %s\n", debug_print(result)); + } + else { + switch (ops[op]) { + case g_null: + dmsg("expr_handler syntax op is g_null for '%s'\n", op.c_str()); + if (op.length() > 0) { + 
char* end; + uint64_t val = strtoull(op.c_str(), &end, 0); + if (*end == 0) { + result = new IntegerLiteral(op); + } + else { + result = new LiteralExpr(op); + } + } + break; + case g_lparen: { + //recurse into expr, we now have a parenthized expression + open = &getAttributeValue(child, "open"); + dmsg("expr_handler syntax building ParenExpr for %s\n", open->c_str()); + p = new ParenExpr(expr_handler(++it, end, true)); + it++; //increment past the close ) + dmsg(" ParenExpr(%s): %s\n", open->c_str(), debug_print(p)); + CastExpr* c = dynamic_cast(p->inner); + if (c) { + result = new TypeCast(p, expr_handler(++it, end)); + dmsg(" TypeCast(%s): %s\n", open->c_str(), debug_print(result)); + } + else { + if (result != NULL) { + //This looks more like a fucntion call then + //test special case for function name + ParenExpr* rp = dynamic_cast(result); + if (rp) { + NameExpr* rn = dynamic_cast(rp->inner); + if (rn) { + Expression* ne = make_name(rn->name, rn->global); + delete rp; + result = ne; + dmsg(" ne: %s\n", debug_print(result)); + } + } + result = new CallExpr(result, p); + dmsg(" Looks like function call: %s\n", debug_print(result)); + } + else { + result = p; + } + } + break; + } + case g_rparen: { + const string& close = getAttributeValue(child, "close"); + dmsg("expr_handler rolling back rparen at level %d\n", ecount - 1); + it--; + + dmsg("expr_handler terminating on rparen %s\n", close.c_str()); + dmsg("expr_handler out(5) %d - %p - %s\n", --ecount, result, typeid(result).name()); + return result; + } + case g_lbracket: { + //recurse into expr, we now have a parenthized expression + Expression* index = expr_handler(++it, end); + result = new ArrayExpr(result, index); + break; + } + case g_rbracket: { + // this is probably unmatched, so return what we have to caller + dmsg("expr_handler terminating on rbracket\n"); + const string& close = getAttributeValue(child, "close"); + dmsg("expr_handler out(6) %d - %p\n", --ecount, result); + return result; + } + case 
g_comma: { //never get here ?? + Expression* rhs = expr_handler(++it, end); + result = new CommaExpr(result, rhs); + dmsg("expr_handler comma out(7) %d - %p\n", --ecount, result); + return result; + } + case g_semi: { + dmsg("expr_handler terminating on semicolon\n"); + dmsg("expr_handler out(8) %d - %p\n", --ecount, result); + return result; + } + case g_assign: { + dmsg("expr_handler terminating on assign\n"); + dmsg("expr_handler out(9) %d - %p\n", --ecount, result); + return result; + } + } + } + break; + } + case ast_tag_op: { + const string& op = child->getContent(); + if (unary_ops.find(op) != unary_ops.end() && result == NULL) { + dmsg("expr_handler op building UnaryExpr\n"); + //result = new UnaryExpr(op, expr_handler(++it, end)); + result = make_unary(op, ++it, end); + dmsg("expr_handler op built UnaryExpr(%p) for %s\n", result, op.c_str()); + dmsg(" %s\n", debug_print(result)); + } + else if (binary_ops.find(op) != binary_ops.end() && result != NULL) { + dmsg("expr_handler op building BinaryExpr\n"); + result = make_binary(op, result, ++it, end); + // result = new BinaryExpr(op, result, expr_handler(++it, end)); + dmsg("expr_handler op building BinaryExpr(%p) for %s\n", result, op.c_str()); + dmsg(" %s\n", debug_print(result)); + //dmsg("expr_handler out %d(10) - %p\n", --ecount, result); + //return result; + } + else { + switch (ops[op]) { + case g_null: + dmsg("expr_handler op op is g_null\n"); + break; + case g_assign: { + dmsg("expr_handler op building AssignExpr\n"); + Expression* rhs = expr_handler(++it, end); + result = new AssignExpr(result, rhs); + dmsg("expr_handler op built AssignExpr(%p) - %s\n", result, debug_print(result)); + dmsg("expr_handler out(11) %d\n", --ecount); + return result; + } + case g_comma: { //comma always shows up as an op? 
+ if (comma_ok) { + Expression* rhs = expr_handler(++it, end); + result = new CommaExpr(result, rhs); + dmsg("expr_handler out(12) %d\n", --ecount); + } + else { + it--; + dmsg("expr_handler out(12.5) %d\n", --ecount); + } + return result; + } + default: + dmsg("expr_handler no case for op/%s\n", op.c_str()); + break; + } + } + break; + } + default: + dmsg("expr_handler unhandled tag_map: %s(%s)\n", child->getName().c_str(), child->getContent().c_str()); + break; + } + //it can be advanced in some of the functions called above + if (it == end) { + break; + } + } + + dmsg("expr_handler out(side loop) %d - %p\n", --ecount, result); + if (result == NULL) { + return new EmptyExpr(); + } + else { + dmsg(" returning: %s\n", debug_print(result)); + } + return result; +} + +static void conditional_common(ConditionalStatement* cs, List::const_iterator& it) { + const Element* child; + + it = find(it, LPAREN); + const string& open = getAttributeValue(*it, "open"); + it++; + + List::const_iterator end = find_match(it, RPAREN, open); + + cs->cond = expr_handler(it, end); + + it = ++end; //resume after condition's close paren + + child = get_child(it); + while (child->getName() != "block" && child->getName() != "statement") { + it++; + child = get_child(it); + } + if (child->getName() == "block") { + block_handler(child, &cs->block); + } + else { //statement + cs->block.push_back(statement_handler(child)); + } +} + +static If* if_handler(List::const_iterator& it) { + If* result = new If(); + dmsg("building new if\n"); + + conditional_common(result, it); + + //don't try to handle else here + //check for else in main handler + + return result; +} + +static While* while_handler(List::const_iterator& it, List::const_iterator& end) { + static int wcount = 0; + While* result = new While(); + dmsg("building new while - %d\n", wcount++); + + //ghidra in its infinite wisdom does not place the body of + //a while inside of a tag, but MAYBE, a block enclose + //everything from 'while(...) 
{...}' + + it = find(it, LPAREN); + const string& open = getAttributeValue(*it, "open"); + it++; + + List::const_iterator cend = find_match(it, RPAREN, open); + + result->cond = expr_handler(it, cend); + + it = ++cend; //resume after condition's close paren + + //let's hope they at least brace everything + it = find(it, LBRACE); + it++; + + //this is basically the same as a block_handler loop + //without the benefit of knowing where the end of the child + //list is + while (it < end) { + //we're not inside a block so we don't have a defined end + //point for child iteration + const Element* child = get_child(it); + if (child->getContent() == "}") { + //this is the only way to know we've reached the end at this level? + break; + } + + switch (tag_map[child->getName()]) { + case ast_tag_label: { + LabelStatement* label = new LabelStatement(child->getContent()); + result->push_back(label); + break; + } + case ast_tag_block: { + dmsg("while_handler::block\n"); + block_handler(child, &result->block); + break; + } + case ast_tag_op: { //this will be a compound statement?? + dmsg("while_handler::op\n"); + //need to consume consecutive children at this level to form a statement + switch (op_map[child->getContent()]) { + case kw_if: { + dmsg("while_handler::kw_if\n"); + If* _if = if_handler(++it); + if (_if) { + result->push_back(_if); + } + else { + } + break; + } + case kw_switch: { + dmsg("while_handler::kw_switch\n"); + Switch* sw = switch_handler(++it); + if (sw) { + result->push_back(sw); + } + else { + } + break; + } + case kw_while: { + //I hope this can never happend without first being in a nested + //otherwise the end iterator being passed in below will be the end of the + //outer while, not the inner while we are about to parse. 
+ dmsg("while_handler::kw_while\n"); + While* w = while_handler(++it, end); + if (w) { + result->push_back(w); + } + else { + } + break; + } + case kw_return: + // block->block.push_back(return_handler(it)); + break; + default: + dmsg("while_handler no op_map match for %s\n", child->getContent().c_str()); + break; + } + break; + } + case ast_tag_statement: { + dmsg("while_handler::statement\n"); + Statement* s = statement_handler(child); + if (s) { + result->push_back(s); + } + else { + //error + } + break; + } + case ast_tag_syntax: + if (child->getContent() == ELSE) { + If* _if = dynamic_cast(result->back()); + if (_if) { + dmsg("while_handler appending else to previous if\n"); + Else* _else = else_handler(it); + if (_else) { + _if->_else = _else; + } + else { + } + } + else { + //error, we don't have an if statement to pair with the else + dmsg("Seeing else, bu previous is %s\n", typeid(result->back()).name()); + } + } + else if (child->getContent() == DO) { + dmsg("while_handler::statement\n"); + Statement* s = do_handler(it); + if (s) { + result->push_back(s); + } + else { + //error + } + } + break; + default: + break; + } + it++; + } + dmsg("while_handler out %d\n", --wcount); + return result; +} + +static Case* build_case(List::const_iterator& it, bool is_default = false) { + Case* result = new Case(is_default); + + if (!is_default) { + while (!is_const_color(*it)) { + it++; + } + result->label = (*it)->getContent(); + dmsg("case label is %s\n", result->label.c_str()); + + it++; + } + const Element* child = get_child(it); + while (child->getName() != "block" && child->getName() != "statement") { + it++; + child = get_child(it); + } + if (child->getName() == "block") { + block_handler(child, result); + } + else { //statement + result->block.push_back(statement_handler(child)); + } + it++; + + return result; +} + +static Switch* switch_handler(List::const_iterator& it) { + Switch* result = new Switch(); + const Element* child; + dmsg("building new switch\n"); 
+ + it = find(it, LPAREN); + const string& open = getAttributeValue(*it, "open"); + it++; + + List::const_iterator end = find_match(it, RPAREN, open); + + result->cond = expr_handler(it, end); + + it = ++end; //resume after condition's close paren + + while (true) { + child = get_child(it); + if (child->getContent() == "}") { + break; + } + else if (child->getContent() == CASE) { + result->cases.push_back(build_case(it)); + } + else if (child->getContent() == DEFAULT) { + result->cases.push_back(build_case(it, true)); + result->cases.back()->is_default = true; + } + it++; + } + return result; +} + +static DoWhile* do_handler(List::const_iterator& it) { + DoWhile* result = new DoWhile(); + const Element* child; + dmsg("building new do/while\n"); + + child = get_child(it); + while (child->getName() != "block" && child->getName() != "statement") { + it++; + child = get_child(it); + } + if (child->getName() == "block") { + block_handler(child, &result->block); + } + else { //statement + result->block.push_back(statement_handler(child)); + } + + //find the condition + while (true) { + child = get_child(it); + if (child->getContent() == LPAREN) { + break; + } + it++; + } + const string& open = getAttributeValue(child, "open"); + it++; + + List::const_iterator end = find_match(it, RPAREN, open); + + result->cond = expr_handler(it, end); + + it = ++end; //resume after condition's close paren + + return result; +} + +static Else* else_handler(List::const_iterator& it) { + const Element* child = get_child(it); + Else* result = new Else(); + while (child->getName() != "block" && child->getName() != "statement") { + it++; + child = get_child(it); + } + if (child->getName() == "block") { + block_handler(child, &result->block); + } + else { //statement + result->block.push_back(statement_handler(child)); + } + dmsg("built an else - %p\n", result); + return result; +} + +static void block_handler(const Element* el, Block* block) { + static int bcount = 0; + dmsg("block_handler in 
%d\n", bcount++); + const List& children = el->getChildren(); + List::const_iterator it = children.begin(); + List::const_iterator end = children.end(); + while (it < end) { + const Element* child = get_child(it); + switch (tag_map[child->getName()]) { + case ast_tag_label: { + LabelStatement* label = new LabelStatement(child->getContent()); + block->push_back(label); + break; + } + case ast_tag_block: { + dmsg("block_handler::block\n"); + block_handler(child, block); + break; + } + case ast_tag_op: { //this will be a compound statement?? + dmsg("block_handler::op\n"); + //need to consume consecutive children at this level to form a statement + switch (op_map[child->getContent()]) { + case kw_if: { + dmsg("block_handler::kw_if\n"); + If* _if = if_handler(++it); + if (_if) { + block->push_back(_if); + } + else { + } + break; + } + case kw_switch: { + dmsg("block_handler::kw_switch\n"); + Switch* sw = switch_handler(++it); + if (sw) { + block->push_back(sw); + } + else { + } + break; + } + case kw_while: { + dmsg("block_handler::kw_while\n"); + While* w = while_handler(++it, end); + if (w) { + block->push_back(w); + } + else { + } + break; + } + case kw_return: + // block->block.push_back(return_handler(it)); + break; + default: + dmsg("no op_map match for %s\n", child->getContent().c_str()); + break; + } + break; + } + case ast_tag_statement: { + dmsg("block_handler::statement\n"); + Statement* s = statement_handler(child); + if (s) { + block->push_back(s); + } + else { + //error + } + break; + } + case ast_tag_syntax: + if (child->getContent() == ELSE) { + If* _if = dynamic_cast(block->back()); + if (_if) { + dmsg("appending else to previous if\n"); + Else* _else = else_handler(it); + if (_else) { + _if->_else = _else; + } + else { + } + } + else { + //error, we don't have an if statement to pair with the else + dmsg("Seeing else, bu previous is %s\n", typeid(block->back()).name()); + } + } + else if (child->getContent() == DO) { + 
dmsg("block_handler::statement\n"); + Statement* s = do_handler(it); + if (s) { + block->push_back(s); + } + else { + //error + } + } + break; + default: + break; + } + //it can be advance in some of the functions called above + if (it == end) { + break; + } + it++; + } + dmsg("block_handler out %d\n", --bcount); +} + +void init_maps(void) { + static bool maps_are_init = false; + if (!maps_are_init) { + maps_are_init = true; + + tag_map["syntax"] = ast_tag_syntax; + tag_map["break"] = ast_tag_break; + tag_map["funcproto"] = ast_tag_funcproto; + tag_map["vardecl"] = ast_tag_vardecl; + tag_map["return_type"] = ast_tag_return_type; + tag_map["type"] = ast_tag_type; + tag_map["variable"] = ast_tag_variable; + tag_map["block"] = ast_tag_block; + tag_map["statement"] = ast_tag_statement; + tag_map["funcname"] = ast_tag_funcname; + tag_map["op"] = ast_tag_op; + tag_map["label"] = ast_tag_label; + + op_map[IF] = kw_if; + op_map[SWITCH] = kw_switch; + op_map[WHILE] = kw_while; + op_map[RETURN] = kw_return; + op_map["="] = kw_assign; + + ops[LBRACE] = g_lbrace; + ops[RBRACE] = g_rbrace; + ops[LPAREN] = g_lparen; + ops[RPAREN] = g_rparen; + ops[LBRACKET] = g_lbracket; + ops[RBRACKET] = g_rbracket; + ops["&"] = g_and; + ops["|"] = g_or; + ops["^"] = g_xor; + ops["!"] = g_not; + ops["~"] = g_bnot; + ops["||"] = g_logical_or; + ops["&&"] = g_logical_and; + ops[COMMA] = g_comma; + ops[SEMICOLON] = g_semi; + ops["<<"] = g_lshift; + ops[">>"] = g_rshift; + ops["<"] = g_lt; + ops["<="] = g_lte; + ops[">"] = g_gt; + ops[">="] = g_gte; + ops["="] = g_assign; + ops["=="] = g_eq; + ops["!="] = g_ne; + ops["+"] = g_plus; + ops["-"] = g_minus; + ops["%"] = g_mod; + ops["/"] = g_div; + ops["*"] = g_star; + ops["+="] = g_plus_eq; + ops["-="] = g_minus_eq; + ops["*="] = g_star_eq; + ops["/="] = g_div_eq; + ops["%="] = g_mod_eq; + ops["&="] = g_and_eq; + ops["|="] = g_or_eq; + ops["^="] = g_xor_eq; + ops["<<="] = g_lshift_eq; + ops[">>="] = g_rshift_eq; + ops["++"] = g_plusplus; + ops["--"] = 
g_minusminus; + ops["?"] = g_qmark; + ops[COLON] = g_colon; + + binary_ops.insert("&"); + binary_ops.insert("|"); + binary_ops.insert("^"); + binary_ops.insert("||"); + binary_ops.insert("&&"); + binary_ops.insert("<<"); + binary_ops.insert(">>"); + binary_ops.insert("<"); + binary_ops.insert("<="); + binary_ops.insert(">"); + binary_ops.insert(">="); + binary_ops.insert("=="); + binary_ops.insert("!="); + binary_ops.insert("+"); + binary_ops.insert("-"); + binary_ops.insert("%"); + binary_ops.insert("/"); + binary_ops.insert("*"); + + unary_ops.insert("++"); + unary_ops.insert("--"); + unary_ops.insert("++ "); + unary_ops.insert("-- "); + unary_ops.insert("-"); + unary_ops.insert("!"); + unary_ops.insert("~"); + unary_ops.insert("*"); + unary_ops.insert("&"); + + reserved.insert(WHILE); + reserved.insert(DO); + reserved.insert(IF); + reserved.insert(ELSE); + reserved.insert(BREAK); + reserved.insert(DEFAULT); + reserved.insert(SWITCH); + reserved.insert(CASE); + reserved.insert(GOTO); + reserved.insert(RETURN); + reserved.insert("for"); + + reserved.insert("int"); + reserved.insert("bool"); + reserved.insert("char"); + reserved.insert("short"); + reserved.insert("long"); + reserved.insert("signed"); + reserved.insert("unsigned"); + reserved.insert("float"); + reserved.insert("double"); + reserved.insert("void"); + reserved.insert("NULL"); + + reserved.insert("uint8_t"); + reserved.insert("uint16_t"); + reserved.insert("uint32_t"); + reserved.insert("uint64_t"); + reserved.insert("int8_t"); + reserved.insert("int16_t"); + reserved.insert("int32_t"); + reserved.insert("int64_t"); + + type_map["uint1"] = "uint8_t"; + type_map["uint2"] = "uint16_t"; + type_map["uint4"] = "uint32_t"; + type_map["uint8"] = "uint64_t"; + type_map["int1"] = "int8_t"; + type_map["int2"] = "int16_t"; + type_map["int4"] = "int32_t"; + type_map["int8"] = "int64_t"; + type_map["float4"] = "float"; + type_map["float8"] = "double"; + type_map["xunknown1"] = "__uint8"; + type_map["xunknown2"] = 
"__uint16"; + type_map["xunknown4"] = "__uint32"; + type_map["xunknown8"] = "__uint64"; + } +} + +bool is_reserved(const string& word) { + return reserved.find(word) != reserved.end(); +} + +Function* func_from_xml(Element* func, uint64_t addr) { + init_maps(); + int num_decls = 0; + int num_blocks = 0; + if (func->getName() != "function") { + return NULL; + } + Function* result = new Function(addr); + bool have_proto = false; + const List& children = func->getChildren(); + for (List::const_iterator it = children.begin(); it < children.end(); it++) { + const Element* child = get_child(it); + if (!have_proto && child->getName() != "funcproto") { + continue; + } + have_proto = true; + switch (tag_map[child->getName()]) { + case ast_tag_funcproto: + funcproto_handler(child, result); + break; + case ast_tag_syntax: + break; + case ast_tag_vardecl: { + Statement* s = vardecl_handler(child); + if (s) { + result->block.push_back(s); + num_decls++; + } + else { + //error + } + break; + } + case ast_tag_block: + if (num_decls && !num_blocks) { + result->block.push_back(new EmptyStatement()); + } + block_handler(child, &result->block); + num_blocks++; + break; + default: + break; + } + } + return result; +} + +VarDecl* find_decl(Function* ast, const string& sword) { + vector& bk = ast->block.block; + vector& parms = ast->prototype.parameters; + + //Scan function parameters + for (vector::iterator i = parms.begin(); i != parms.end(); i++) { + VarDecl* decl = *i; + if (decl->var->name == sword) { + return decl; + } + } + + //Scan locals + for (vector::iterator i = bk.begin(); i != bk.end(); i++) { + VarDecl* decl = dynamic_cast(*i); + if (decl) { + if (decl->var->name == sword) { + return decl; + } + } + else { + break; + } + } + + return NULL; +} + +VarDecl* find_decl(Function* ast, int col, int line) { + vector& bk = ast->block.block; + vector& parms = ast->prototype.parameters; + + //Scan function parameters + for (vector::iterator i = parms.begin(); i != parms.end(); i++) 
{ + VarDecl* decl = *i; + if (decl->col_start <= col && decl->col_end > col&& decl->line_begin == line && decl->line_end == line) { + return decl; + } + } + + //Scan locals + for (vector::iterator i = bk.begin(); i != bk.end(); i++) { + VarDecl* decl = dynamic_cast(*i); + if (decl) { + if (decl->col_start <= col && decl->col_end > col&& decl->line_begin == line && decl->line_end == line) { + return decl; + } + } + else { + break; + } + } + + return NULL; +} + diff --git a/bins/linux/ida70/blc.so b/bins/linux/ida70/blc.so deleted file mode 100755 index 2be36e8..0000000 Binary files a/bins/linux/ida70/blc.so and /dev/null differ diff --git a/bins/linux/ida70/blc64.so b/bins/linux/ida70/blc64.so deleted file mode 100755 index e450c9e..0000000 Binary files a/bins/linux/ida70/blc64.so and /dev/null differ diff --git a/bins/linux/ida71/blc.so b/bins/linux/ida71/blc.so deleted file mode 100755 index 2be36e8..0000000 Binary files a/bins/linux/ida71/blc.so and /dev/null differ diff --git a/bins/linux/ida71/blc64.so b/bins/linux/ida71/blc64.so deleted file mode 100755 index e450c9e..0000000 Binary files a/bins/linux/ida71/blc64.so and /dev/null differ diff --git a/bins/linux/ida72/blc.so b/bins/linux/ida72/blc.so deleted file mode 100755 index 760862d..0000000 Binary files a/bins/linux/ida72/blc.so and /dev/null differ diff --git a/bins/linux/ida72/blc64.so b/bins/linux/ida72/blc64.so deleted file mode 100755 index 8c9af98..0000000 Binary files a/bins/linux/ida72/blc64.so and /dev/null differ diff --git a/bins/linux/ida73/blc.so b/bins/linux/ida73/blc.so deleted file mode 100755 index 5ad7195..0000000 Binary files a/bins/linux/ida73/blc.so and /dev/null differ diff --git a/bins/linux/ida73/blc64.so b/bins/linux/ida73/blc64.so deleted file mode 100755 index cc27081..0000000 Binary files a/bins/linux/ida73/blc64.so and /dev/null differ diff --git a/bins/linux/ida74/blc.so b/bins/linux/ida74/blc.so deleted file mode 100755 index 5ad7195..0000000 Binary files 
a/bins/linux/ida74/blc.so and /dev/null differ diff --git a/bins/linux/ida74/blc64.so b/bins/linux/ida74/blc64.so deleted file mode 100755 index cc27081..0000000 Binary files a/bins/linux/ida74/blc64.so and /dev/null differ diff --git a/bins/linux/ida75/blc.so b/bins/linux/ida75/blc.so deleted file mode 100755 index a3406fc..0000000 Binary files a/bins/linux/ida75/blc.so and /dev/null differ diff --git a/bins/linux/ida75/blc64.so b/bins/linux/ida75/blc64.so deleted file mode 100755 index c6f292f..0000000 Binary files a/bins/linux/ida75/blc64.so and /dev/null differ diff --git a/bins/mac/ida70/blc.dylib b/bins/mac/ida70/blc.dylib deleted file mode 100755 index d46611e..0000000 Binary files a/bins/mac/ida70/blc.dylib and /dev/null differ diff --git a/bins/mac/ida70/blc64.dylib b/bins/mac/ida70/blc64.dylib deleted file mode 100755 index db01fd1..0000000 Binary files a/bins/mac/ida70/blc64.dylib and /dev/null differ diff --git a/bins/mac/ida71/blc.dylib b/bins/mac/ida71/blc.dylib deleted file mode 100755 index d46611e..0000000 Binary files a/bins/mac/ida71/blc.dylib and /dev/null differ diff --git a/bins/mac/ida71/blc64.dylib b/bins/mac/ida71/blc64.dylib deleted file mode 100755 index db01fd1..0000000 Binary files a/bins/mac/ida71/blc64.dylib and /dev/null differ diff --git a/bins/mac/ida72/blc.dylib b/bins/mac/ida72/blc.dylib deleted file mode 100755 index 020961a..0000000 Binary files a/bins/mac/ida72/blc.dylib and /dev/null differ diff --git a/bins/mac/ida72/blc64.dylib b/bins/mac/ida72/blc64.dylib deleted file mode 100755 index af5fbbd..0000000 Binary files a/bins/mac/ida72/blc64.dylib and /dev/null differ diff --git a/bins/mac/ida73/blc.dylib b/bins/mac/ida73/blc.dylib deleted file mode 100755 index 383f5e0..0000000 Binary files a/bins/mac/ida73/blc.dylib and /dev/null differ diff --git a/bins/mac/ida73/blc64.dylib b/bins/mac/ida73/blc64.dylib deleted file mode 100755 index e858aae..0000000 Binary files a/bins/mac/ida73/blc64.dylib and /dev/null differ diff --git 
a/bins/mac/ida74/blc.dylib b/bins/mac/ida74/blc.dylib deleted file mode 100755 index 383f5e0..0000000 Binary files a/bins/mac/ida74/blc.dylib and /dev/null differ diff --git a/bins/mac/ida74/blc64.dylib b/bins/mac/ida74/blc64.dylib deleted file mode 100755 index e858aae..0000000 Binary files a/bins/mac/ida74/blc64.dylib and /dev/null differ diff --git a/bins/mac/ida75/blc.dylib b/bins/mac/ida75/blc.dylib deleted file mode 100755 index afd6cb2..0000000 Binary files a/bins/mac/ida75/blc.dylib and /dev/null differ diff --git a/bins/mac/ida75/blc64.dylib b/bins/mac/ida75/blc64.dylib deleted file mode 100755 index d2e6b1b..0000000 Binary files a/bins/mac/ida75/blc64.dylib and /dev/null differ diff --git a/bins/win/ida70/blc.dll b/bins/win/ida70/blc.dll deleted file mode 100755 index 9a95b12..0000000 Binary files a/bins/win/ida70/blc.dll and /dev/null differ diff --git a/bins/win/ida70/blc64.dll b/bins/win/ida70/blc64.dll deleted file mode 100755 index 25adbc1..0000000 Binary files a/bins/win/ida70/blc64.dll and /dev/null differ diff --git a/bins/win/ida71/blc.dll b/bins/win/ida71/blc.dll deleted file mode 100755 index 0c04138..0000000 Binary files a/bins/win/ida71/blc.dll and /dev/null differ diff --git a/bins/win/ida71/blc64.dll b/bins/win/ida71/blc64.dll deleted file mode 100755 index 1e054b6..0000000 Binary files a/bins/win/ida71/blc64.dll and /dev/null differ diff --git a/bins/win/ida72/blc.dll b/bins/win/ida72/blc.dll deleted file mode 100755 index 4b6405e..0000000 Binary files a/bins/win/ida72/blc.dll and /dev/null differ diff --git a/bins/win/ida72/blc64.dll b/bins/win/ida72/blc64.dll deleted file mode 100755 index 0b0d2d1..0000000 Binary files a/bins/win/ida72/blc64.dll and /dev/null differ diff --git a/bins/win/ida73/blc.dll b/bins/win/ida73/blc.dll deleted file mode 100755 index 1395aaf..0000000 Binary files a/bins/win/ida73/blc.dll and /dev/null differ diff --git a/bins/win/ida73/blc64.dll b/bins/win/ida73/blc64.dll deleted file mode 100755 index 
48c2cc7..0000000 Binary files a/bins/win/ida73/blc64.dll and /dev/null differ diff --git a/bins/win/ida74/blc.dll b/bins/win/ida74/blc.dll deleted file mode 100755 index 744ab81..0000000 Binary files a/bins/win/ida74/blc.dll and /dev/null differ diff --git a/bins/win/ida74/blc64.dll b/bins/win/ida74/blc64.dll deleted file mode 100755 index 15939b3..0000000 Binary files a/bins/win/ida74/blc64.dll and /dev/null differ diff --git a/bins/win/ida75/blc.dll b/bins/win/ida75/blc.dll deleted file mode 100755 index 8ee6289..0000000 Binary files a/bins/win/ida75/blc.dll and /dev/null differ diff --git a/bins/win/ida75/blc64.dll b/bins/win/ida75/blc64.dll deleted file mode 100755 index 2760ca5..0000000 Binary files a/bins/win/ida75/blc64.dll and /dev/null differ diff --git a/blc.sln b/blc.sln index d76776f..9d5f43c 100644 --- a/blc.sln +++ b/blc.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.28307.539 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29806.167 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "blc", "blc.vcxproj", "{8E7ACC53-0733-45E9-BB24-90D2E9D93988}" EndProject diff --git a/blc.vcxproj b/blc.vcxproj index 21f8988..ad50862 100644 --- a/blc.vcxproj +++ b/blc.vcxproj @@ -17,27 +17,27 @@ {8E7ACC53-0733-45E9-BB24-90D2E9D93988} blc - 10.0.17763.0 + 10.0 DynamicLibrary false MultiByte - v141 + v142 DynamicLibrary false MultiByte - v141 + v142 DynamicLibrary false MultiByte Static - v141 + v142 diff --git a/plugin.cc b/plugin.cc index 7245f51..7b497d7 100644 --- a/plugin.cc +++ b/plugin.cc @@ -1,1215 +1,1802 @@ -/* - Source for blc IdaPro plugin - Copyright (c) 2019 Chris Eagle - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 Temple - Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -#ifndef USE_DANGEROUS_FUNCTIONS -#define USE_DANGEROUS_FUNCTIONS 1 -#endif // USE_DANGEROUS_FUNCTIONS - -#ifndef USE_STANDARD_FILE_FUNCTIONS -#define USE_STANDARD_FILE_FUNCTIONS -#endif - -#ifndef NO_OBSOLETE_FUNCS -#define NO_OBSOLETE_FUNCS -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -//#define DEBUG 1 - -#include "plugin.hh" -#include "ast.hh" - -#if defined(__NT__) // MS Windows -#define DIRSEP "\\" -#else -#define DIRSEP "/" -#endif - -using std::iostream; -using std::ifstream; -using std::istreambuf_iterator; -using std::map; -using std::set; - -struct LocalVar { - string ghidra_name; - string current_name; //current display name in disassembly display - ea_t offset; //offset into stack frame if stack var (BADADDR otherwise) - - LocalVar(const string &gname, const string &iname, ea_t _offset = BADADDR) : - ghidra_name(gname), current_name(iname), offset(_offset) {}; -}; - -struct Decompiled { - Function *ast; - func_t *ida_func; - strvec_t *sv; //text of the decompiled function displayed in a custom_viewer - map locals; - - Decompiled(Function *f, func_t *func) : ast(f), ida_func(func), sv(NULL) {}; - ~Decompiled(); - - void set_ud(strvec_t *ud); - strvec_t *get_ud() {return sv;}; -}; - -Decompiled::~Decompiled() { - delete ast; - for (map::iterator i = locals.begin(); i != locals.end(); i++) { - delete 
i->second; - } - delete sv; -} - -void Decompiled::set_ud(strvec_t *ud) { - delete sv; - sv = ud; -} - -void decompile_at(ea_t ea, TWidget *w = NULL); -int do_ida_rename(qstring &name, ea_t func); - -static map type_sizes; -static map > histories; -static map views; -static map function_map; -static set titles; - -arch_map_t arch_map; - -static string get_available_title() { - string title("A"); - while (titles.find(title) != titles.end()) { - int i = 0; - while (true) { - title[i] += 1; - if (title[i] > 'Z') { - title[i] = 'A'; - if (title.length() == i) { - title.push_back('A'); - break; - } - else { - i++; - } - } - else { - break; - } - } - } - return title; -} - -//--------------------------------------------------------------------------- -// get the word under the (keyboard or mouse) cursor -static bool get_current_word(TWidget *v, bool mouse, qstring &word, qstring *line) { - // query the cursor position - int x, y; - if (get_custom_viewer_place(v, mouse, &x, &y) == NULL) { - return false; - } - // query the line at the cursor - tag_remove(line, get_custom_viewer_curline(v, mouse)); - if (x >= line->length()) { - return false; - } - char *ptr = line->begin() + x; - char *end = ptr; - // find the end of the word - while ((qisalnum(*end) || *end == '_') && *end != '\0') { - end++; - } - - if (end == ptr) { - return false; - } - - // find the beginning of the word - while (ptr > line->begin() && (qisalnum(ptr[-1]) || ptr[-1] == '_')) { - ptr--; - } - if (!qisalpha(*ptr) && *ptr != '_') { - //starts with a digit - return false; - } - word = qstring(ptr, end - ptr); - return true; -} - -static bool navigate_to_word(TWidget *w, bool cursor) { - qstring word; - qstring line; - if (get_current_word(w, cursor, word, &line)) { - ea_t ea = get_name_ea(BADADDR, word.c_str()); - if (ea != BADADDR) { - if (is_function_start(ea) && !is_extern_addr(ea)) { - map >::iterator mi = histories.find(w); - if (mi == histories.end() || mi->second.size() == 0 || mi->second.back() != 
ea) { - histories[w].push_back(ea); - decompile_at(ea, w); - } - } - else { - jumpto(ea); - } - return true; - } - } - return false; -} - -//--------------------------------------------------------------------------- -// Keyboard callback -static bool idaapi ct_keyboard(TWidget *w, int key, int shift, void *ud) { - ea_t addr = 0; - if (shift == 0) { - strvec_t *sv = (strvec_t *)ud; - switch (key) { - case 'G': - if (ask_addr(&addr, "Jump address")) { - func_t *f = get_func(addr); - if (f) { - decompile_at(f->start_ea, w); - } - } - return true; - case 'N': { //rename the thing under the cursor - Decompiled *dec = function_map[w]; - qstring word; - qstring line; - bool refresh = false; - if (get_current_word(w, false, word, &line)) { - string sword(word.c_str()); -// msg("Try to rename: %s\n", word.c_str()); - if (!is_reserved(sword)) { //can't rename to a reserved word - qstring new_name(word); - map::iterator mi = dec->locals.find(sword); - if (mi != dec->locals.end()) { -// msg("%s is a local\n", word.c_str()); - LocalVar *lv = mi->second; - if (ask_str(&word, HIST_IDENT, "Please enter item name") && sword != word.c_str()) { - string newname(word.c_str()); - //need to make sure new name will be legal - if (is_reserved(newname) || dec->locals.find(newname) != dec->locals.end() || - get_name_ea(BADADDR, newname.c_str()) != BADADDR) { -// msg("rename fail 1\n"); - return true; - } - if (lv->offset != BADADDR) { //stack var -// msg("renaming a stack var %s to %s\n", sword.c_str(), word.c_str()); - if (set_member_name(get_frame(dec->ida_func), lv->offset, word.c_str())) { - lv->current_name = newname; - dec->locals.erase(sword); - dec->locals[newname] = lv; - dec->ast->rename(sword, newname); - refresh = true; - } - else { -// msg("set_member_name failed\n"); - } - } - else { //not stack var, reg var?? 
- qstring iname; - netnode nn(dec->ida_func->start_ea); -// msg("renaming a reg var %s to %s\n", sword.c_str(), word.c_str()); - lv->current_name = newname; - dec->locals.erase(sword); - dec->locals[word.c_str()] = lv; - dec->ast->rename(sword, word.c_str()); - nn.hashset(lv->ghidra_name.c_str(), word.c_str()); - refresh = true; - } - } - } - else if (do_ida_rename(new_name, dec->ida_func->start_ea) == 2) { - //renming a global - string snew_name(new_name.c_str()); - dec->ast->rename(sword, snew_name); -// msg("rename: %s -> %s\n", word.c_str(), new_name.c_str()); - refresh = true; - } - else { - } - } - } - if (refresh) { - vector code; - dec->ast->print(&code); - strvec_t *sv = new strvec_t(); - for (vector::iterator si = code.begin(); si != code.end(); si++) { - sv->push_back(simpleline_t(si->c_str())); - } - - callui(ui_custom_viewer_set_userdata, w, sv); - refresh_custom_viewer(w); - repaint_custom_viewer(w); - dec->set_ud(sv); - } - return true; - } - case 'Y': { //Set type for the thing under the cursor - Decompiled *dec = function_map[w]; //the ast for the function we are editing - qstring word; - qstring line; - if (get_current_word(w, false, word, &line)) { - //need to determine the thing being typed along with it's old type - //user may have selected the type name at a variable's declaration, - //or the user may have selected the variable name at its declaration - //or some place it is used, so we need to find the variable's declaration - //node in the ast (unless it's a global) so that we can change the Type - //node within the declaration node. 
- - int x = -1; - int y = -1; - - place_t *pl = get_custom_viewer_place(w, false, &x, &y); - tcc_place_type_t pt = get_viewer_place_type(w); - if (pl && pt == TCCPT_SIMPLELINE_PLACE) { - simpleline_place_t *slp = (simpleline_place_t*)pl; - y = slp->n; - } - else { - msg("Couldn't retrieve line number\n"); - return false; - } - - //indent doesn't get factored into ast x/y data - for (const char *cptr = line.c_str(); *cptr == ' '; cptr++) { - x--; - } - - string sword(word.c_str()); - map::iterator mi = dec->locals.find(sword); - VarDecl *decl = NULL; - if (mi != dec->locals.end()) { -// msg("Find decl by name (%s)\n", sword.c_str()); - decl = find_decl(dec->ast, sword); - } - else { -// msg("Find decl by x,y (%d,%d)\n", x, y); - decl = find_decl(dec->ast, x, y); - } - if (decl == NULL) { - //last chance - see if word refers to a global, then ask IDA its type - } - else { -// msg("You seem to be referring to this decl: %s on line %d col %d\n", decl->var->name.c_str(), decl->line_begin, decl->col_start); - } -#if 0 -//not ready yet - //need to get string representation of the decl (if type is known) to display to user - if (ask_str(&word, HIST_IDENT, "Please enter the type declaration")) { - //now we need to parse what the user entered to extract only type related info - //then determine whether the user entered a type known to ida, and if so - //update the ast to change the variable's type. If the variable is a stack variable, - //global variable, or function parameter, also change the type in IDA. 
- //If the type is for a register variable, then update the variable's type in a - //netnode (like the variable name map) - - //use parse_decl to parse user text into a type - //then will need to extract IDA's tinfo_t information back to an updated ast Type node - } -#endif - } - return true; - } - case IK_OEM_2: { // This is key that IDA associates with '/' - //Add eol comment on current line - int x, y; - if (get_custom_viewer_place(w, false, &x, &y) == NULL) { - return false; - } - msg("add comment on line %d\n", y); - return true; - } - case IK_ESCAPE: { - map >::iterator mi = histories.find(w); - if (mi != histories.end()) { - qvector &v = mi->second; - if (v.size() == 1) { - close_widget(w, WCLS_DONT_SAVE_SIZE | WCLS_CLOSE_LATER); - string t = views[w]; - views.erase(w); - delete function_map[w]; - function_map.erase(w); - titles.erase(t); - } - else { - v.pop_back(); - decompile_at(v.back(), w); - } - return true; - } - break; - } - case IK_RETURN: { //jump to symbol under cursor - return navigate_to_word(w, false); - } - default: -// msg("Detected key press: 0x%x\n", key); - break; - } - } - return false; -} - -static bool idaapi ct_dblclick(TWidget *cv, int shift, void *ud) { -// msg("Double clicked on: %s\n", word.c_str()); - return navigate_to_word(cv, true); -} - -static const custom_viewer_handlers_t handlers( - ct_keyboard, - NULL, // popup - NULL, // mouse_moved - NULL, // click - ct_dblclick, // dblclick - NULL, //ct_curpos, - NULL, // close - NULL, // help - NULL);// adjust_place - -string ghidra_dir; - -map proc_map; - -map return_reg_map; - -static const char *name_dialog; - -//get the format string for IDA's standard rename dialog -void find_ida_name_dialog() { - help_t i; - for (i = 0; ; i++) { - const char *hlp = itext(i); - const char *lf = strchr(hlp, '\n'); - if (lf != NULL) { - lf++; - if (strncmp("Rename address\n", lf, 15) == 0) { - name_dialog = hlp; -// msg("Found:\n%s\n", hlp); - break; - } - } - } -} - -// return -1 - name is not 
associated with a symbol -// return 0 - duplicate name -// return 1 - no change -// return 2 - name changed -// return 3 - new name, but couldn't change it -int do_ida_rename(qstring &name, ea_t func) { - ea_t name_ea = get_name_ea(func, name.c_str()); - if (name_ea == BADADDR) { - //somehow the original name is invalid -// msg("rename: %s has no addr\n", name.c_str()); - return -1; - } - qstring orig = name; - bool res = ask_str(&name, HIST_IDENT, "Please enter item name"); - if (res && name != orig) { - ea_t new_name_ea = get_name_ea(func, name.c_str()); - if (new_name_ea != BADADDR) { - //new name is same as existing name -// msg("rename: new name already in use\n", name.c_str()); - return 0; - } -// msg("Custom rename: %s at adddress 0x%zx\n", name.c_str(), name_ea); - res = set_name(name_ea, name.c_str()); - return res ? 2 : 3; - } -// msg("rename: no change\n"); - return 1; -} - -void init_ida_ghidra() { - const char *ghidra = getenv("GHIDRA_DIR"); - if (ghidra) { - ghidra_dir = ghidra; - } - else { - ghidra_dir = idadir("plugins"); - } -// find_ida_name_dialog(); - - arch_map[PLFM_MIPS] = mips_setup; - - proc_map[PLFM_6502] = "6502"; - proc_map[PLFM_68K] = "68000"; - proc_map[PLFM_6800] = "6805"; - //proc_map[PLFM_xxx] = "8048"; - proc_map[PLFM_8051] = "8051"; - //proc_map[PLFM_Z80] = "8085"; - proc_map[PLFM_ARM] = "ARM"; - //proc_map[PLFM_ARM] = "AARCH64"; - proc_map[PLFM_AVR] = "Atmel"; - proc_map[PLFM_CR16] = "CR16"; - proc_map[PLFM_DALVIK] = "Dalvik"; - proc_map[PLFM_JAVA] = "JVM"; - proc_map[PLFM_MIPS] = "MIPS"; - proc_map[PLFM_HPPA] = "pa-risc"; - proc_map[PLFM_PIC] = "PIC"; - proc_map[PLFM_PPC] = "PowerPC"; - proc_map[PLFM_SPARC] = "sparc"; - proc_map[PLFM_MSP430] = "TI_MSP430"; - proc_map[PLFM_TRICORE] = "tricore"; - proc_map[PLFM_386] = "x86"; - proc_map[PLFM_Z80] = "Z80"; - - return_reg_map[PLFM_6502] = "6502"; - return_reg_map[PLFM_68K] = "68000"; - return_reg_map[PLFM_6800] = "6805"; - //return_reg_map[PLFM_xxx] = "8048"; - 
return_reg_map[PLFM_8051] = "8051"; - //return_reg_map[PLFM_Z80] = "8085"; - return_reg_map[PLFM_ARM] = "r0:r0:r0:r0"; - //return_reg_map[PLFM_ARM] = "r0:r0:r0:r0"; - return_reg_map[PLFM_AVR] = "Atmel"; - return_reg_map[PLFM_CR16] = "CR16"; - return_reg_map[PLFM_DALVIK] = "Dalvik"; - return_reg_map[PLFM_JAVA] = "JVM"; - return_reg_map[PLFM_MIPS] = "v0:v0:v0:v0"; - return_reg_map[PLFM_HPPA] = "PA-RISC"; - return_reg_map[PLFM_PIC] = "PIC"; - return_reg_map[PLFM_PPC] = "PowerPC"; - return_reg_map[PLFM_SPARC] = "Sparc"; - return_reg_map[PLFM_MSP430] = "TI_MSP430"; - return_reg_map[PLFM_TRICORE] = "tricore"; - return_reg_map[PLFM_386] = "al:ax:eax:rax"; - return_reg_map[PLFM_Z80] = "Z80"; - - type_sizes["void"] = 1; - type_sizes["bool"] = 1; - type_sizes["uint1"] = 1; - type_sizes["uint2"] = 2; - type_sizes["uint4"] = 4; - type_sizes["uint8"] = 8; - type_sizes["int1"] = 1; - type_sizes["int2"] = 2; - type_sizes["int4"] = 4; - type_sizes["int8"] = 8; - type_sizes["float4"] = 4; - type_sizes["float8"] = 8; - type_sizes["float10"] = 10; - type_sizes["float16"] = 16; - type_sizes["xunknown1"] = 1; - type_sizes["xunknown2"] = 2; - type_sizes["xunknown4"] = 4; - type_sizes["xunknown8"] = 8; - type_sizes["code"] = 1; - type_sizes["char"] = 1; - type_sizes["wchar2"] = 2; - type_sizes["wchar4"] = 4; -} - -#if IDA_SDK_VERSION < 730 - -#define WOPN_DP_TAB WOPN_TAB - -bool inf_is_64bit() { - return inf.is_64bit(); -} - -bool inf_is_32bit() { - return inf.is_32bit(); -} - -void inf_get_cc(compiler_info_t *cc) { - *cc = inf.cc; -} - -bool inf_is_be() { - return inf.is_be(); -} - -filetype_t inf_get_filetype() { - return (filetype_t)inf.filetype; -} - -#endif - -int get_proc_id() { -#if IDA_SDK_VERSION < 750 - return ph.id; -#else - return PH.id; -#endif -} - -bool get_sleigh_id(string &sleigh) { - sleigh.clear(); - map::iterator proc = proc_map.find(get_proc_id()); - if (proc == proc_map.end()) { - return false; - } - compiler_info_t cc; - inf_get_cc(&cc); - bool is_64 = 
inf_is_64bit(); - bool is_be = inf_is_be(); - filetype_t ftype = inf_get_filetype(); - - sleigh = proc->second + (is_be ? ":BE" : ":LE"); - - switch (get_proc_id()) { - case PLFM_6502: - sleigh += ":16:default"; - break; - case PLFM_68K: - //options include "default" "MC68030" "MC68020" "Coldfire" - sleigh += ":32:default"; - break; - case PLFM_6800: - sleigh += ":8:default"; - break; - case PLFM_8051: - sleigh += ":16:default"; - break; - case PLFM_ARM: - //options include "v8" "v8T" "v8LEInstruction" "v7" "v7LEInstruction" "Cortex" - // "v6" "v5t" "v5" "v4t" "v4" "default" - if (is_64) { //AARCH64 - sleigh = "AARCH64"; - sleigh += (is_be ? ":BE:64:v8A" : ":LE:64:v8A"); - } - else { - sleigh += ":32:v7"; - } - break; - case PLFM_AVR: - sleigh += ":16:default"; - break; - case PLFM_CR16: - sleigh += ":16:default"; - break; - case PLFM_DALVIK: - sleigh += ":32:default"; - break; - case PLFM_JAVA: - sleigh += ":32:default"; - break; - case PLFM_MIPS: { - //options include "R6" "micro" "64-32addr" "micro64-32addr" "64-32R6addr" "default" - qstring abi; - if (get_abi_name(&abi) > 0 && abi.find("n32") == 0) { - sleigh += ":64:64-32addr"; - } - else { - sleigh += is_64 ? ":64:default" : ":32:default"; - } - break; - } - case PLFM_HPPA: - sleigh += ":32:default"; - break; - case PLFM_PIC: - break; - case PLFM_PPC: { - //options include "default" "64-32addr" "4xx" "MPC8270" "QUICC" "A2-32addr" - // "A2ALT-32addr" "A2ALT" "VLE-32addr" "VLEALT-32addr" - qstring abi; - if (get_abi_name(&abi) > 0 && abi.find("xbox") == 0) { - // ABI name is set to "xbox" for X360 PPC executables - sleigh += ":64:A2ALT-32addr"; - } - else { - sleigh += is_64 ? ":64:default" : ":32:default"; - } - break; - } - case PLFM_SPARC: - sleigh += is_64 ? 
":64" : ":32"; - sleigh += ":default"; - break; - case PLFM_MSP430: - sleigh += ":16:default"; - break; - case PLFM_TRICORE: - sleigh += ":32:default"; - break; - case PLFM_386: - //options include "System Management Mode" "Real Mode" "Protected Mode" "default" - sleigh += is_64 ? ":64" : (inf_is_32bit() ? ":32" : ":16"); - if (sleigh.find(":16") != string::npos) { - sleigh += ":Real Mode"; - } - else { - sleigh += ":default"; - } - - if (cc.id == COMP_BC) { - sleigh += ":borlandcpp"; - } - else if (cc.id == COMP_MS) { - sleigh += ":windows"; - } - else if (cc.id == COMP_GNU) { - sleigh += ":gcc"; - } - break; - case PLFM_Z80: - break; - default: - return false; - } - msg("Using sleigh id: %s\n", sleigh.c_str()); - return true; -} - -void get_ida_bytes(uint8_t *buf, uint64_t size, uint64_t ea) { - get_bytes(buf, size, (ea_t)ea); -} - -bool does_func_return(void *func) { - func_t *f = (func_t*)func; - return func_does_return(f->start_ea); -} - -uint64_t get_func_start(void *func) { - func_t *f = (func_t*)func; - return f->start_ea; -} - -uint64_t get_func_start(uint64_t ea) { - func_t *f = get_func((ea_t)ea); - return f ? f->start_ea : BADADDR; -} - -uint64_t get_func_end(uint64_t ea) { - func_t *f = get_func((ea_t)ea); - return f ? 
f->end_ea : BADADDR; -} - -//Create a Ghidra to Ida name mapping for a single loval variable (including formal parameters) -void map_var_from_decl(Decompiled *dec, VarDecl *decl) { - Function *ast = dec->ast; - func_t *func = dec->ida_func; - struc_t *frame = get_frame(func); - ea_t ra = frame_off_retaddr(func); - const string gname = decl->getName(); - size_t stack = gname.find("Stack"); - LocalVar *lv = new LocalVar(gname, gname); //default current name will be ghidra name - if (stack != string::npos) { //if it's a stack var, change current to ida name - uint32_t stackoff = strtoul(&gname[stack + 5], NULL, 0); - member_t *var = get_member(frame, ra - stackoff); - lv->offset = ra - stackoff; - if (var) { //now we know there's an ida name assigned - qstring iname; - get_member_name(&iname, var->id); - ast->rename(gname, iname.c_str()); - dec->locals[iname.c_str()] = lv; - lv->current_name = iname.c_str(); - } - else { //ghidra says there's a variable here, let's name it in ida - //TODO - need to compute sizeof(decl) to properly create - // the new data member - qstring iname; - iname.sprnt("var_%X", stackoff - func->frregs); - if (add_struc_member(frame, iname.c_str(), ra - stackoff, byte_flag(), NULL, 1) == 0) { - ast->rename(gname, iname.c_str()); - dec->locals[iname.c_str()] = lv; - lv->current_name = iname.c_str(); - } - else { - dec->locals[gname] = lv; - } - } - } - else { //handle non-stack (register) local variables - netnode nn(dec->ida_func->start_ea); - qstring iname; - if (nn.hashstr(&iname, gname.c_str()) <= 0) { - //no existing mapping - dec->locals[gname] = lv; - } - else { - //we already have a mapping for this ghidra variable - ast->rename(gname, iname.c_str()); - dec->locals[iname.c_str()] = lv; - lv->current_name = iname.c_str(); - } - } -} - -void map_ghidra_to_ida(Decompiled *dec) { - Function *ast = dec->ast; - vector &bk = ast->block.block; - vector &parms = ast->prototype.parameters; - - //add mappings for formal parameter names - for 
(vector::iterator i = parms.begin(); i != parms.end(); i++) { - VarDecl *decl = *i; - map_var_from_decl(dec, decl); - } - - //add mappings for variable names - for (vector::iterator i = bk.begin(); i != bk.end(); i++) { - VarDecl *decl = dynamic_cast(*i); - if (decl) { - map_var_from_decl(dec, decl); - } - else { - break; - } - } -} - -void decompile_at(ea_t addr, TWidget *w) { - func_t *func = get_func(addr); - Function *ast = NULL; - if (func) { - int res = do_decompile(func->start_ea, func->end_ea, &ast); - if (ast) { -#ifdef DEBUG - msg("got a Functon tree!\n"); -#endif - Decompiled *dec = new Decompiled(ast, func); - - //now try to map ghidra stack variable names to ida stack variable names - msg("mapping ida names to ghidra names\n"); - map_ghidra_to_ida(dec); - - vector code; -#ifdef DEBUG - msg("Generating C code\n"); -#endif - dec->ast->print(&code); - -#ifdef DEBUG - msg("Displaying C code\n"); -#endif - strvec_t *sv = new strvec_t(); - dec->set_ud(sv); - for (vector::iterator si = code.begin(); si != code.end(); si++) { - sv->push_back(simpleline_t(si->c_str())); - } - - qstring func_name; - qstring fmt; - get_func_name(&func_name, func->start_ea); - string title = get_available_title(); - fmt.sprnt("Ghidra code - %s", title.c_str()); // make the suffix change with more windows - - simpleline_place_t s1; - simpleline_place_t s2((int)(sv->size() - 1)); - - if (w == NULL) { - w = create_custom_viewer(fmt.c_str(), &s1, &s2, - &s1, NULL, sv, &handlers, sv); - TWidget *code_view = create_code_viewer(w); - set_code_viewer_is_source(code_view); - display_widget(code_view, WOPN_DP_TAB); - histories[w].push_back(addr); - views[w] = title; - titles.insert(title); - } - else { - callui(ui_custom_viewer_set_userdata, w, sv); - refresh_custom_viewer(w); - repaint_custom_viewer(w); - delete function_map[w]; - } - function_map[w] = dec; - } -#ifdef DEBUG - msg("do_decompile returned: %d\n", res); -#endif - } - else { -#ifdef DEBUG - msg("do_decompile failed to return a 
function\n"); -#endif - } -} - -const char *tag_remove(const char *tagged) { - static qstring ll; - tag_remove(&ll, tagged); - return ll.c_str(); -} - -#if IDA_SDK_VERSION >= 750 - -struct blc_plugmod_t : public plugmod_t { - /// Invoke the plugin. - virtual bool idaapi run(size_t arg); - - /// Virtual destructor. - virtual ~blc_plugmod_t(); -}; - -plugmod_t *idaapi blc_init(void) { - //do ida related init - init_ida_ghidra(); - - if (ghidra_init()) { - return new blc_plugmod_t(); - } - else { - return NULL; - } -} - -blc_plugmod_t::~blc_plugmod_t(void) { - ghidra_term(); -} - -bool idaapi blc_plugmod_t::run(size_t /*arg*/) { - ea_t addr = get_screen_ea(); -#ifdef DEBUG - msg("decompile_at 0x%llx\n", (uint64_t)addr); -#endif - decompile_at(addr); - return true; -} - -#define blc_run NULL -#define blc_term NULL - -#else - -//make life easier in a post 7.5 world -#define PLUGIN_MULTI 0 - -int idaapi blc_init(void) { - //do ida related init - init_ida_ghidra(); - - if (ghidra_init()) { - return PLUGIN_KEEP; - } - else { - return PLUGIN_SKIP; - } -} - -void idaapi blc_term(void) { - ghidra_term(); -} - -bool idaapi blc_run(size_t /*arg*/) { - ea_t addr = get_screen_ea(); - decompile_at(addr); - return true; -} -#endif - -int64_t get_name(string &name, uint64_t ea, int flags) { - qstring ida_name; - int64_t res = get_name(&ida_name, (ea_t)ea, flags); - if (res > 0) { - name = ida_name.c_str(); - } - return res; -} - -int64_t get_func_name(string &name, uint64_t ea) { - qstring ida_name; - int64_t res = get_func_name(&ida_name, (ea_t)ea); - if (res > 0) { - name = ida_name.c_str(); - } - return res; -} - -bool is_function_start(uint64_t ea) { - func_t *f = get_func((ea_t)ea); - return f != NULL && f->start_ea == (ea_t)ea; -} - -void get_input_file_path(string &path) { - char buf[512]; - get_input_file_path(buf, sizeof(buf)); - path = buf; -} - -bool is_thumb_mode(uint64_t ea) { - return get_sreg((ea_t)ea, 20) == 1; -} - -//is ea a function internal jump target, if so 
-//return true and place its name in name -//else return false -bool is_code_label(uint64_t ea, string &name) { - xrefblk_t xr; - for (bool success = xr.first_to((ea_t)ea, XREF_ALL); success; success = xr.next_to()) { - if (xr.iscode == 0) { - break; - } - if (xr.type != fl_JN) { - continue; - } - qstring ida_name; - int64_t res = get_name(&ida_name, (ea_t)ea, GN_LOCAL); - if (res > 0) { - name = ida_name.c_str(); - return true; - } - } - return false; -} - -bool is_extern_addr(uint64_t ea) { - qstring sname; - segment_t *s = getseg(ea); - if (s) { - get_segm_name(&sname, s); - if (sname == "extern") { - return true; - } - } - return false; -} - -bool is_external_ref(uint64_t ea, uint64_t *fptr) { - ea_t got; - func_t *pfn = get_func((ea_t)ea); - if (pfn == NULL) { - return false; - } - if (is_extern_addr(pfn->start_ea)) { - if (fptr) { - *fptr = pfn->start_ea; - } - return true; - } - ea_t _export = calc_thunk_func_target(pfn, &got); - bool res = _export != BADADDR; - if (res) { - if (fptr) { - *fptr = got; - } - msg("0x%zx is external, with got entry at 0x%zx\n", ea, (size_t)got); - } - return res; -} - -bool is_extern(const string &name) { - bool res = false; - ea_t ea = get_name_ea(BADADDR, name.c_str()); - if (ea == BADADDR) { - return false; - } - if (is_function_start(ea)) { - res = is_external_ref(ea, NULL); - } - else { - res = is_extern_addr(ea); - } -// msg("is_extern called for %s (%d)\n", name.c_str(), res); - return res; -} - -bool address_of(const string &name, uint64_t *addr) { - bool res = false; - ea_t ea = get_name_ea(BADADDR, name.c_str()); - if (ea == BADADDR) { - return false; - } - *addr = ea; - return true; -} - -bool is_library_func(const string &name) { - bool res = false; - ea_t ea = get_name_ea(BADADDR, name.c_str()); - if (is_function_start(ea)) { - func_t *f = get_func(ea); - res = f ? 
(f->flags & FUNC_LIB) != 0 : false; - } - return res; -} - -bool is_named_addr(uint64_t ea, string &name) { - qstring res; - //a sanity check on ea - segment_t *s = getseg(0); - if (s != NULL && ea < s->end_ea) { - //ea falls in first segment of zero based binary - //this are generally headers and ea is probably - //not a pointer but instead just a small number - return false; - } - if (get_name(&res, (ea_t)ea) > 0) { - name = res.c_str(); - return true; - } - return false; -} - -bool is_pointer_var(uint64_t ea, uint32_t size, uint64_t *tgt) { - xrefblk_t xb; - if (xb.first_from(ea, XREF_DATA) && xb.type == dr_O) { - // xb.to - contains the referenced address - *tgt = xb.to; - return true; - } - return false; -} - -bool is_read_only(uint64_t ea) { - qstring sname; - segment_t *s = getseg(ea); - if (s) { - if ((s->perm & SEGPERM_WRITE) == 0) { - return true; - } - //not explicitly read only, so let's make some guesses - //based on the segment name - get_segm_name(&sname, s); - if (sname.find("got") <= 1) { - return true; - } - if (sname.find("rodata") <= 1) { - return true; - } - if (sname.find("rdata") <= 1) { - return true; - } - if (sname.find("idata") <= 1) { - return true; - } - if (sname.find("rel.ro") != qstring::npos) { - return true; - } - } - return false; -} - -bool simplify_deref(const string &name, string &new_name) { - uint64_t tgt; - ea_t addr = get_name_ea(BADADDR, name.c_str()); -#if IDA_SDK_VERSION < 750 - uint32_t max_ptr_size = (uint32_t)ph.max_ptr_size(); -#else - uint32_t max_ptr_size = (uint32_t)PH.max_ptr_size(); -#endif - if (addr != BADADDR && is_read_only(addr) && is_pointer_var(addr, max_ptr_size, &tgt)) { - if (get_name(new_name, tgt, 0)) { -// msg("could simplify *%s to %s\n", name.c_str(), new_name.c_str()); - return true; - } - } - return false; -} - -void adjust_thunk_name(string &name) { - ea_t ea = get_name_ea(BADADDR, name.c_str()); - if (is_function_start(ea)) { - func_t *f = get_func(ea); - ea_t fun = calc_thunk_func_target(f, 
&ea); - if (fun != BADADDR) { - qstring tname; - if (get_name(&tname, fun)) { - name = tname.c_str(); - } - } - } -} - -//TODO think about sign extension for values smaller than 8 bytes -bool get_value(uint64_t addr, uint64_t *val) { - flags_t f = get_full_flags(addr); - if (is_qword(f)) { - *val = get_qword(addr); - } - else if (is_dword(f)) { - *val = get_dword(addr); - } - else if (is_byte(f)) { - *val = get_byte(addr); - } - else if (is_word(f)) { - *val = get_word(addr); - } - else { - return false; - } - return true; -} - -bool get_string(uint64_t addr, string &str) { - qstring res; - flags_t f = get_full_flags(addr); - if (is_strlit(f)) { - get_strlit_contents(&res, addr, -1, STRTYPE_C); - str = res.c_str(); - return true; - } - else if (!is_data(f)) { - size_t maxlen = get_max_strlit_length(addr, STRTYPE_C); - if (maxlen > 4) { - create_strlit(addr, 0, STRTYPE_C); - get_strlit_contents(&res, addr, -1, STRTYPE_C); - str = res.c_str(); - return true; - } - } - return false; -} - -//-------------------------------------------------------------------------- -char comment[] = "Ghidra decompiler integration."; - -char help[] = "I have nothing to offer.\n"; - -char wanted_name[] = "Ghidra Decompiler"; - -char wanted_hotkey[] = "Alt-F3"; - -plugin_t PLUGIN = -{ - IDP_INTERFACE_VERSION, - PLUGIN_MULTI, // plugin flags - blc_init, // initialize - blc_term, // terminate. this pointer may be NULL. 
- blc_run, // invoke plugin - comment, // long comment about the plugin - // it could appear in the status line - // or as a hint - help, // multiline help about the plugin - wanted_name, // the preferred short name of the plugin - wanted_hotkey // the preferred hotkey to run the plugin -}; +/* + Source for blc IdaPro plugin + Copyright (c) 2019 Chris Eagle + Copyright (c) 2020 Alexander Pick + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., 59 Temple + Place, Suite 330, Boston, MA 02111-1307 USA + + Changelog: + ---------- + + Changes by Alexander Pick (alx@pwn.su) + + 2020-04-24 - fixed something in the externs recognition for iOS and other (XTRN) + - string recognition + 2020-04-27 - added new comment functionality + 2020-04-28 - IDA 7.5 compatibility + +*/ + +#ifndef USE_DANGEROUS_FUNCTIONS +#define USE_DANGEROUS_FUNCTIONS 1 +#endif // USE_DANGEROUS_FUNCTIONS + +#ifndef USE_STANDARD_FILE_FUNCTIONS +#define USE_STANDARD_FILE_FUNCTIONS +#endif + +#ifndef NO_OBSOLETE_FUNCS +#define NO_OBSOLETE_FUNCS +#endif + +#define __DEFINE_PH__ 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +//#define DEBUG 1 + +#include "plugin.hh" +#include "ast.hh" + +#if defined(__NT__) // MS Windows 
+#define DIRSEP "\\" +#else +#define DIRSEP "/" +#endif + +// debug flags +//#define DEBUG_PLUGIN 1 +//#define DEVFUNC 1 + +// enable support for additional procs: +// NEC850/RH850 - changed as NSA releases a own module in 9.2 valled V850 vs. v850 (previously) + +#define NEWPROCS 1 + +#ifdef DEBUG_PLUGIN +#define dmsg(x, ...) msg(x, __VA_ARGS__) +#else +#define dmsg(x, ...) +#endif + +using std::iostream; +using std::ifstream; +using std::istreambuf_iterator; +using std::map; +using std::set; + +//prefix for netnode nodes +#define NETNODEPRE "$ blc_" + +struct LocalVar { + string ghidra_name; + string current_name; //current display name in disassembly display + ea_t offset; //offset into stack frame if stack var (BADADDR otherwise) + + LocalVar(const string& gname, const string& iname, ea_t _offset = BADADDR) : + ghidra_name(gname), current_name(iname), offset(_offset) {}; +}; + +struct Decompiled { + Function* ast; + func_t* ida_func; + strvec_t* sv; //text of the decompiled function displayed in a custom_viewer + map locals; + + Decompiled(Function *f, func_t *func) : ast(f), ida_func(func), sv(NULL) {}; + ~Decompiled(); + + void set_ud(strvec_t *ud); + strvec_t *get_ud() {return sv;}; +}; + +Decompiled::~Decompiled() { + delete ast; + for (map::iterator i = locals.begin(); i != locals.end(); i++) { + delete i->second; + } + delete sv; +} + +void Decompiled::set_ud(strvec_t* ud) { + delete sv; + sv = ud; +} + +void decompile_at(ea_t ea, TWidget* w = NULL); +int do_ida_rename(qstring& name, ea_t func); + +static map type_sizes; +static map > histories; +static map views; +static map function_map; +static set titles; + +arch_map_t arch_map; + +string sleigh_id; + +static string get_available_title() { + string title("A"); + while (titles.find(title) != titles.end()) { + int i = 0; + while (true) { + title[i] += 1; + if (title[i] > 'Z') { + title[i] = 'A'; + if (title.length() == i) { + title.push_back('A'); + break; + } + else { + i++; + } + } + else { + break; + 
} + } + } + return title; +} + +//--------------------------------------------------------------------------- +// get the word under the (keyboard or mouse) cursor +static bool get_current_word(TWidget* v, bool mouse, qstring& word, qstring* line) { + // query the cursor position + int x, y; + if (get_custom_viewer_place(v, mouse, &x, &y) == NULL) { + dmsg("get_current_word: !get_custom_viewer_place()\n"); + return false; + } + // query the line at the cursor + tag_remove(line, get_custom_viewer_curline(v, mouse)); + if (x >= line->length()) { + dmsg("get_current_word: x >= line->length()\n"); + return false; + } + char* ptr = line->begin() + x; + + char* end = ptr; + char* next = ptr; + char* last = ptr; + + // find the end of the word + while ( + (qisalnum(*end) || *end == '_' || + (*end == ':' && ( + (*next) == ':') || ((*last) == ':') + ) + ) // added :: as part of the words for std:: etc. names + && *end != '\0') { + last = end; + end++; + next = end + 1; + } + + if (end == ptr) { + dmsg("get_current_word: end == ptr\n"); + return false; + } + + // find the beginning of the word + while (ptr > line->begin() && (qisalnum(ptr[-1]) || ptr[-1] == '_' || *end == ':')) { + ptr--; + } + if (!qisalpha(*ptr) && *ptr != '_') { + dmsg("get_current_word: starts with a digit or something else\n"); + // return false; + } + word = qstring(ptr, end - ptr); + + dmsg("get_current_word: %s\n", word.c_str()); + + return true; +} + +static bool navigate_to_word(TWidget* w, bool cursor) { + + qstring word; + + qstring line; + + if (get_current_word(w, cursor, word, &line)) { + + ea_t ea = get_name_ea(BADADDR, word.c_str()); + + if (ea != BADADDR) { + + if (is_function_start(ea) && !is_extern_addr(ea)) { + + map >::iterator mi = histories.find(w); + + if (mi == histories.end() || mi->second.size() == 0 || mi->second.back() != ea) { + histories[w].push_back(ea); + decompile_at(ea, w); + } + + } + else { + + jumpto(ea); + + } + return true; + } + } + return false; +} + +static void 
refresh_widget(TWidget* w) { + + Decompiled* dec = function_map[w]; + + qstring nodename = NETNODEPRE; + + string nodeappend = std::to_string((long)dec->ida_func->start_ea); + + nodename.append(nodeappend.c_str()); + + netnode cno(nodename.c_str()); + + vector code; + + dec->ast->print(&code); + + strvec_t* sv = new strvec_t(); + + int ci = 0; // lines start at 0 in IDA + + for (vector::iterator si = code.begin(); si != code.end(); si++) { + + qstring pline = si->c_str(); + + if (cno != BADNODE) { + + // get length of entry + int len = cno.supstr(ci, NULL, 0); + + if (len > 1) { + + //msg("length %i \n",len); + + //allocate a buffer of sufficient size + char* outstr = new char[len]; + + // get the comment at the current line number in iteration from superval + cno.supval(ci, outstr, len); + + // append it as comment + pline.append(" // "); + pline.append(outstr); + + + } + + sv->push_back(simpleline_t(pline)); + + } + else { + + sv->push_back(simpleline_t(pline)); + + } + + ci++; + } + + callui(ui_custom_viewer_set_userdata, w, sv); + + refresh_custom_viewer(w); + + repaint_custom_viewer(w); + + dec->set_ud(sv); + +} + +//get the line number in the current custom viewer +int get_custom_viewer_line_number(TWidget* w, int* x, int* y) { + + place_t* pl = get_custom_viewer_place(w, false, x, y); + tcc_place_type_t pt = get_viewer_place_type(w); + + if (pl && pt == TCCPT_SIMPLELINE_PLACE) { + + simpleline_place_t* slp = (simpleline_place_t*)pl; + + return slp->n; + + } + else { + msg("Couldn't retrieve line number\n"); + return false; + } +} + +//--------------------------------------------------------------------------- +// Keyboard callback +static bool idaapi ct_keyboard(TWidget* w, int key, int shift, void* ud) { + + ea_t addr = 0; + + if (shift == 0) { + + strvec_t* sv = (strvec_t*)ud; + + switch (key) { + +#if DEVFUNC + //Refresh decompile + case 0x52: { //R - If I define it as R it won't work for some reason... 
+ + Decompiled* dec = function_map[w]; + + func_t* f = dec->ida_func; + + if (f) { + + msg("Re-Decompiled function at 0x%x.\n", f->start_ea); + + decompile_at(f->start_ea, w); + + refresh_widget(w); + } + + return true; + + } +#endif + // Open XRefs Window for focused function + case 'X': { + //view xrefs to function + qstring word; + qstring line; + + if (get_current_word(w, false, word, &line)) { + + qstring mname(word); + + ea_t name_ea = get_name_ea(BADADDR, word.c_str()); + + if (name_ea == BADADDR) { + //somehow the original name is invalid + dmsg("xref: %s has no addr\n", word.c_str()); + return -1; + } + + open_xrefs_window(name_ea); + + return true; + } + return true; + } + // Jump to address + case 'G': + if (ask_addr(&addr, "Jump address")) { + func_t* f = get_func(addr); + if (f) { + decompile_at(f->start_ea, w); + } + } + return true; + // rename the thing under the cursor + case 'N': { + Decompiled* dec = function_map[w]; + qstring word; + qstring line; + bool refresh = false; + if (get_current_word(w, false, word, &line)) { + string sword(word.c_str()); + dmsg("Try to rename: %s\n", word.c_str()); + if (!is_reserved(sword)) { // can't rename a reserved word + qstring new_name(word); + map::iterator mi = dec->locals.find(sword); + if (mi != dec->locals.end()) { + dmsg("%s is a local\n", word.c_str()); + LocalVar* lv = mi->second; + if (ask_str(&word, HIST_IDENT, "Please enter item name") && sword != word.c_str()) { + string newname(word.c_str()); + //need to make sure new name will be legal + if (is_reserved(newname) || dec->locals.find(newname) != dec->locals.end() || + get_name_ea(BADADDR, newname.c_str()) != BADADDR) { + msg("rename: \"newname\" is not a valid new name\n"); + return true; + } + if (lv->offset != BADADDR) { //stack var + dmsg("renaming a stack var %s to %s\n", sword.c_str(), word.c_str()); + if (set_member_name(get_frame(dec->ida_func), lv->offset, word.c_str())) { + lv->current_name = newname; + dec->locals.erase(sword); + 
dec->locals[newname] = lv; + dec->ast->rename(sword, newname); + refresh = true; + } + else { + dmsg("set_member_name failed\n"); + } + } + else { //not stack var, reg var?? + qstring iname; + netnode nn(dec->ida_func->start_ea); + dmsg("renaming a reg var %s to %s\n", sword.c_str(), word.c_str()); + lv->current_name = newname; + dec->locals.erase(sword); + dec->locals[word.c_str()] = lv; + dec->ast->rename(sword, word.c_str()); + nn.hashset(lv->ghidra_name.c_str(), word.c_str()); + refresh = true; + } + } + } + else { + // has an own ask dialog + int res = do_ida_rename(new_name, dec->ida_func->start_ea); + + if (res == 2) { + //renming a global + string snew_name(new_name.c_str()); + dec->ast->rename(sword, snew_name); + dmsg("rename: %s -> %s\n", word.c_str(), new_name.c_str()); + refresh = true; + } + else { + dmsg("rename: bad return code, res = %i\n", res); + } + + + } + } + + else { + + } + } + if (refresh) { + + Decompiled* dec = function_map[w]; + + func_t* f = dec->ida_func; + + decompile_at(f->start_ea, w); + + refresh_widget(w); + + dmsg("Refresh done\n"); + + } + return true; + } + //Set type for the thing under the cursor + case 'Y': { + Decompiled* dec = function_map[w]; //the ast for the function we are editing + qstring word; + qstring line; + if (get_current_word(w, false, word, &line)) { + //need to determine the thing being typed along with it's old type + //user may have selected the type name at a variable's declaration, + //or the user may have selected the variable name at its declaration + //or some place it is used, so we need to find the variable's declaration + //node in the ast (unless it's a global) so that we can change the Type + //node within the declaration node. 
+ + int x = -1; + int y = -1; + + y = get_custom_viewer_line_number(w, &x, &y); + + //indent doesn't get factored into ast x/y data + for (const char* cptr = line.c_str(); *cptr == ' '; cptr++) { + x--; + } + + string sword(word.c_str()); + map::iterator mi = dec->locals.find(sword); + VarDecl* decl = NULL; + if (mi != dec->locals.end()) { + dmsg("Find decl by name (%s)\n", sword.c_str()); + decl = find_decl(dec->ast, sword); + } + else { + dmsg("Find decl by x,y (%d,%d)\n", x, y); + decl = find_decl(dec->ast, x, y); + } + if (decl == NULL) { + //last chance - see if word refers to a global, then ask IDA its type + } + else { + // msg("You seem to be referring to this decl: %s on line %d col %d\n", decl->var->name.c_str(), decl->line_begin, decl->col_start); + } +#if 0 +//not ready yet + //need to get string representation of the decl (if type is known) to display to user + if (ask_str(&word, HIST_IDENT, "Please enter the type declaration")) { + //now we need to parse what the user entered to extract only type related info + //then determine whether the user entered a type known to ida, and if so + //update the ast to change the variable's type. If the variable is a stack variable, + //global variable, or function parameter, also change the type in IDA. 
+ //If the type is for a register variable, then update the variable's type in a + //netnode (like the variable name map) + + //use parse_decl to parse user text into a type + //then will need to extract IDA's tinfo_t information back to an updated ast Type node + } +#endif + } + return true; + } + // write a comment + case IK_DIVIDE: // on an short US keyboard you cannot add an comment, IK_OEM_2 is the other "/" + case IK_OEM_2: + case 'C': // alternate comment key + { + + //Add eol comment on current line + + //get current line number + int x = -1; + int y = -1; + + y = get_custom_viewer_line_number(w, &x, &y); + + if (y == NULL) { + return false; + } + dmsg("comment: x:%i y:%i\n", x, y); + + Decompiled* dec = function_map[w]; + + // node name is $ blc+ startoffset of function, this avoids issues with renaming + qstring nodename = NETNODEPRE; + + // generating netnode name and open that node + //thanks Chris for your book :-) - page 294 f. + + string nodeappend = std::to_string((long)dec->ida_func->start_ea); + + nodename.append(nodeappend.c_str()); + + netnode cno(nodename.c_str()); + + // read existing comment if any + + int len = cno.supstr(y, NULL, 0); + + qstring comment; + + if (len > 1) { + + char* obuf = new char[len]; //allocate a buffer of sufficient size + cno.supval(y, obuf, len); //extract data from the supval + + comment = obuf; + + } + + // sorry only, one line comments for now + // TODO: Allow multi lines + + if (ask_str(&comment, HIST_CMT, "Please enter your comment")) { + + // save comment to a netnode + // https://www.hex-rays.com/products/ida/support/sdkdoc/netnode_8hpp.html + + // check if node exists, if not create it + if (cno == BADNODE) { + cno.create(nodename.c_str()); + } + + //save comment at array index "linenumber" + cno.supset(y, comment.c_str()); + + refresh_widget(w); + + msg("Added comment \"%s\" on line %d\n", comment.c_str(), y); + } + + return true; + } + // back + case IK_ESCAPE: { + map >::iterator mi = histories.find(w); + 
if (mi != histories.end()) { + qvector& v = mi->second; + if (v.size() == 1) { + close_widget(w, WCLS_DONT_SAVE_SIZE | WCLS_CLOSE_LATER); + string t = views[w]; + views.erase(w); + delete function_map[w]; + function_map.erase(w); + titles.erase(t); + } + else { + v.pop_back(); + decompile_at(v.back(), w); + } + return true; + } + break; + } + // navigate to + case IK_RETURN: { //jump to symbol under cursor + return navigate_to_word(w, false); + } + default: + dmsg("Detected key press: 0x%x\n", key); + break; + } + } + return false; +} + + + +static bool idaapi ct_dblclick(TWidget* cv, int shift, void* ud) { + // msg("Double clicked on: %s\n", word.c_str()); + return navigate_to_word(cv, true); +} + +static const custom_viewer_handlers_t handlers( + ct_keyboard, + NULL, // popup + NULL, // mouse_moved + NULL, // click + ct_dblclick, // dblclick + NULL, //ct_curpos, + NULL, // close + NULL, // help + NULL);// adjust_place + +string ghidra_dir; + +map proc_map; + +map return_reg_map; + +int blc_init_old(void); + +static const char *name_dialog; + +#if IDA_SDK_VERSION >= 750 +plugmod_t* idaapi blc_init_new(void); +#elif IDA_SDK_VERSION > 740 +size_t idaapi blc_init_new(void); +#else +int idaapi blc_init_new(void); +#endif + +void idaapi blc_term(void); + +//get the format string for IDA's standard rename dialog +void find_ida_name_dialog() { + help_t i; + for (i = 0; ; i++) { + const char* hlp = itext(i); + const char* lf = strchr(hlp, '\n'); + if (lf != NULL) { + lf++; + if (strncmp("Rename address\n", lf, 15) == 0) { + name_dialog = hlp; + // msg("Found:\n%s\n", hlp); + break; + } + } + } +} + +// return -1 - name is not associated with a symbol +// return 0 - duplicate name +// return 1 - no change +// return 2 - name changed +// return 3 - new name, but couldn't change it +int do_ida_rename(qstring& name, ea_t func) { + ea_t name_ea = get_name_ea(func, name.c_str()); + if (name_ea == BADADDR) { + //somehow the original name is invalid + dmsg("rename: %s has no 
addr\n", name.c_str()); + return -1; + } + qstring orig = name; + bool res = ask_str(&name, HIST_IDENT, "Please enter item name"); + if (res && name != orig) { + ea_t new_name_ea = get_name_ea(func, name.c_str()); + if (new_name_ea != BADADDR) { + //new name is same as existing name + msg("rename: new name already in use\n", name.c_str()); + return 0; + } + // msg("Custom rename: %s at adddress 0x%zx\n", name.c_str(), name_ea); + res = set_name(name_ea, name.c_str()); + return res ? 2 : 3; + } + // msg("rename: no change\n"); + return 1; +} + +void init_ida_ghidra() { + const char* ghidra = getenv("GHIDRA_DIR"); + if (ghidra) { + ghidra_dir = ghidra; + } + else { + ghidra_dir = idadir("plugins"); + } + // find_ida_name_dialog(); + + arch_map[PLFM_MIPS] = mips_setup; + + proc_map[PLFM_6502] = "6502"; + proc_map[PLFM_68K] = "68000"; + proc_map[PLFM_6800] = "6805"; + //proc_map[PLFM_xxx] = "8048"; + proc_map[PLFM_8051] = "8051"; + //proc_map[PLFM_Z80] = "8085"; + proc_map[PLFM_ARM] = "ARM"; + //proc_map[PLFM_ARM] = "AARCH64"; + proc_map[PLFM_AVR] = "Atmel"; + proc_map[PLFM_CR16] = "CR16"; + proc_map[PLFM_DALVIK] = "Dalvik"; + proc_map[PLFM_JAVA] = "JVM"; + proc_map[PLFM_MIPS] = "MIPS"; + proc_map[PLFM_HPPA] = "pa-risc"; + proc_map[PLFM_PIC] = "PIC"; + proc_map[PLFM_PPC] = "PowerPC"; + proc_map[PLFM_SPARC] = "sparc"; + proc_map[PLFM_MSP430] = "TI_MSP430"; + proc_map[PLFM_TRICORE] = "tricore"; + proc_map[PLFM_386] = "x86"; + proc_map[PLFM_Z80] = "Z80"; +#if NEWPROCS + proc_map[PLFM_NEC_V850X] = "V850"; +#endif + + return_reg_map[PLFM_6502] = "6502"; + return_reg_map[PLFM_68K] = "68000"; + return_reg_map[PLFM_6800] = "6805"; + //return_reg_map[PLFM_xxx] = "8048"; + return_reg_map[PLFM_8051] = "8051"; + //return_reg_map[PLFM_Z80] = "8085"; + return_reg_map[PLFM_ARM] = "r0:r0:r0:r0"; + //return_reg_map[PLFM_ARM] = "r0:r0:r0:r0"; + return_reg_map[PLFM_AVR] = "Atmel"; + return_reg_map[PLFM_CR16] = "CR16"; + return_reg_map[PLFM_DALVIK] = "Dalvik"; + return_reg_map[PLFM_JAVA] 
= "JVM"; + return_reg_map[PLFM_MIPS] = "v0:v0:v0:v0"; + return_reg_map[PLFM_HPPA] = "PA-RISC"; + return_reg_map[PLFM_PIC] = "PIC"; + return_reg_map[PLFM_PPC] = "PowerPC"; + return_reg_map[PLFM_SPARC] = "Sparc"; + return_reg_map[PLFM_MSP430] = "TI_MSP430"; + return_reg_map[PLFM_TRICORE] = "tricore"; + return_reg_map[PLFM_386] = "al:ax:eax:rax"; + return_reg_map[PLFM_Z80] = "Z80"; +#if NEWPROCS + return_reg_map[PLFM_NEC_V850X] = "V850"; +#endif + + type_sizes["void"] = 1; + type_sizes["bool"] = 1; + type_sizes["uint1"] = 1; + type_sizes["uint2"] = 2; + type_sizes["uint4"] = 4; + type_sizes["uint8"] = 8; + type_sizes["int1"] = 1; + type_sizes["int2"] = 2; + type_sizes["int4"] = 4; + type_sizes["int8"] = 8; + type_sizes["float4"] = 4; + type_sizes["float8"] = 8; + type_sizes["float10"] = 10; + type_sizes["float16"] = 16; + type_sizes["xunknown1"] = 1; + type_sizes["xunknown2"] = 2; + type_sizes["xunknown4"] = 4; + type_sizes["xunknown8"] = 8; + type_sizes["code"] = 1; + type_sizes["char"] = 1; + type_sizes["wchar2"] = 2; + type_sizes["wchar4"] = 4; +} + +#if IDA_SDK_VERSION < 730 + +#define WOPN_DP_TAB WOPN_TAB + +bool inf_is_64bit() { + return inf.is_64bit(); +} + +bool inf_is_32bit() { + return inf.is_32bit(); +} + +void inf_get_cc(compiler_info_t* cc) { + *cc = inf.cc; +} + +bool inf_is_be() { + return inf.is_be(); +} + +filetype_t inf_get_filetype() { + return (filetype_t)inf.filetype; +} + +#endif + +int get_proc_id() { +#if IDA_SDK_VERSION < 750 + return ph.id; +#else + return PH.id; +#endif +} + +bool get_sleigh_id(string &sleigh) { + sleigh.clear(); + map::iterator proc = proc_map.find(get_proc_id()); + if (proc == proc_map.end()) { + return false; + } + compiler_info_t cc; + inf_get_cc(&cc); + bool is_64 = inf_is_64bit(); + bool is_be = inf_is_be(); + filetype_t ftype = inf_get_filetype(); + + sleigh = proc->second + (is_be ? 
":BE" : ":LE"); + + switch (get_proc_id()) { + case PLFM_6502: + sleigh += ":16:default"; + break; + case PLFM_68K: + //options include "default" "MC68030" "MC68020" "Coldfire" + sleigh += ":32:default"; + break; + case PLFM_6800: + sleigh += ":8:default"; + break; + case PLFM_8051: + sleigh += ":16:default"; + break; + case PLFM_ARM: + //options include "v8" "v8T" "v8LEInstruction" "v7" "v7LEInstruction" "Cortex" + // "v6" "v5t" "v5" "v4t" "v4" "default" + if (is_64) { //AARCH64 + sleigh = "AARCH64"; + sleigh += (is_be ? ":BE:64:v8A" : ":LE:64:v8A"); + } + else { + sleigh += ":32:v7"; + } + break; + case PLFM_AVR: + sleigh += ":16:default"; + break; + case PLFM_CR16: + sleigh += ":16:default"; + break; + case PLFM_DALVIK: + sleigh += ":32:default"; + break; + case PLFM_JAVA: + sleigh += ":32:default"; + break; + case PLFM_MIPS: { + //options include "R6" "micro" "64-32addr" "micro64-32addr" "64-32R6addr" "default" + qstring abi; + if (get_abi_name(&abi) > 0 && abi.find("n32") == 0) { + sleigh += ":64:64-32addr"; + } + else { + sleigh += is_64 ? ":64:default" : ":32:default"; + } + break; + } + case PLFM_HPPA: + sleigh += ":32:default"; + break; + case PLFM_PIC: + break; + case PLFM_PPC: { + //options include "default" "64-32addr" "4xx" "MPC8270" "QUICC" "A2-32addr" + // "A2ALT-32addr" "A2ALT" "VLE-32addr" "VLEALT-32addr" + qstring abi; + if (get_abi_name(&abi) > 0 && abi.find("xbox") == 0) { + // ABI name is set to "xbox" for X360 PPC executables + sleigh += ":64:A2ALT-32addr"; + } + else { + sleigh += is_64 ? ":64:default" : ":32:default"; + } + break; + } + case PLFM_SPARC: + sleigh += is_64 ? ":64" : ":32"; + sleigh += ":default"; + break; + case PLFM_MSP430: + sleigh += ":16:default"; + break; + case PLFM_TRICORE: + sleigh += ":32:default"; + break; + case PLFM_386: + //options include "System Management Mode" "Real Mode" "Protected Mode" "default" + sleigh += is_64 ? ":64" : (inf_is_32bit() ? 
":32" : ":16"); + if (sleigh.find(":16") != string::npos) { + sleigh += ":Real Mode"; + } + else { + sleigh += ":default"; + } + + if (cc.id == COMP_BC) { + sleigh += ":borlandcpp"; + } + else if (cc.id == COMP_MS) { + sleigh += ":windows"; + } + else if (cc.id == COMP_GNU) { + sleigh += ":gcc"; + } + break; + case PLFM_Z80: + break; +#if NEWPROCS + case PLFM_NEC_V850X: + sleigh += ":32:default"; + break; +#endif + default: + return false; + } + + return true; +} + +void get_ida_bytes(uint8_t* buf, uint64_t size, uint64_t ea) { + get_bytes(buf, size, (ea_t)ea); +} + +bool does_func_return(void* func) { + func_t* f = (func_t*)func; + return func_does_return(f->start_ea); +} + +uint64_t get_func_start(void* func) { + func_t* f = (func_t*)func; + return f->start_ea; +} + +uint64_t get_func_start(uint64_t ea) { + func_t* f = get_func((ea_t)ea); + return f ? f->start_ea : BADADDR; +} + +uint64_t get_func_end(uint64_t ea) { + func_t* f = get_func((ea_t)ea); + return f ? f->end_ea : BADADDR; +} + +//Create a Ghidra to Ida name mapping for a single loval variable (including formal parameters) +void map_var_from_decl(Decompiled* dec, VarDecl* decl) { + Function* ast = dec->ast; + func_t* func = dec->ida_func; + struc_t* frame = get_frame(func); + ea_t ra = frame_off_retaddr(func); + const string gname = decl->getName(); + size_t stack = gname.find("Stack"); + LocalVar* lv = new LocalVar(gname, gname); //default current name will be ghidra name + if (stack != string::npos) { //if it's a stack var, change current to ida name + uint32_t stackoff = strtoul(&gname[stack + 5], NULL, 0); + member_t* var = get_member(frame, ra - stackoff); + lv->offset = ra - stackoff; + if (var) { //now we know there's an ida name assigned + qstring iname; + get_member_name(&iname, var->id); + ast->rename(gname, iname.c_str()); + dec->locals[iname.c_str()] = lv; + lv->current_name = iname.c_str(); + } + else { //ghidra says there's a variable here, let's name it in ida + //TODO - need to compute 
sizeof(decl) to properly create + // the new data member + qstring iname; + iname.sprnt("var_%X", stackoff - func->frregs); + if (add_struc_member(frame, iname.c_str(), ra - stackoff, byte_flag(), NULL, 1) == 0) { + ast->rename(gname, iname.c_str()); + dec->locals[iname.c_str()] = lv; + lv->current_name = iname.c_str(); + } + else { + dec->locals[gname] = lv; + } + } + } + else { //handle non-stack (register) local variables + netnode nn(dec->ida_func->start_ea); + qstring iname; + if (nn.hashstr(&iname, gname.c_str()) <= 0) { + //no existing mapping + dec->locals[gname] = lv; + } + else { + //we already have a mapping for this ghidra variable + ast->rename(gname, iname.c_str()); + dec->locals[iname.c_str()] = lv; + lv->current_name = iname.c_str(); + } + } +} + +void map_ghidra_to_ida(Decompiled* dec) { + Function* ast = dec->ast; + vector& bk = ast->block.block; + vector& parms = ast->prototype.parameters; + + //add mappings for formal parameter names + for (vector::iterator i = parms.begin(); i != parms.end(); i++) { + VarDecl* decl = *i; + map_var_from_decl(dec, decl); + } + + //add mappings for variable names + for (vector::iterator i = bk.begin(); i != bk.end(); i++) { + VarDecl* decl = dynamic_cast(*i); + if (decl) { + map_var_from_decl(dec, decl); + } + else { + break; + } + } +} + +void decompile_at(ea_t addr, TWidget* w) { + string xml; + string cfunc; + func_t* func = get_func(addr); + Function* ast = NULL; + if (func) { + + // We need to hard reset things to get the symboltab reloaded + // dirty solution but best I came up with atm. + // If we not refresh ist teh decompiler output will be broken in + // certain situations, i.e. 
if something was changed outside the + // decompiler window + ghidra_init(); + + int res = do_decompile(func->start_ea, func->end_ea, &ast); + if (ast) { + dmsg("got a Functon tree!\n"); + Decompiled* dec = new Decompiled(ast, func); + + // now try to map ghidra stack variable names to ida stack variable names + dmsg("mapping ida names to ghidra names\n"); + map_ghidra_to_ida(dec); + + vector code; + + // Generating C code + dec->ast->print(&code); + + // Displaying C code + strvec_t* sv = new strvec_t(); + + dec->set_ud(sv); + + // build code view line by line from generated ast including comments + + qstring nodename = NETNODEPRE; + + string nodeappend = std::to_string((long)dec->ida_func->start_ea); + + nodename.append(nodeappend.c_str()); + + netnode cno(nodename.c_str()); + + int ci = 0; // lines start at 0 in IDA + + for (vector::iterator si = code.begin(); si != code.end(); si++) { + + qstring pline = si->c_str(); + + if (cno != BADNODE) { + // get length of entry + int len = cno.supstr(ci, NULL, 0); + + if (len > 1) { + + //allocate a buffer of sufficient size + char* outstr = new char[len]; + + // get the comment at the current line number in iteration from superval + cno.supval(ci, outstr, len); + + // append it as comment + pline.append(" // "); + pline.append(outstr); + + } + + sv->push_back(simpleline_t(pline)); + + } + else { + sv->push_back(simpleline_t(pline)); + } + + ci++; + } + + qstring func_name; + qstring fmt; + + get_func_name(&func_name, func->start_ea); + + // TODO: Improve tab titles + string title = get_available_title(); + + fmt.sprnt("Ghidra Code-%s", title.c_str()); // make the suffix change with more windows + + simpleline_place_t s1; + simpleline_place_t s2((int)(sv->size() - 1)); + if (w == NULL) { + + // create new code viewer + // sv = viewer content + w = create_custom_viewer(fmt.c_str(), &s1, &s2, &s1, NULL, sv, &handlers, sv); + + TWidget* code_view = create_code_viewer(w); + + /// Specify that the given code viewer is used to 
display source code + set_code_viewer_is_source(code_view); + + display_widget(code_view, WOPN_DP_TAB); + + histories[w].push_back(addr); + views[w] = title; + + titles.insert(title); + } + else { + + // if viewer already exists + + callui(ui_custom_viewer_set_userdata, w, sv); + + refresh_custom_viewer(w); + repaint_custom_viewer(w); + + delete function_map[w]; + } + + function_map[w] = dec; + + } + // msg("do_decompile returned: %d\n%s\n%s\n", res, code.c_str(), cfunc.c_str()); + } +} + +const char* tag_remove(const char* tagged) { + static qstring ll; + tag_remove(&ll, tagged); + return ll.c_str(); +} + +#if IDA_SDK_VERSION >= 750 + +struct blc_plugmod_t : public plugmod_t { + /// Invoke the plugin. + virtual bool idaapi run(size_t arg); + + /// Virtual destructor. + virtual ~blc_plugmod_t(); +}; + +plugmod_t *idaapi blc_init(void) { + //do ida related init + init_ida_ghidra(); + + if (ghidra_init()) { + return new blc_plugmod_t(); + } + else { + return NULL; + } +} + +blc_plugmod_t::~blc_plugmod_t(void) { + ghidra_term(); +} + +bool idaapi blc_plugmod_t::run(size_t /*arg*/) { + ea_t addr = get_screen_ea(); +#ifdef DEBUG + msg("decompile_at 0x%llx\n", (uint64_t)addr); +#endif + decompile_at(addr); + return true; +} + +#define blc_run NULL +#define blc_term NULL + +#else + +//make life easier in a post 7.5 world +#define PLUGIN_MULTI 0 + +int idaapi blc_init(void) { + //do ida related init + init_ida_ghidra(); + + if (ghidra_init()) { + return PLUGIN_KEEP; + } + else { + return PLUGIN_SKIP; + } +} + +void idaapi blc_term(void) { + ghidra_term(); +} + +bool idaapi blc_run(size_t /*arg*/) { + ea_t addr = get_screen_ea(); + decompile_at(addr); + return true; +} +#endif + +int64_t get_name(string& name, uint64_t ea, int flags) { + qstring ida_name; + int64_t res = get_name(&ida_name, (ea_t)ea, flags); + if (res > 0) { + name = ida_name.c_str(); + } + return res; +} + +int64_t get_func_name(string& name, uint64_t ea) { + qstring ida_name; + int64_t res = 
get_func_name(&ida_name, (ea_t)ea); + if (res > 0) { + name = ida_name.c_str(); + } + return res; +} + +bool is_function_start(uint64_t ea) { + func_t* f = get_func((ea_t)ea); + return f != NULL && f->start_ea == (ea_t)ea; +} + +void get_input_file_path(string& path) { + char buf[512]; + get_input_file_path(buf, sizeof(buf)); + path = buf; +} + +bool is_thumb_mode(uint64_t ea) { + return get_sreg((ea_t)ea, 20) == 1; +} + +//is ea a function internal jump target, if so +//return true and place its name in name +//else return false +bool is_code_label(uint64_t ea, string& name) { + xrefblk_t xr; + for (bool success = xr.first_to((ea_t)ea, XREF_ALL); success; success = xr.next_to()) { + if (xr.iscode == 0) { + break; + } + if (xr.type != fl_JN) { + continue; + } + qstring ida_name; + int64_t res = get_name(&ida_name, (ea_t)ea, GN_LOCAL); + if (res > 0) { + name = ida_name.c_str(); + return true; + } + } + return false; +} +/* + check if adress is extern by validating the segment type in which ea is located + or checking for common segment names +*/ +bool is_extern_addr(uint64_t ea) { + + qstring sname, stype; + segment_t* s = getseg(ea); + + if (s) { + + get_segm_name(&sname, s); + get_segm_class(&stype, s); + + dmsg("is_extern_addr ea: %x %s %s\n", ea, sname.c_str(), stype.c_str()); + + if (stype == "XTRN" || + //strcmp return 0 if equal! 
+ !strcmp(sname.c_str(), "extern") || // name in a lot of ELF Binaries + !strcmp(sname.c_str(), ".idata")) // name in PE bins on Windows + { + dmsg("is_extern_addr true\n"); + return true; + } + } + return false; +} + +bool is_external_ref(uint64_t ea, uint64_t* fptr) { + ea_t got; + func_t* pfn = get_func((ea_t)ea); + if (pfn == NULL) { + return false; + } + if (is_extern_addr(pfn->start_ea)) { + if (fptr) { + *fptr = pfn->start_ea; + } + return true; + } + ea_t _export = calc_thunk_func_target(pfn, &got); + bool res = _export != BADADDR; + if (res) { + if (fptr) { + *fptr = got; + } + dmsg("0x%zx is external, with got entry at 0x%zx\n", ea, (size_t)got); + } + return res; +} + +bool is_extern(const string& name) { + bool res = false; + ea_t ea = get_name_ea(BADADDR, name.c_str()); + if (ea == BADADDR) { + dmsg("is_extern called for %s (BADADDR)\n", name.c_str()); + return false; + } + if (is_function_start(ea)) { + dmsg("is_extern - is_function_start\n"); + res = is_external_ref(ea, NULL); + } + else { + res = is_extern_addr(ea); + } +#if DEBUG_PLUGIN + if ((res == false) && true) { + + // code for debugging xrefs + + //decode insn to get assembly command + insn_t ida_instruction; + + if (decode_insn(&ida_instruction, ea) <= 0) { + return false; + } + + dmsg("is_indirect_jump_insn = %d\n", is_indirect_jump_insn(ida_instruction)); + dmsg("is_call_insn = %d\n", is_call_insn(ida_instruction)); + + string astring; + dmsg("is_code_label = %d\n", is_code_label(ea, astring)); + } +#endif + dmsg("is_extern called for %s (%d)\n", name.c_str(), res); + return res; +} + +bool address_of(const string& name, uint64_t* addr) { + bool res = false; + ea_t ea = get_name_ea(BADADDR, name.c_str()); + if (ea == BADADDR) { + return false; + } + *addr = ea; + return true; +} + +bool is_library_func(const string& name) { + bool res = false; + ea_t ea = get_name_ea(BADADDR, name.c_str()); + if (is_function_start(ea)) { + func_t* f = get_func(ea); + res = f ? 
(f->flags & FUNC_LIB) != 0 : false; + } + return res; +} + +bool is_named_addr(uint64_t ea, string& name) { + qstring res; + //a sanity check on ea + segment_t* s = getseg(0); + if (s != NULL && ea < s->end_ea) { + //ea falls in first segment of zero based binary + //this are generally headers and ea is probably + //not a pointer but instead just a small number + return false; + } + if (get_name(&res, (ea_t)ea) > 0) { + name = res.c_str(); + return true; + } + return false; +} + +bool is_pointer_var(uint64_t ea, uint32_t size, uint64_t* tgt) { + xrefblk_t xb; + if (xb.first_from(ea, XREF_DATA) && xb.type == dr_O) { + // xb.to - contains the referenced address + *tgt = xb.to; + return true; + } + return false; +} + +bool is_read_only(uint64_t ea) { + qstring sname; + segment_t* s = getseg(ea); + if (s) { + if ((s->perm & SEGPERM_WRITE) == 0) { + return true; + } + //not explicitly read only, so let's make some guesses + //based on the segment name + get_segm_name(&sname, s); + if (sname.find("got") <= 1) { + return true; + } + if (sname.find("rodata") <= 1) { + return true; + } + if (sname.find("rdata") <= 1) { + return true; + } + if (sname.find("idata") <= 1) { + return true; + } + if (sname.find("rel.ro") != qstring::npos) { + return true; + } + } + return false; +} + +bool simplify_deref(const string &name, string &new_name) { + uint64_t tgt; + ea_t addr = get_name_ea(BADADDR, name.c_str()); +#if IDA_SDK_VERSION < 750 + uint32_t max_ptr_size = (uint32_t)ph.max_ptr_size(); +#else + uint32_t max_ptr_size = (uint32_t)PH.max_ptr_size(); +#endif + if (addr != BADADDR && is_read_only(addr) && is_pointer_var(addr, max_ptr_size, &tgt)) { + if (get_name(new_name, tgt, 0)) { +// msg("could simplify *%s to %s\n", name.c_str(), new_name.c_str()); + return true; + } + } + return false; +} + +void adjust_thunk_name(string& name) { + + ea_t ea = get_name_ea(BADADDR, name.c_str()); + + dmsg("adjust_thunk_name(%s)\n", name.c_str()); + + if (is_function_start(ea)) { + + func_t* f 
= get_func(ea); + ea_t fun = calc_thunk_func_target(f, &ea); + + if (fun != BADADDR) { + + qstring tname; + + //this seems to return success even on failure, e.g. in the debugger + get_name(&tname, fun); + + string stname = tname.c_str(); + + if (!stname.empty()) { + dmsg(" adjust_thunk_name: setting new name \"%s\"\n", tname.c_str()); + name = tname.c_str(); + } + } + } + +} + +//TODO think about sign extension for values smaller than 8 bytes +bool get_value(uint64_t addr, uint64_t* val) { + flags_t f = get_full_flags(addr); + if (is_qword(f)) { + *val = get_qword(addr); + } + else if (is_dword(f)) { + *val = get_dword(addr); + } + else if (is_byte(f)) { + *val = get_byte(addr); + } + else if (is_word(f)) { + *val = get_word(addr); + } + else { + return false; + } + return true; +} + +// TODO: optimize with new functions +bool get_string(uint64_t addr, string& str) { + qstring res; + flags_t f = get_full_flags(addr); + + if (is_strlit(f)) { + get_strlit_contents(&res, addr, -1, STRTYPE_C); + str = res.c_str(); + return true; + } + else if (!is_data(f)) { + size_t maxlen = get_max_strlit_length(addr, STRTYPE_C); + if (maxlen > 4) { + create_strlit(addr, 0, STRTYPE_C); + get_strlit_contents(&res, addr, -1, STRTYPE_C); + str = res.c_str(); + return true; + } + } + return false; +} + +bool get_str_lit(uint64_t addr, string* str) { + + qstring res; + + flags_t f = get_full_flags(addr); + + //msg("get_str_lit(): %x\n",addr); + + if (is_strlit(f)) { + + //msg("get_str_lit(): is_strlit()\n"); + + get_strlit_contents(&res, addr, -1, STRTYPE_C); + + *str = res.c_str(); + + return true; + } + //try to resolve another way... 
+ if (is_off(f, OPND_ALL)) { + + refinfo_t ri; + + get_refinfo(&ri, addr, OPND_ALL); + + //msg("get_str_lit(): refbase %x target %x\n", ri.base, ri.target); + + uval_t v; //target addr + + get_data_value(&v, addr, 0); + + //msg("get_str_lit(): %x is offset with target %x\n", addr, v); + + get_str_lit(ri.base + v, str); + + return true; + + } + + return false; +} + + +bool get_string_ea(uint64_t addr, string* str) { + + qstring res; + + get_str_lit(addr, str); + + if (*str != "") { + return true; + } + + flags_t f = get_full_flags(addr); + + if (is_off(f, OPND_ALL)) { + + //msg("is Offset\n"); + + refinfo_t ri; + + get_refinfo(&ri, addr, OPND_ALL); + + if (ri.target != BADADDR) { + + //untested + get_str_lit(ri.target, str); + + } + else if (is_code(f)) { + + // shamelessly borrowed from the NSA: + // https://github.com/NationalSecurityAgency/ghidra/blob/21f4802c2a9930ef3447f70d37f391b91c3cda5b/GhidraBuild/IDAPro/Python/6xx/plugins/xmlexp.py + + insn_t out; + decode_insn(&out, addr); + + ea_t value = out.ops->addr; + + ea_t target = value - ri.tdelta + ri.base; + + get_str_lit(target, str); + + dmsg("CODE: offset %x\n", addr); + + } + else if (is_data(f)) { + + uval_t v; + + get_data_value(&v, addr, 0); + + get_str_lit(v, str); + + dmsg("DATA: offset %x %x\n", addr, v); + + } + + } + + if (*str != "") { + return true; + } + + return false; +} + +string get_string(const string& name) { + + ea_t ea = get_name_ea(BADADDR, name.c_str()); + string str; + + bool res = get_string_ea(ea, &str); + + /* if (!res) { + msg("Error getting string for %s (ea: %x)\n", name.c_str(), ea); + } + else { + msg("str: %s\n", str.c_str()); + } + */ + return str.c_str(); +} + +bool is_string(const string& name) { + + ea_t ea = get_name_ea(BADADDR, name.c_str()); + + if (ea == BADADDR) { + return false; + } + + qstring sname; + segment_t* s = getseg(ea); + if (s) { + get_segm_name(&sname, s); + + if (strstr(sname.c_str(), "string")) { + return true; + } + } + return false; +} + +void 
print_blc_banner() { + if (!sleigh_id.c_str()) + msg("Ghidra Decompiler (blc) - CPU not supported!\n"); + else { + msg("Ghidra Decompiler (blc) ready.\nUsing sleigh id: %s\n", sleigh_id.c_str()); + } +} + +#if IDA_SDK_VERSION >= 750 +plugmod_t* idaapi blc_init_new(void) { + + plugmod_t* res = blc_init(); + + print_blc_banner(); + + return res; +} +#elif IDA_SDK_VERSION >= 740 +size_t idaapi blc_init_new(void) { + + size_t res = blc_init(); + + print_blc_banner(); + + return res; +} +#else +int idaapi blc_init_new(void) { + int res = blc_init(); + + print_blc_banner(); + + return res; +} +#endif + +//-------------------------------------------------------------------------- +char comment[] = "Ghidra decompiler integration."; + +char help[] = "I have nothing to offer.\n"; + +char wanted_name[] = "Ghidra Decompiler"; + +char wanted_hotkey[] = "Alt-F3"; + +plugin_t PLUGIN = +{ + IDP_INTERFACE_VERSION, + PLUGIN_MULTI, // plugin flags + blc_init_new, // initialize + blc_term, // terminate. this pointer may be NULL. + blc_run, // invoke plugin + comment, // long comment about the plugin + // it could appear in the status line + // or as a hint + help, // multiline help about the plugin + wanted_name, // the preferred short name of the plugin + wanted_hotkey // the preferred hotkey to run the plugin +}; diff --git a/plugin.hh b/plugin.hh index e6bfc88..b535f30 100644 --- a/plugin.hh +++ b/plugin.hh @@ -1,113 +1,124 @@ -/* - Source for the blc IdaPro plugin - Copyright (c) 2019 Chris Eagle - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. 
- - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 Temple - Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -#ifndef __BLC_PLUGIN_H -#define __BLC_PLUGIN_H - -#include -#include -#include -#include -#include - -using std::istream; -using std::ostream; -using std::string; -using std::map; -using std::vector; - -#include "ast.hh" - -extern string ghidra_dir; -//extern int blc_index; - -typedef void (*arch_setup_t)(uint64_t start, uint64_t end); -typedef map arch_map_t; - -extern arch_map_t arch_map; - -int get_proc_id(); - -void init_ida_ghidra(); - -void get_ida_bytes(uint8_t *buf, uint64_t size, uint64_t ea); - -int64_t get_name(string &name, uint64_t ea, int flags); - -int64_t get_func_name(string &name, uint64_t ea); - -bool is_function_start(uint64_t ea); - -void get_input_file_path(string &path); - -bool get_sleigh_id(string &sleigh); - -bool does_func_return(/*func_t*/ void *f); - -uint64_t get_func_start(/*func_t*/ void *f); - -uint64_t get_func_start(uint64_t ea); -uint64_t get_func_end(uint64_t ea); - -int do_decompile(uint64_t start_ea, uint64_t end_ea, Function **ast); - -const char *tag_remove(const char *tagged); - -bool is_thumb_mode(uint64_t ea); - -//is ea a function internal jump target, if so -//return true and place its name in name -//else return false -bool is_code_label(uint64_t ea, string &name); - -//is ea an external function, if so -//return true and place its import table address in fptr -//else return false -bool is_extern_addr(uint64_t ea); -bool is_external_ref(uint64_t ea, uint64_t *fptr); -bool is_extern(const string &name); - -//is ea a library function, if so -//return true -//else return false -bool is_library_func(const string &name); - -bool is_named_addr(uint64_t ea, string &name); - -bool is_pointer_var(uint64_t ea, uint32_t size, uint64_t *tgt); - -bool is_read_only(uint64_t ea); - -bool address_of(const string &name, 
uint64_t *ea); - -bool simplify_deref(const string &name, string &new_name); - -void adjust_thunk_name(string &name); - -void mips_setup(uint64_t start, uint64_t end); - -bool get_value(uint64_t addr, uint64_t *val); - -bool get_string(uint64_t addr, string &str); - -bool ghidra_init(void); -void ghidra_term(void); - -#endif - +/* + Source for the blc IdaPro plugin + Copyright (c) 2019 Chris Eagle + Copyright (c) 2020 Alexander Pick + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., 59 Temple + Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __BLC_PLUGIN_H +#define __BLC_PLUGIN_H + +#include +#include +#include +#include +#include + +using std::istream; +using std::ostream; +using std::string; +using std::map; +using std::vector; + +#include "ast.hh" + +extern string ghidra_dir; +//extern int blc_index; + +extern string sleigh_id; + +typedef void (*arch_setup_t)(uint64_t start, uint64_t end); +typedef map arch_map_t; + +extern arch_map_t arch_map; + +int get_proc_id(); + +void init_ida_ghidra(); + +void get_ida_bytes(uint8_t *buf, uint64_t size, uint64_t ea); + +int64_t get_name(string &name, uint64_t ea, int flags); + +int64_t get_func_name(string &name, uint64_t ea); + +bool is_function_start(uint64_t ea); + +void get_input_file_path(string &path); + +bool get_sleigh_id(string &sleigh); + +bool does_func_return(/*func_t*/ void *f); + +uint64_t get_func_start(/*func_t*/ 
void *f); + +uint64_t get_func_start(uint64_t ea); +uint64_t get_func_end(uint64_t ea); + +int do_decompile(uint64_t start_ea, uint64_t end_ea, Function **ast); + +const char *tag_remove(const char *tagged); + +bool is_thumb_mode(uint64_t ea); + +//is ea a function internal jump target, if so +//return true and place its name in name +//else return false +bool is_code_label(uint64_t ea, string &name); + +//is ea an external function, if so +//return true and place its import table address in fptr +//else return false +bool is_extern_addr(uint64_t ea); +bool is_external_ref(uint64_t ea, uint64_t *fptr); +bool is_extern(const string &name); + +//is ea a library function, if so +//return true +//else return false +bool is_library_func(const string &name); + +bool is_named_addr(uint64_t ea, string &name); + +bool is_pointer_var(uint64_t ea, uint32_t size, uint64_t *tgt); + +bool is_read_only(uint64_t ea); + +bool address_of(const string &name, uint64_t *ea); + +bool simplify_deref(const string &name, string &new_name); + +void adjust_thunk_name(string &name); + +void mips_setup(uint64_t start, uint64_t end); + +bool get_value(uint64_t addr, uint64_t *val); + +bool get_string(uint64_t addr, string &str); + +bool get_str_lit(uint64_t addr, string* str); + +bool get_string_ea(uint64_t addr, string* str); + +string get_string(const string& name); + +bool is_string(const string& name); + +bool ghidra_init(void); +void ghidra_term(void); + +#endif + diff --git a/printc.cc b/printc.cc index 430c64e..c8b70ca 100644 --- a/printc.cc +++ b/printc.cc @@ -1383,7 +1383,7 @@ void PrintC::resetDefaultsPrintC(void) option_convention = true; option_hide_exts = true; option_inplace_ops = false; - option_nocasts = false; + option_nocasts = true; option_NULL = false; option_unplaced = false; setCStyleComments(); diff --git a/run.cc b/run.cc index d736b82..00fb654 100644 --- a/run.cc +++ b/run.cc @@ -1,359 +1,379 @@ -/* - Source for the blc IdaPro plugin - Copyright (c) 2019 Chris Eagle - - 
This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 Temple - Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -#include -#include -#include -#include -#include -#include - -using std::iostream; -using std::ifstream; -using std::ostringstream; -using std::map; - -#include "libdecomp.hh" -#include "capability.hh" -#include "sleigh_arch.hh" -#include "xml.hh" - -#include "plugin.hh" -#include "ida_minimal.hh" -#include "ida_arch.hh" -#include "ast.hh" - -//#define DEBUG 1 - -stringstream *err_stream; - -static string sleigh_id; -ida_arch *arch; // in lieu of Architecture *IfaceDecompData::conf - -void escape_value(const string &value, string &res) { - const char *content = value.c_str(); - while (*content) { - if (*content == '&') { - res += "&"; - } - else if (*content == '>') { - res += ">"; - } - else if (*content == '<') { - res += "<"; - } - else if (*content == '"') { - res += """; - } - else if (*content == '\'') { - res += "'"; - } - else { - res += *content; - } - content++; - } -} - -void dump_el(const Element *el, int indent, string &res) { - const List &children = el->getChildren(); - - int clen = el->getContent().length(); - - int nattr = el->getNumAttributes(); - - res.append(indent, ' '); - res.push_back('<'); - res += el->getName(); - - for (int i = 0; i < nattr; i++) { - res.push_back(' '); - res += el->getAttributeName(i); - res += "=\""; - 
escape_value(el->getAttributeValue(i).c_str(), res); - res += "\""; - } - - int nchildren = 0; - - for (List::const_iterator it = children.begin(); it != children.end(); it++) { - const Element *child = *it; - nchildren++; - if (nchildren == 1) { - res += ">\n"; - } - dump_el(child, indent + 3, res); - } - if (nchildren) { - if (el->getContent().length() > 0) { - res += "NON-ZERO content in element with children\n"; - } - res.append(indent, ' '); - res += "getName(); - res += ">\n"; - } - else { - if (clen) { - res += ">"; - escape_value(el->getContent().c_str(), res); - res += "getName(); - res += ">\n"; - } - else { - res += "/>\n"; - } - } -} - -static const string empty_string(""); - -const string &getAttributeValue(const Element *el, const char *attr) { - int nattr = el->getNumAttributes(); - - for (int i = 0; i < nattr; i++) { - if (el->getAttributeName(i) == attr) { - return el->getAttributeValue(i); - } - } - return empty_string; -} - -void check_err_stream() { - if (err_stream->tellp()) { - msg("%s\n", err_stream->str().c_str()); - err_stream->str(""); - } -} - -TrackedSet &get_tracked_set(uint64_t start, uint64_t end) { - //need to add a TrackedSet to arch->context(which is a ContextInternal for us)->trackbase - //if we are tracking any registers. In particular, if any registers are fixed on entry - //we should add them to the TrackedSet for ea. This is probabaly more useful for some archs - //than others. 
- AddrSpace *as = arch->getSpaceByName("ram"); - Address func_begin(as, start); - Address func_end(as, end); - return arch->context->createSet(func_begin, func_end); -} - -void add_tracked_reg(TrackedSet ®s, uint64_t offset, uint64_t value, uint32_t size) { - regs.push_back(TrackedContext()); - TrackedContext ® = regs.back(); - reg.loc.space = arch->getSpaceByName("register"); - reg.loc.offset = offset; - reg.loc.size = size; - reg.val = value; -} - -void mips_setup(uint64_t start, uint64_t end) { - TrackedSet ®s = get_tracked_set(start, end); - - //this is very n64 specific - // this is $t9 - need to do this better - add_tracked_reg(regs, 0xc8, start, 8); - add_tracked_reg(regs, 0xcc, start & 0xffffffffll, 4); - add_tracked_reg(regs, 0xc8, start >> 32, 4); -} - -bool ghidra_init(void) { - startDecompilerLibrary(ghidra_dir.c_str()); - - err_stream = new stringstream(); - -// IfaceCapability::registerAllCommands(term); // Register commands for decompiler and all modules - - string filename; - get_input_file_path(filename); - - get_sleigh_id(sleigh_id); - - //implement most of IfcLoadFile::execute here since file is - //already loaded in IDA - - arch = new ida_arch(filename, sleigh_id, err_stream); - - DocumentStorage store; // temporary storage for xml docs - - string errmsg; - bool iserror = false; - try { - arch->init(store); - //at this point we have arch->context (a ContextInternal) available - // we can do things like: - // context->setVariableDefault("addrsize",1); // Address size is 32-bits - // context->setVariableDefault("opsize",1); // Operand size is 32-bits - // that make sense for our architecture - } catch(XmlError &err) { - errmsg = err.explain; - iserror = true; - } catch(LowlevelError &err) { - errmsg = err.explain; - iserror = true; - } - if (iserror) { - msg("%s\n", errmsg.c_str()); - msg("Could not create architecture\n"); - delete arch; - arch = NULL; - return false; - } - - check_err_stream(); - - msg("Ghidra architecture successfully 
created\n"); - - return true; -} - -void ghidra_term(void) { - shutdownDecompilerLibrary(); - -// GhidraCapability::shutDown(); - delete err_stream; - err_stream = NULL; -} - -void do_pcode(const Funcdata *fd) { - //typedef map PcodeOpTree - /// \brief Start of all (alive) PcodeOp objects sorted by sequence number - - PcodeOpTree::const_iterator iter; - int i = 0; - for (iter = fd->beginOpAll(); iter != fd->endOpAll(); iter++) { - i++; - const SeqNum &sn = iter->first; - const PcodeOp *pcode = iter->second; - ostringstream os; - pcode->printRaw(os); - msg("%p: %u, (%s / %s): %s\n", (void*)sn.getAddr().getOffset(), sn.getOrder(), pcode->getOpcode()->getName().c_str(), get_opname(pcode->code()), os.str().c_str()); - } - msg("Found %d PcodeOpTree\n", i); -/* - /// \brief Start of all (alive) PcodeOp objects attached to a specific Address - PcodeOpTree::const_iterator beginOp(const Address &addr) const { return obank.begin(addr); } - - /// \brief End of all (alive) PcodeOp objects attached to a specific Address - PcodeOpTree::const_iterator endOp(const Address &addr) const { return obank.end(addr); } -*/ -} - -// Extract the info that the decompiler needs to instantiate its address space manager -// This also builds the internal register map while it walks the sleigh spec. 
- -// see IfcDecompile::execute -int do_decompile(uint64_t start_ea, uint64_t end_ea, Function **result) { - Scope *global = arch->symboltab->getGlobalScope(); - Address addr(arch->getDefaultCodeSpace(), start_ea); - Funcdata *fd = global->findFunction(addr); - *result = NULL; - - if (strncmp("ARM", sleigh_id.c_str(), 3) == 0) { - //if ARM check for and set thumb ranges - if (is_thumb_mode(start_ea)) { - arch->context->setVariable("TMode", addr, 1); - } - } - - int4 res = -1; - if (fd) { - string xml; - string c_code; - - string func_name; - get_func_name(func_name, start_ea); - if (func_name != fd->getName()) { - // Function name has changed since the last decompile - // We need to flush the old symbol name from Ghidra's database to - // force it to request the new name from IDA - global->removeSymbolMappings(fd->getSymbol()); - fd = global->findFunction(addr); - } - -// msg("Decompiling %s\n", func_name.c_str()); - - arch->clearAnalysis(fd); // Clear any old analysis - - arch_map_t::iterator setup = arch_map.find(get_proc_id()); - if (setup != arch_map.end()) { - (*setup->second)(start_ea, end_ea); - } - - arch->allacts.getCurrent()->reset(*fd); - - res = arch->allacts.getCurrent()->perform(*fd); - - if (res < 0) { - ostringstream os; -// msg("Break at "); - arch->allacts.getCurrent()->printState(os); - msg("%s\n", os.str().c_str()); - } - else { -// msg("Decompilation complete"); - if (res == 0) { -// msg(" (no change)"); - } - - do_pcode(fd); - - stringstream ss; - arch->print->setIndentIncrement(3); - arch->print->setOutputStream(&ss); - - //print as C - arch->print->docFunction(fd); - c_code = ss.str(); - ss.str(""); - - arch->print->setXML(true); - arch->print->docFunction(fd); - arch->print->setXML(false); - xml = ss.str(); - -#ifdef DEBUG - msg("%s\n", xml.c_str()); -#endif - - //print the xml - Document *doc = xml_tree(ss); - - if (doc) { - string pretty; - dump_el(doc->getRoot(), 0, pretty); -// msg("%s\n", pretty.c_str()); - - *result = 
func_from_xml(doc->getRoot(), start_ea);
-#ifdef DEBUG
-            msg("%s\n", c_code.c_str());
-#endif
-            delete doc;
-         }
-      }
-      check_err_stream();
-   }
-   else {
-#ifdef DEBUG
-      msg("Error, no Funcdata at 0x%llx\n", (uint64_t)start_ea);
-#endif
-   }
-   return res;
-}
-
+/*
+   Source for the blc IdaPro plugin
+   Copyright (c) 2019 Chris Eagle
+   Copyright (c) 2020 Alexander Pick
+
+   This program is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2 of the License, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+   more details.
+
+   You should have received a copy of the GNU General Public License along with
+   this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+   Place, Suite 330, Boston, MA 02111-1307 USA
+
+   Changelog:
+   ----------
+
+   Changes by Alexander Pick (alx@pwn.su)
+
+   2020-04-28 - Added a selector to correct the return value of blc_init() for IDA 7.5
+*/
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using std::iostream;
+using std::ifstream;
+using std::ostringstream;
+using std::map;
+
+#include "libdecomp.hh"
+#include "capability.hh"
+#include "sleigh_arch.hh"
+#include "xml.hh"
+
+#include "plugin.hh"
+#include "ida_minimal.hh"
+#include "ida_arch.hh"
+#include "ast.hh"
+
+//#define DEBUG_RUN 1
+
+// dmsg() forwards to msg() only when DEBUG_RUN is defined. The macro takes
+// the format string as part of the variadic pack so that calls consisting of
+// a format string alone do not expand to msg(fmt, ) with a dangling comma.
+#ifdef DEBUG_RUN
+#define dmsg(...) msg(__VA_ARGS__)
+#else
+#define dmsg(...) ((void)0)
+#endif
+
+stringstream *err_stream;
+
+ida_arch *arch;   // in lieu of Architecture *IfaceDecompData::conf
+
+// Append value to res, replacing each of & > < " ' with the corresponding
+// XML entity reference; every other character is copied unchanged.
+void escape_value(const string &value, string &res) {
+   const char *content = value.c_str();
+   while (*content) {
+      if (*content == '&') {
+         res += "&amp;";
+      }
+      else if (*content == '>') {
+         res += "&gt;";
+      }
+      else if (*content == '<') {
+         res += "&lt;";
+      }
+      else if (*content == '"') {
+         res += "&quot;";
+      }
+      else if (*content == '\'') {
+         res += "&apos;";
+      }
+      else {
+         res += *content;
+      }
+      content++;
+   }
+}
+
+// Append an indented textual rendering of el (and, recursively, of its
+// children) to res. Each nesting level is indented by 3 more columns.
+void dump_el(const Element *el, int indent, string &res) {
+   const List &children = el->getChildren();
+
+   int clen = el->getContent().length();
+
+   int nattr = el->getNumAttributes();
+
+   res.append(indent, ' ');
+   res.push_back('<');
+   res += el->getName();
+
+   for (int i = 0; i < nattr; i++) {
+      res.push_back(' ');
+      res += el->getAttributeName(i);
+      res += "=\"";
+      escape_value(el->getAttributeValue(i).c_str(), res);
+      res += "\"";
+   }
+
+   int nchildren = 0;
+
+   for (List::const_iterator it = children.begin(); it != children.end(); it++) {
+      const Element *child = *it;
+      nchildren++;
+      if (nchildren == 1) {
+         //close the opening tag before the first child is emitted
+         res += ">\n";
+      }
+      dump_el(child, indent + 3, res);
+   }
+   if (nchildren) {
+      if (el->getContent().length() > 0) {
+         res += "NON-ZERO content in element with children\n";
+      }
+      res.append(indent, ' ');
+      res += "</";
+      res += el->getName();
+      res += ">\n";
+   }
+   else {
+      if (clen) {
+         res += ">";
+         escape_value(el->getContent().c_str(), res);
+         res += "</";
+         res += el->getName();
+         res += ">\n";
+      }
+      else {
+         res += "/>\n";
+      }
+   }
+}
+
+static const string empty_string("");
+
+// Return the value of the attribute of el whose name equals attr, or a
+// reference to empty_string when el has no such attribute.
+const string &getAttributeValue(const Element *el, const char *attr) {
+   int nattr = el->getNumAttributes();
+
+   for (int i = 0; i < nattr; i++) {
+      if (el->getAttributeName(i) == attr) {
+         return el->getAttributeValue(i);
+      }
+   }
+   return empty_string;
+}
+
+// If anything has been written to err_stream, print it with msg() and
+// reset the stream's buffer to empty.
+void check_err_stream() {
+   if (err_stream->tellp()) {
+      msg("%s\n", err_stream->str().c_str());
+      err_stream->str("");
+   }
+}
+
+TrackedSet
&get_tracked_set(uint64_t start, uint64_t end) {
+   //need to add a TrackedSet to arch->context(which is a ContextInternal for us)->trackbase
+   //if we are tracking any registers. In particular, if any registers are fixed on entry
+   //we should add them to the TrackedSet for ea. This is probably more useful for some archs
+   //than others.
+   AddrSpace *as = arch->getSpaceByName("ram");
+   Address func_begin(as, start);
+   Address func_end(as, end);
+   return arch->context->createSet(func_begin, func_end);
+}
+
+// Append a new TrackedContext to regs describing size bytes at offset in the
+// "register" address space, and record value as its tracked value.
+void add_tracked_reg(TrackedSet &regs, uint64_t offset, uint64_t value, uint32_t size) {
+   regs.push_back(TrackedContext());
+   TrackedContext &reg = regs.back();
+   reg.loc.space = arch->getSpaceByName("register");
+   reg.loc.offset = offset;
+   reg.loc.size = size;
+   reg.val = value;
+}
+
+// Architecture setup hook matching arch_setup_t: creates the tracked set for
+// [start, end) and seeds three tracked register entries from start.
+void mips_setup(uint64_t start, uint64_t end) {
+   TrackedSet &regs = get_tracked_set(start, end);
+
+   //this is very n64 specific
+   // this is $t9 - need to do this better
+   add_tracked_reg(regs, 0xc8, start, 8);
+   add_tracked_reg(regs, 0xcc, start & 0xffffffffll, 4);
+   add_tracked_reg(regs, 0xc8, start >> 32, 4);
+}
+
+// Start the decompiler library rooted at ghidra_dir, allocate err_stream and
+// build the global ida_arch for the current input file and sleigh id.
+// Returns true on success. If arch->init() throws XmlError or LowlevelError
+// the explanation is printed, arch is deleted and reset to NULL, and false
+// is returned.
+bool ghidra_init(void) {
+   startDecompilerLibrary(ghidra_dir.c_str());
+
+   err_stream = new stringstream();
+
+   // IfaceCapability::registerAllCommands(term); // Register commands for decompiler and all modules
+
+   string filename;
+   get_input_file_path(filename);
+
+   get_sleigh_id(sleigh_id);
+
+   //a std::string object must not be handed to a printf style %s directly
+   dmsg("SI: %s\n", sleigh_id.c_str());
+
+   //implement most of IfcLoadFile::execute here since file is
+   //already loaded in IDA
+
+   arch = new ida_arch(filename, sleigh_id, err_stream);
+
+   DocumentStorage store; // temporary storage for xml docs
+
+   string errmsg;
+   bool iserror = false;
+   try {
+      arch->init(store);
+      //at this point we have arch->context (a ContextInternal) available
+      // we can do things like:
+      // context->setVariableDefault("addrsize",1); // Address size is 32-bits
+      // context->setVariableDefault("opsize",1); // Operand size is 32-bits
+      // that make sense for our architecture
+   }
+   catch (const XmlError &err) {
+      errmsg = err.explain;
+      iserror = true;
+   }
+   catch (const LowlevelError &err) {
+      errmsg = err.explain;
+      iserror = true;
+   }
+   if (iserror) {
+      msg("%s\n", errmsg.c_str());
+      msg("Could not create architecture\n");
+      delete arch;
+      arch = NULL;
+      return false;
+   }
+
+   check_err_stream();
+
+   //always pass at least one argument after the format so that the call is
+   //well formed whichever way the dmsg macro spells its parameter list
+   dmsg("%s", "Ghidra architecture successfully created\n");
+
+   return true;
+}
+
+// Shut the decompiler library down and release err_stream.
+// NOTE(review): the arch object allocated by ghidra_init() is not released
+// here - confirm whether it is owned/freed elsewhere.
+void ghidra_term(void) {
+   shutdownDecompilerLibrary();
+
+//   GhidraCapability::shutDown();
+   delete err_stream;
+   err_stream = NULL;
+}
+
+// Debug aid: print every PcodeOp of fd followed by the number of ops seen.
+// All of its output goes through dmsg(), so the walk is only compiled in
+// when DEBUG_RUN is defined; otherwise the function does nothing.
+void do_pcode(const Funcdata *fd) {
+#ifdef DEBUG_RUN
+   //typedef map PcodeOpTree
+   /// \brief Start of all (alive) PcodeOp objects sorted by sequence number
+
+   PcodeOpTree::const_iterator iter;
+   int i = 0;
+   for (iter = fd->beginOpAll(); iter != fd->endOpAll(); iter++) {
+      i++;
+      const SeqNum &sn = iter->first;
+      const PcodeOp *pcode = iter->second;
+      ostringstream os;
+      pcode->printRaw(os);
+      dmsg("%p: %u, (%s / %s): %s\n", (void*)sn.getAddr().getOffset(), sn.getOrder(), pcode->getOpcode()->getName().c_str(), get_opname(pcode->code()), os.str().c_str());
+   }
+   dmsg("Found %d PcodeOpTree\n", i);
+#else
+   (void)fd;   //nothing to report when debug output is compiled out
+#endif
+/*
+   /// \brief Start of all (alive) PcodeOp objects attached to a specific Address
+   PcodeOpTree::const_iterator beginOp(const Address &addr) const { return obank.begin(addr); }
+
+   /// \brief End of all (alive) PcodeOp objects attached to a specific Address
+   PcodeOpTree::const_iterator endOp(const Address &addr) const { return obank.end(addr); }
+*/
+}
+
+// Extract the info that the decompiler needs to instantiate its address space manager
+// This also builds the internal register map while it walks the sleigh spec.
+
+// see IfcDecompile::execute
+//
+// Decompile the function at start_ea and, on success, store the AST built
+// from the decompiler's XML output in *result (left NULL otherwise).
+//   start_ea, end_ea - function bounds, forwarded to the per-processor setup
+//                      hook registered in arch_map (if any)
+//   result           - receives the Function produced by func_from_xml()
+// Returns the value of the current action's perform() call, or -1 when no
+// architecture is available or no Funcdata exists at start_ea.
+int do_decompile(uint64_t start_ea, uint64_t end_ea, Function **result) {
+   *result = NULL;
+   if (arch == NULL) {
+      //ghidra_init() leaves arch NULL when the architecture could not be created
+      return -1;
+   }
+
+   Scope *global = arch->symboltab->getGlobalScope();
+   Address addr(arch->getDefaultCodeSpace(), start_ea);
+   Funcdata *fd = global->findFunction(addr);
+
+   if (strncmp("ARM", sleigh_id.c_str(), 3) == 0) {
+      //if ARM check for and set thumb ranges
+      if (is_thumb_mode(start_ea)) {
+         arch->context->setVariable("TMode", addr, 1);
+      }
+   }
+
+   int4 res = -1;
+   if (fd) {
+      string xml;
+      string c_code;
+
+      string func_name;
+      get_func_name(func_name, start_ea);
+      if (func_name != fd->getName()) {
+         // Function name has changed since the last decompile
+         // We need to flush the old symbol name from Ghidra's database to
+         // force it to request the new name from IDA
+         global->removeSymbolMappings(fd->getSymbol());
+         fd = global->findFunction(addr);
+         if (fd == NULL) {
+            // The lookup after the flush found nothing; fd is dereferenced
+            // below, so report any pending errors and give up here
+            check_err_stream();
+            return res;
+         }
+      }
+
+      dmsg("Decompiling %s\n", func_name.c_str());
+
+      arch->clearAnalysis(fd); // Clear any old analysis
+
+      arch_map_t::iterator setup = arch_map.find(get_proc_id());
+      if (setup != arch_map.end()) {
+         (*setup->second)(start_ea, end_ea);
+      }
+
+      arch->allacts.getCurrent()->reset(*fd);
+
+      res = arch->allacts.getCurrent()->perform(*fd);
+
+      // Fixed debug strings are passed as an argument to "%s" so that every
+      // dmsg() call has something after the format string; a bare
+      // dmsg("text") is not portable with a dmsg(x, ...) style macro
+      if (res < 0) {
+         ostringstream os;
+         dmsg("%s", "Break at ");
+         arch->allacts.getCurrent()->printState(os);
+         msg("%s\n", os.str().c_str());
+      }
+      else {
+
+         dmsg("%s", "Decompilation complete\n");
+
+         if (res == 0) {
+            dmsg("%s", " (no change)\n");
+         }
+
+         do_pcode(fd);
+
+         stringstream ss;
+         arch->print->setIndentIncrement(3);
+         arch->print->setOutputStream(&ss);
+
+         //print as C
+         arch->print->docFunction(fd);
+         c_code = ss.str();
+         ss.str("");
+
+         arch->print->setXML(true);
+         arch->print->docFunction(fd);
+         arch->print->setXML(false);
+         xml = ss.str();
+
+#ifdef DEBUG
+         msg("%s\n", xml.c_str());
+#endif
+
+         //print the xml
+         Document *doc = xml_tree(ss);
+
+         if (doc) {
+            string pretty;
+
+            dump_el(doc->getRoot(), 0, pretty);
+
+            //this will dump the xml
+            //dmsg("%s\n", pretty.c_str());
+
+            *result = func_from_xml(doc->getRoot(), start_ea);
+
+            //this will dump the pseudocode
+            //dmsg("%s\n", c_code.c_str());
+            delete doc;
+         }
+      }
+      check_err_stream();
+   }
+   else {
+      //start_ea is 64 bits wide; print all of it rather than the low 32 bits
+      dmsg("Error, no Funcdata at 0x%llx\n", (unsigned long long)start_ea);
+   }
+   return res;
+}
+