diff --git a/CHANGELOG.md b/CHANGELOG.md index 36d7b90..67a9fdb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ - nothing to log +# v.0.8.6 - 2021-04-08 + +- Added: submodule for string analysis + - discovers strings (ascii) in binary + +- Minor bug fix in intel hex reader + # v.0.8.5 - 2021-03-27 - Completed decoder (byte pipeline) testing. diff --git a/Makefile b/Makefile index a3b2a80..7d9f6e9 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ SRC := \ $(wildcard engine/src/analyzer/modules/sfr/*.c) \ $(wildcard engine/src/analyzer/modules/labels/*.c) \ $(wildcard engine/src/analyzer/modules/vector/*.c) \ + $(wildcard engine/src/analyzer/modules/string/*.c) \ $(wildcard engine/src/analyzer/report/*.c) \ $(wildcard engine/src/analyzer/util/*.c) \ $(wildcard engine/src/arch/*.c) \ diff --git a/README.md b/README.md index 40e8d65..97af642 100644 --- a/README.md +++ b/README.md @@ -311,6 +311,44 @@ SFR ID: 50 xref from 0x0054 sts 0x006e, r24 ; DATA[0x6e] <- R24 ``` +#### Discovering strings in binary + +```c +int main(const int argc, const char **argv) { + + /* ignoring checks for this example */ + vmcu_model_t *m328p = vmcu_model_ctor(VMCU_DEVICE_M328P); + vmcu_report_t *report = vmcu_analyze_ihex("file.hex", m328p); + + for(int32_t i = 0; i < report->n_string; i++) { + + vmcu_string_t *str = &report->string[i]; + + printf("Found string \"%s", str->bytes); + printf("\" l = %d", str->length); + printf(" @ 0x%04x\n", str->addr); + } + + printf("\nTotal strings found: %d\n", report->n_string); + + vmcu_report_dtor(report); + vmcu_model_dtor(m328p); + + return EXIT_SUCCESS; +} +``` + +```console +Found string "Welcome " l = 8 @ 0x092e +Found string "[1] Login\n" l = 11 @ 0x0933 +Found string "[2] Memory management\n" l = 23 @ 0x0939 +Found string "Please authenticate yourself with your hardware token\n" l = 55 @ 0x0946 +Found string "Please insert token. 
(%d characters)\n" l = 38 @ 0x0962 +Found string "Token can only contain the characters [A-Z/a-z/0-9]\n" l = 53 @ 0x0975 + +Total strings found: 6 +``` + # Showcase ![mdx_debug](https://user-images.githubusercontent.com/46600932/104666434-33f9da80-56d4-11eb-882b-724b13536412.png) @@ -459,6 +497,9 @@ take a look at engine/*/arch/ - [ ] ISR analysis - [x] SFR analysis - [ ] Cycle analysis + - [x] String analysis + - [x] ASCII + - [ ] UTF16 - [ ] ... - [ ] Format Reader diff --git a/driver/strings/Makefile b/driver/strings/Makefile new file mode 100644 index 0000000..f2d368d --- /dev/null +++ b/driver/strings/Makefile @@ -0,0 +1,12 @@ +BUILD := ../../build +APP_DIR := $(BUILD)/apps +OBJ_DIR := $(BUILD)/objects +TARGET := strings +INCLUDE := -I../../engine/include/libvmcu/ +LIBS := -lvmcu -lm + +driver: strings.c + @cd ..;cd ..;make all + @mkdir -p $(OBJ_DIR)/driver/$(TARGET) + gcc $(INCLUDE) -c strings.c -o $(OBJ_DIR)/driver/$(TARGET)/$(TARGET).o + gcc -o $(APP_DIR)/$(TARGET) $(OBJ_DIR)/driver/$(TARGET)/$(TARGET).o -L$(APP_DIR)/ $(LIBS) \ No newline at end of file diff --git a/driver/strings/strings.c b/driver/strings/strings.c new file mode 100644 index 0000000..ee73dbd --- /dev/null +++ b/driver/strings/strings.c @@ -0,0 +1,93 @@ +/* A basic driver to find and print strings */ + +// C Headers +#include +#include +#include +#include +#include + +// libvmcu +#include "libvmcu_analyzer.h" +#include "libvmcu_system.h" + +/* libvmcu Structures */ + +vmcu_model_t *m328p = NULL; +vmcu_report_t *report = NULL; + +/* Forward Declaration of static Functions */ + +static void print_string(const vmcu_string_t *str); +static void cleanup(void); + +/* --- Extern --- */ + +int main(const int argc, const char **argv) { + + if(argc != 2) { + + printf("Usage: ./strings \n"); + return EXIT_FAILURE; + } + + atexit(cleanup); + + m328p = vmcu_model_ctor(VMCU_DEVICE_M328P); + report = vmcu_analyze_ihex(argv[1], m328p); + + if(report == NULL) + return EXIT_FAILURE; + + for(int32_t i = 0; i 
< report->n_string; i++) { + + vmcu_string_t *str = &report->string[i]; + + printf("Found string \""); + print_string(str); + printf("\" "); + + printf("l = %d", str->length); + printf(" @ 0x%04x\n", str->addr); + } + + printf("\nTotal strings found: %d\n", report->n_string); + + return EXIT_SUCCESS; +} + +/* --- Static --- */ + +static void print_string(const vmcu_string_t *str) { + + /* we need a string printer in order to avoid + * printing ascii control codes. + * + * This function only handles '\n'. + * */ + + for(int32_t i = 0; i < str->length; i++) { + + const char ch = str->bytes[i]; + + if(ch == '\n') { + + printf("\\n"); + continue; + } + + if(0x00 <= ch && ch <= 0x1f) + continue; + + printf("%c", ch); + } +} + +static void cleanup(void) { + + if(report != NULL) + vmcu_report_dtor(report); + + if(m328p != NULL) + vmcu_model_dtor(m328p); +} diff --git a/engine/include/analyzer/modules/string/string_analyzer.h b/engine/include/analyzer/modules/string/string_analyzer.h new file mode 100644 index 0000000..7b843e0 --- /dev/null +++ b/engine/include/analyzer/modules/string/string_analyzer.h @@ -0,0 +1,10 @@ +/* String Analyzer Submodule Header */ + +#ifndef VMCU_STRING_ANALYZER_H +#define VMCU_STRING_ANALYZER_H + +typedef struct vmcu_report vmcu_report_t; + +extern int vmcu_analyze_strings(vmcu_report_t *report); + +#endif diff --git a/engine/include/analyzer/report/report.h b/engine/include/analyzer/report/report.h index 3c5e460..7820350 100644 --- a/engine/include/analyzer/report/report.h +++ b/engine/include/analyzer/report/report.h @@ -11,6 +11,7 @@ #include "engine/include/analyzer/report/sfr.h" #include "engine/include/analyzer/report/label.h" #include "engine/include/analyzer/report/vector.h" +#include "engine/include/analyzer/report/string.h" typedef struct vmcu_report { @@ -26,6 +27,9 @@ typedef struct vmcu_report { int32_t n_vector; vmcu_vector_t *vector; + int32_t n_string; + vmcu_string_t *string; + } vmcu_report_t; extern vmcu_report_t* 
/*
 * A single string found by the string analysis.
 *
 * addr   - base address of the string
 * length - length of the string
 * bytes  - actual char buffer
 * */

struct vmcu_string {

    uint16_t addr;
    uint16_t length;

    char *bytes;
};

typedef struct vmcu_string vmcu_string_t;
/*
 * True if byte may be part of a string, i.e. if 0x01 <= byte <= 0x7e.
 *
 * The argument is parenthesized so that expressions with low precedence
 * operators (for example 'a | b') are classified as a whole. It is still
 * evaluated twice, so do not pass expressions with side effects.
 * */

#define is_printable(byte) ((0x01 <= (byte)) && ((byte) <= 0x7e))

/*
 * Definition of an ascii string in libvmcu:
 *
 * A sequence of bytes is a string if and only if
 *
 * --------------------------------------------------------------------------
 * [1] 0x01 <= bytes[i] <= 0x7e, for all i with 0 <= i < n
 * [2] bytes[n] = 0x00 (null byte)
 * [3] n >= 2 (at least three bytes, null byte inclusive)
 * --------------------------------------------------------------------------
 *
 * where n is the number of bytes in front of the terminating null byte.
 *
 * We might add an extra condition in order to improve string analysis:
 *
 * [4] bytes[i] must not be visited by the control flow graph
 *
 * A further improvement would be to iterate backwards and search for
 * printable bytes instead of null bytes.
 *
 * --- Encoding ---
 *
 * This submodule currently supports
 *
 * [x] ASCII
 * [ ] UTF16
 *
 * --- Improvements ---
 *
 * - reduce heap usage of this submodule in the future (maybe by using a slab)
 * - add support for UTF16 encoding
 *
 * */
+ * + * --- Encoding --- + * + * This submodule currently supports + * + * [x] ASCII + * [ ] UTF16 + * + * --- Improvements --- + * + * - reduce heap usage of this submodule in the future (maybe by using a slab) + * - add support for UTF16 encoding + * + * */ + +/* Forward Declaration of static Functions */ + +static uint8_t* extract_bytes(const vmcu_report_t *report, int32_t *size); +static int32_t count_bytes(const vmcu_report_t *report); + +static void mkstr(vmcu_string_t *str, uint8_t *bytes, int32_t s, int32_t e); + +/* --- Extern --- */ + +int vmcu_analyze_strings(vmcu_report_t *report) { + + int32_t size; int32_t s = -1, slen = 0; + + uint8_t *bytes = extract_bytes(report, &size); + report->string = malloc((size / 2 + 1) * sizeof(vmcu_string_t)); + + for(int32_t i = 0; i < size; i++) { + + if(is_printable(bytes[i]) == true) { + + s = (s == -1) ? i : s; + slen += 1; + + continue; + } + + if(bytes[i] == '\0' && slen > 1) { + + int32_t n = report->n_string++; + mkstr(&report->string[n], bytes, s, i); + } + + s = -1; + slen = 0; + } + + if(report->n_string <= 0) { + + free(report->string); + report->string = NULL; + + goto cleanup; + } + + size_t total = report->n_string * sizeof(vmcu_string_t); + report->string = realloc(report->string, total); + +cleanup: + free(bytes); + return 0; +} + +/* --- Static --- */ + +static uint8_t* extract_bytes(const vmcu_report_t *report, int32_t *size) { + + *size = count_bytes(report); + uint8_t *bytes = malloc(*size * sizeof(uint8_t)); + + for(int32_t i = 0, j = 0; i < report->progsize; i++) { + + vmcu_instr_t *instr = &report->disassembly[i]; + + bytes[j++] = (instr->opcode & 0x000000ff) >> 0; + bytes[j++] = (instr->opcode & 0x0000ff00) >> 8; + + if(instr->dword == false) + continue; + + bytes[j++] = (instr->opcode & 0x00ff0000) >> 16; + bytes[j++] = (instr->opcode & 0xff000000) >> 24; + } + + return bytes; +} + +static int32_t count_bytes(const vmcu_report_t *report) { + + int32_t acc = 0; + + for(int32_t i = 0; i < 
report->progsize; i++) + acc += (2 + (2 * (report->disassembly[i].dword))); + + return acc; +} + +static void mkstr(vmcu_string_t *str, uint8_t *bytes, int32_t s, int32_t e) { + + str->bytes = malloc((e - s + 1) * sizeof(char)); + + for(int32_t i = s; i <= e; i++) + str->bytes[i - s] = bytes[i]; + + str->addr = (s / 2); + str->length = (e - s); +} + + + diff --git a/engine/src/analyzer/report/report.c b/engine/src/analyzer/report/report.c index fa236cc..d6cdb1b 100644 --- a/engine/src/analyzer/report/report.c +++ b/engine/src/analyzer/report/report.c @@ -26,6 +26,9 @@ vmcu_report_t* vmcu_report_ctor(void) { report->n_vector = 0; report->vector = NULL; + report->n_string = 0; + report->string = NULL; + return report; } @@ -55,6 +58,12 @@ void vmcu_report_dtor(vmcu_report_t *this) { free(this->vector[i].xto); } + for(int32_t i = 0; i < this->n_string; i++) { + + if(this->string[i].length > 0) + free(this->string[i].bytes); + } + if(this->sfr != NULL) free(this->sfr); @@ -64,6 +73,9 @@ void vmcu_report_dtor(vmcu_report_t *this) { if(this->vector != NULL) free(this->vector); + if(this->string != NULL) + free(this->string); + if(this->disassembly != NULL) free(this->disassembly); diff --git a/engine/src/reader/ihex_reader.c b/engine/src/reader/ihex_reader.c index 1d6c4dc..8a90db0 100644 --- a/engine/src/reader/ihex_reader.c +++ b/engine/src/reader/ihex_reader.c @@ -56,14 +56,20 @@ vmcu_instr_t* vmcu_read_ihex(const char *hex_file, int32_t *size) { while(getline(&line, &len, file) != -1) { - if(check_ihex(line) == false) + if(check_ihex(line) == false) { + + *size = 0; goto err; + } if(line[RECORD] != DATA_RECORD) continue; - if(read_ihex_line(line, buffer, size) < 0) + if(read_ihex_line(line, buffer, size) < 0) { + + *size = 0; goto err; + } } if(*size == 0)