-
Notifications
You must be signed in to change notification settings - Fork 59
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Frontend] Generalise SourceCodeFile class #1997
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,13 +14,78 @@ | |
# | ||
################################################################################ | ||
|
||
from typing import Any, Optional | ||
from typing import Any, Optional, Generic, TypeVar | ||
|
||
from tree_sitter import Language, Parser | ||
import tree_sitter_c | ||
import tree_sitter_cpp | ||
import tree_sitter_go | ||
import tree_sitter_java | ||
import tree_sitter_rust | ||
|
||
class Project(): | ||
import logging | ||
|
||
logger = logging.getLogger(name=__name__) | ||
|
||
T = TypeVar('T', bound='SourceCodeFile') | ||
|
||
|
||
class SourceCodeFile(): | ||
"""Class for holding file-specific information.""" | ||
LANGUAGE: dict[str, Language] = { | ||
'c': Language(tree_sitter_c.language()), | ||
'cpp': Language(tree_sitter_cpp.language()), | ||
'c++': Language(tree_sitter_cpp.language()), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should use language constants from |
||
'go': Language(tree_sitter_go.language()), | ||
'jvm': Language(tree_sitter_java.language()), | ||
'rust': Language(tree_sitter_rust.language()), | ||
} | ||
|
||
def __init__(self, | ||
language: str, | ||
source_file: str, | ||
entrypoint: str = '', | ||
source_content: Optional[bytes] = None): | ||
logger.info('Processing %s' % source_file) | ||
|
||
self.root = None | ||
self.source_file = source_file | ||
self.language = language | ||
self.entrypoint = entrypoint | ||
self.tree_sitter_lang = self.LANGUAGE.get(language) | ||
self.parser = Parser(self.tree_sitter_lang) | ||
|
||
if source_content: | ||
self.source_content = source_content | ||
else: | ||
with open(self.source_file, 'rb') as f: | ||
self.source_content = f.read() | ||
|
||
# Initialization ruotines | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo |
||
self.load_tree() | ||
|
||
# Language specific process | ||
self.language_specific_process() | ||
|
||
def load_tree(self): | ||
"""Load the the source code into a treesitter tree, and set | ||
the root node.""" | ||
if not self.root: | ||
self.root = self.parser.parse(self.source_content).root_node | ||
|
||
def language_specific_process(self): | ||
"""Dummy function to perform some specific processes in subclasses.""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. am not sure if we should use |
||
pass | ||
|
||
def has_libfuzzer_harness(self) -> bool: | ||
"""Dummy function for source code files.""" | ||
return False | ||
|
||
|
||
class Project(Generic[T]): | ||
"""Wrapper for doing analysis of a collection of source files.""" | ||
|
||
def __init__(self, source_code_files: list[Any]): | ||
def __init__(self, source_code_files: list[T]): | ||
self.source_code_files = source_code_files | ||
|
||
def dump_module_logic(self, | ||
|
@@ -35,7 +100,7 @@ def dump_module_logic(self, | |
|
||
def extract_calltree(self, | ||
source_file: str = '', | ||
source_code: Optional[Any] = None, | ||
source_code: Optional[T] = None, | ||
function: Optional[str] = None, | ||
visited_functions: Optional[set[str]] = None, | ||
depth: int = 0, | ||
|
@@ -48,14 +113,14 @@ def extract_calltree(self, | |
def get_reachable_functions( | ||
self, | ||
source_file: str = '', | ||
source_code: Optional[Any] = None, | ||
source_code: Optional[T] = None, | ||
function: Optional[str] = None, | ||
visited_functions: Optional[set[str]] = None) -> set[str]: | ||
"""Get a list of reachable functions for a provided function name.""" | ||
# Dummy function for subclasses | ||
return set() | ||
|
||
def get_source_codes_with_harnesses(self) -> list[Any]: | ||
def get_source_codes_with_harnesses(self) -> list[T]: | ||
"""Gets the source codes that holds libfuzzer harnesses.""" | ||
harnesses = [] | ||
for source_code in self.source_code_files: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LANGUAGE
is too generic of a name here