From cefa47ae23511e830ab6620fe74a78cff0d8a7e1 Mon Sep 17 00:00:00 2001 From: David Cruciani Date: Mon, 20 Dec 2021 14:54:59 +0100 Subject: [PATCH] gharchive process --- bin/finder.py | 32 ++++++++++++++------- git_vuln_finder/__init__.py | 3 ++ git_vuln_finder/run.py | 31 +++++++++++++++++++- git_vuln_finder/vulnerability.py | 49 ++++++++++++++++++++++++++++++++ tests/gharchive_test.json | 48 +++++++++++++++++++++++++++++++ 5 files changed, 152 insertions(+), 11 deletions(-) create mode 100644 tests/gharchive_test.json diff --git a/bin/finder.py b/bin/finder.py index 8bce93a..cbd811d 100644 --- a/bin/finder.py +++ b/bin/finder.py @@ -14,7 +14,7 @@ import json import sys import argparse -from git_vuln_finder import find, find_vuln, summary +from git_vuln_finder import find, find_event def main(): @@ -45,24 +45,36 @@ def main(): parser.add_argument( "-t", help="Include tags matching a specific commit", action="store_true" ) + parser.add_argument( + "-gh", help="special option for gharchive, pass a file containing a PushEvent in JSON format" + ) args = parser.parse_args() if args.p not in ["vulnpatterns", "cryptopatterns", "cpatterns", "all"]: parser.print_usage() parser.exit() - if not args.r: + if not args.r and not args.gh: parser.print_usage() parser.exit() - # Launch the process - all_potential_vulnerabilities, all_cve_found, found = find( - args.r, - tags_matching=args.t, - commit_state=args.s, - verbose=args.v, - defaultpattern=args.p, - ) + if args.gh: + with open(args.gh, "r") as read_file: + event = json.load(read_file) + + for element in event: + for i in range(0,len(element["payload"]["commits"])): + all_potential_vulnerabilities, all_cve_found, found = find_event(element["payload"]["commits"][i], element) + + else: + # Launch the process + all_potential_vulnerabilities, all_cve_found, found = find( + args.r, + tags_matching=args.t, + commit_state=args.s, + verbose=args.v, + defaultpattern=args.p, + ) # Output the result as json. Can be piped to another software. if not args.c: diff --git a/git_vuln_finder/__init__.py b/git_vuln_finder/__init__.py index a2572bc..09d4220 100644 --- a/git_vuln_finder/__init__.py +++ b/git_vuln_finder/__init__.py @@ -1,6 +1,9 @@ from git_vuln_finder.pattern import build_pattern from git_vuln_finder.pattern import get_patterns from git_vuln_finder.vulnerability import find_vuln +from git_vuln_finder.vulnerability import find_vuln_event from git_vuln_finder.vulnerability import summary +from git_vuln_finder.vulnerability import summary_event from git_vuln_finder.vulnerability import extract_cve from git_vuln_finder.run import find +from git_vuln_finder.run import find_event diff --git a/git_vuln_finder/run.py b/git_vuln_finder/run.py index 181633b..f3c0ffd 100644 --- a/git_vuln_finder/run.py +++ b/git_vuln_finder/run.py @@ -12,7 +12,7 @@ import sys import git import typing -from git_vuln_finder import get_patterns, find_vuln, summary +from git_vuln_finder import get_patterns, find_vuln, find_vuln_event, summary, summary_event def find( @@ -95,3 +95,32 @@ def find( found += 1 return all_potential_vulnerabilities, all_cve_found, found + +def find_event(commit, element): + # Initialization of the variables for the results + found = 0 + all_potential_vulnerabilities = {} + all_cve_found = set() + + # Initialization of the patterns + patterns = get_patterns() + vulnpatterns = patterns["en"]["medium"]["vuln"] + cryptopatterns = patterns["en"]["medium"]["crypto"] + cpatterns = patterns["en"]["medium"]["c"] + + defaultpattern = [vulnpatterns, cryptopatterns, cpatterns] + + for p in defaultpattern: + ret = find_vuln_event(commit["message"], pattern=p) + if ret: + potential_vulnerabilities, cve_found = summary_event( + commit, + p, + element, + vuln_match=ret["match"] + ) + all_potential_vulnerabilities.update(potential_vulnerabilities) + all_cve_found.update(cve_found) + found += 1 + + return all_potential_vulnerabilities, all_cve_found, found diff --git a/git_vuln_finder/vulnerability.py b/git_vuln_finder/vulnerability.py index 4581618..606ad16 100644 --- a/git_vuln_finder/vulnerability.py +++ b/git_vuln_finder/vulnerability.py @@ -31,6 +31,23 @@ def find_vuln(commit, pattern, verbose=False): else: return None +def find_vuln_event(commit_msg, pattern, verbose=False): + """Find a potential vulnerability from a commit message thanks to a regex + pattern. + """ + m = pattern.search(commit_msg) + if m: + if verbose: + print("Match found: {}".format(m.group(0)), file=sys.stderr) + print(commit_msg, file=sys.stderr) + print("---", file=sys.stderr) + ret = {} + ret["commit"] = commit_msg + ret["match"] = m.groups() + return ret + else: + return None + def summary( repo, @@ -103,6 +120,38 @@ def summary( return rcommit.hexsha, potential_vulnerabilities, cve_found +def summary_event( + commit, + pattern, + element, + vuln_match=None, + commit_state="under-review" +): + potential_vulnerabilities = {} + + cve, cve_found = extract_cve(commit["message"]) + + potential_vulnerabilities[commit["sha"]] = {} + potential_vulnerabilities[commit["sha"]]["repo_name"] = element["repo"]["name"] + potential_vulnerabilities[commit["sha"]]["message"] = commit["message"] + potential_vulnerabilities[commit["sha"]]["language"] = langdetect(commit["message"]) + potential_vulnerabilities[commit["sha"]]["commit-id"] = commit["sha"] + potential_vulnerabilities[commit["sha"]]["author"] = commit["author"]["name"] + potential_vulnerabilities[commit["sha"]]["author-email"] = commit["author"]["email"] + potential_vulnerabilities[commit["sha"]]["authored_date"] = element["created_at"] + potential_vulnerabilities[commit["sha"]]["branches"] = element["payload"]["ref"] + potential_vulnerabilities[commit["sha"]]["pattern-selected"] = pattern.pattern + potential_vulnerabilities[commit["sha"]]["pattern-matches"] = vuln_match + potential_vulnerabilities[commit["sha"]]["origin-github-api"] = commit["url"] + if cve: + potential_vulnerabilities[commit["sha"]]["cve"] = cve + potential_vulnerabilities[commit["sha"]]["state"] = "cve-assigned" + else: + potential_vulnerabilities[commit["sha"]]["state"] = commit_state + + return potential_vulnerabilities, cve_found + + def extract_cve(commit): cve_found = set() cve_find = re.compile(r"CVE-[1-2]\d{1,4}-\d{1,7}", re.IGNORECASE) diff --git a/tests/gharchive_test.json b/tests/gharchive_test.json new file mode 100644 index 0000000..08d1ca3 --- /dev/null +++ b/tests/gharchive_test.json @@ -0,0 +1,48 @@ +[ + { + "id": "19351512310", + "type": "PushEvent", + "actor": { + "id": 32466128, + "login": "DavidCruciani", + "display_login": "DavidCruciani", + "gravatar_id": "", + "url": "https://api.github.com/users/DavidCruciani", + "avatar_url": "https://avatars.githubusercontent.com/u/32466128?" + }, + "repo": { + "id": 424660123, + "name": "ail-project/ail-feeder-gharchive", + "url": "https://api.github.com/repos/ail-project/ail-feeder-gharchive" + }, + "payload": { + "push_id": 8628652926, + "size": 1, + "distinct_size": 1, + "ref": "refs/heads/main", + "head": "910ed71a2819546a3f3bcce1ebb9e3984a8c8d86", + "before": "40a9ef5dc6b2add5184a0a58401bfe9058faa8df", + "commits": [ + { + "sha": "910ed71a2819546a3f3bcce1ebb9e3984a8c8d86", + "author": { + "email": "da.cruciani@laposte.net", + "name": "David Cruciani" + }, + "message": "chg: [feeder] case sensitive", + "distinct": true, + "url": "https://api.github.com/repos/ail-project/ail-feeder-gharchive/commits/910ed71a2819546a3f3bcce1ebb9e3984a8c8d86" + } + ] + }, + "public": true, + "created_at": "2021-12-15T16:06:43Z", + "org": { + "id": 62389074, + "login": "ail-project", + "gravatar_id": "", + "url": "https://api.github.com/orgs/ail-project", + "avatar_url": "https://avatars.githubusercontent.com/u/62389074?" + } + } +]