gharchive process

This commit is contained in:
David Cruciani 2021-12-20 14:54:59 +01:00
parent 1a903da1e6
commit cefa47ae23
5 changed files with 152 additions and 11 deletions

View file

@ -14,7 +14,7 @@ import json
import sys
import argparse
from git_vuln_finder import find, find_vuln, summary
from git_vuln_finder import find, find_event
def main():
@ -45,16 +45,28 @@ def main():
parser.add_argument(
"-t", help="Include tags matching a specific commit", action="store_true"
)
parser.add_argument(
"-gh", help="special option for gharchive, pass a file containing a PushEvent in JSON format"
)
args = parser.parse_args()
if args.p not in ["vulnpatterns", "cryptopatterns", "cpatterns", "all"]:
parser.print_usage()
parser.exit()
if not args.r:
if not args.r and not args.gh:
parser.print_usage()
parser.exit()
if args.gh:
with open(args.gh, "r") as read_file:
event = json.load(read_file)
for element in event:
for i in range(0,len(element["payload"]["commits"])):
all_potential_vulnerabilities, all_cve_found, found = find_event(element["payload"]["commits"][i], element)
else:
# Launch the process
all_potential_vulnerabilities, all_cve_found, found = find(
args.r,

View file

@ -1,6 +1,9 @@
from git_vuln_finder.pattern import build_pattern
from git_vuln_finder.pattern import get_patterns
from git_vuln_finder.vulnerability import find_vuln
from git_vuln_finder.vulnerability import find_vuln_event
from git_vuln_finder.vulnerability import summary
from git_vuln_finder.vulnerability import summary_event
from git_vuln_finder.vulnerability import extract_cve
from git_vuln_finder.run import find
from git_vuln_finder.run import find_event

View file

@ -12,7 +12,7 @@
import sys
import git
import typing
from git_vuln_finder import get_patterns, find_vuln, summary
from git_vuln_finder import get_patterns, find_vuln, find_vuln_event, summary, summary_event
def find(
@ -95,3 +95,32 @@ def find(
found += 1
return all_potential_vulnerabilities, all_cve_found, found
def find_event(commit, element):
    """Check one commit of an event against the default pattern set.

    The commit message is tested against the ``vuln``, ``crypto`` and ``c``
    patterns found under ``patterns["en"]["medium"]``.

    Args:
        commit: Commit mapping; its ``"message"`` entry is the text searched.
        element: The event the commit belongs to, passed through unchanged
            to ``summary_event``.

    Returns:
        A ``(all_potential_vulnerabilities, all_cve_found, found)`` tuple:
        the merged records returned by ``summary_event``, the union of the
        CVE identifiers it reported, and the number of patterns that matched.
    """
    # Resolve the three default patterns up front, in the order they are tried.
    medium_patterns = get_patterns()["en"]["medium"]
    candidates = [medium_patterns[kind] for kind in ("vuln", "crypto", "c")]

    # Result accumulators
    all_potential_vulnerabilities = {}
    all_cve_found = set()
    found = 0

    for regex in candidates:
        hit = find_vuln_event(commit["message"], pattern=regex)
        if not hit:
            continue
        records, cves = summary_event(
            commit,
            regex,
            element,
            vuln_match=hit["match"]
        )
        all_potential_vulnerabilities.update(records)
        all_cve_found.update(cves)
        # Counts matching patterns, so one commit can contribute more than 1.
        found += 1

    return all_potential_vulnerabilities, all_cve_found, found

View file

@ -31,6 +31,23 @@ def find_vuln(commit, pattern, verbose=False):
else:
return None
def find_vuln_event(commit_msg, pattern, verbose=False):
    """Search a commit message for a potential vulnerability with a regex.

    Args:
        commit_msg: Commit message text to search.
        pattern: Compiled regular expression applied with ``search``.
        verbose: When true, print the matched text, the full message and a
            ``---`` separator to standard error.

    Returns:
        ``None`` when the pattern does not match; otherwise a dict with the
        message under ``"commit"`` and the match's groups under ``"match"``.
    """
    hit = pattern.search(commit_msg)
    if not hit:
        return None

    if verbose:
        report = ("Match found: {}".format(hit.group(0)), commit_msg, "---")
        for line in report:
            print(line, file=sys.stderr)

    return {"commit": commit_msg, "match": hit.groups()}
def summary(
repo,
@ -103,6 +120,38 @@ def summary(
return rcommit.hexsha, potential_vulnerabilities, cve_found
def summary_event(
    commit,
    pattern,
    element,
    vuln_match=None,
    commit_state="under-review"
):
    """Build the result record for one matching commit of an event.

    Args:
        commit: Commit mapping; ``sha``, ``message``, ``author`` (``name``
            and ``email``) and ``url`` are read from it.
        pattern: Compiled regex that matched; its ``.pattern`` is recorded.
        element: Enclosing event; ``repo.name``, ``created_at`` and
            ``payload.ref`` are read from it.
        vuln_match: Match groups to store under ``"pattern-matches"``.
        commit_state: State recorded when no CVE is extracted from the
            message.

    Returns:
        A ``(potential_vulnerabilities, cve_found)`` tuple, where the first
        item is a dict with a single record keyed by the commit sha and the
        second is passed through from ``extract_cve``.
    """
    cve, cve_found = extract_cve(commit["message"])

    sha = commit["sha"]
    author = commit["author"]
    record = {
        "repo_name": element["repo"]["name"],
        "message": commit["message"],
        "language": langdetect(commit["message"]),
        "commit-id": sha,
        "author": author["name"],
        "author-email": author["email"],
        # The event's creation time is used as the date here.
        "authored_date": element["created_at"],
        "branches": element["payload"]["ref"],
        "pattern-selected": pattern.pattern,
        "pattern-matches": vuln_match,
        "origin-github-api": commit["url"],
    }

    if cve:
        record["cve"] = cve
        record["state"] = "cve-assigned"
    else:
        record["state"] = commit_state

    return {sha: record}, cve_found
def extract_cve(commit):
cve_found = set()
cve_find = re.compile(r"CVE-[1-2]\d{1,4}-\d{1,7}", re.IGNORECASE)

48
tests/gharchive_test.json Normal file
View file

@ -0,0 +1,48 @@
[
{
"id": "19351512310",
"type": "PushEvent",
"actor": {
"id": 32466128,
"login": "DavidCruciani",
"display_login": "DavidCruciani",
"gravatar_id": "",
"url": "https://api.github.com/users/DavidCruciani",
"avatar_url": "https://avatars.githubusercontent.com/u/32466128?"
},
"repo": {
"id": 424660123,
"name": "ail-project/ail-feeder-gharchive",
"url": "https://api.github.com/repos/ail-project/ail-feeder-gharchive"
},
"payload": {
"push_id": 8628652926,
"size": 1,
"distinct_size": 1,
"ref": "refs/heads/main",
"head": "910ed71a2819546a3f3bcce1ebb9e3984a8c8d86",
"before": "40a9ef5dc6b2add5184a0a58401bfe9058faa8df",
"commits": [
{
"sha": "910ed71a2819546a3f3bcce1ebb9e3984a8c8d86",
"author": {
"email": "da.cruciani@laposte.net",
"name": "David Cruciani"
},
"message": "chg: [feeder] case sensitive",
"distinct": true,
"url": "https://api.github.com/repos/ail-project/ail-feeder-gharchive/commits/910ed71a2819546a3f3bcce1ebb9e3984a8c8d86"
}
]
},
"public": true,
"created_at": "2021-12-15T16:06:43Z",
"org": {
"id": 62389074,
"login": "ail-project",
"gravatar_id": "",
"url": "https://api.github.com/orgs/ail-project",
"avatar_url": "https://avatars.githubusercontent.com/u/62389074?"
}
}
]