wip, but seems to be working as a cli

This commit is contained in:
Cédric Bonhomme 2020-01-03 16:06:33 +01:00
parent 01cedee91d
commit dcb0fcc0a5
Signed by untrusted user who does not match committer: cedric
GPG key ID: A1CB94DE57B7A70D
13 changed files with 55 additions and 238 deletions

View file

@ -2,7 +2,11 @@
![git-vuln-finder logo](https://raw.githubusercontent.com/cve-search/git-vuln-finder/f22077452c37e110bff0564e1f7b34637dc726c3/doc/logos/git-vuln-finder-small.png)
Finding potential software vulnerabilities from git commit messages. The output format is a JSON with the associated commit which could contain a fix regarding a software vulnerability. The search is based on a set of regular expressions against the commit messages only. If CVE IDs are present, those are added automatically in the output.
Finding potential software vulnerabilities from git commit messages.
The output format is a JSON with the associated commit which could contain a
fix regarding a software vulnerability. The search is based on a set of regular
expressions against the commit messages only. If CVE IDs are present, those are
added automatically in the output.
# Requirements
@ -10,6 +14,24 @@ Finding potential software vulnerabilities from git commit messages. The output
- GitPython
- langdetect
# Installation
## Use in your Python software
~~~bash
$ poetry add git-vuln-finder
$ poetry shell
~~~
## Use as a command line tool
~~~bash
$ pipx install git-vuln-finder
$ finder --help
~~~
# Usage
~~~bash
@ -166,5 +188,3 @@ collectively own this open source software. The contributors acknowledge the [De
- [Notes](https://gist.github.com/adulau/dce5a6ca5c65017869bb01dfee576303#file-finding-vuln-git-commit-messages-md)
- https://csce.ucmss.com/cr/books/2017/LFS/CSREA2017/ICA2077.pdf (mainly using CVE referenced in the commit message) - archive (http://archive.is/xep9o)
- https://asankhaya.github.io/pdf/automated-identification-of-security-issues-from-commit-messages-and-bug-reports.pdf (2 main regexps)

View file

@ -1,2 +0,0 @@
gitpython
langdetect

View file

@ -1 +0,0 @@
theme: jekyll-theme-minimal

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Finding potential software vulnerabilities from git commit messages
@ -10,16 +10,21 @@
# Copyright (c) 2019 Alexandre Dulaunoy - a@foo.be
import os
import re
import git
import json
import sys
import argparse
import typing
from langdetect import detect as langdetect
PATTERNS_PATH="../patterns"
from git_vuln_finder import (
build_pattern,
get_patterns,
find_vuln,
summary,
extract_cve
)
PATTERNS_PATH="./git_vuln_finder/patterns"
parser = argparse.ArgumentParser(description = "Finding potential software vulnerabilities from git commit messages.", epilog = "More info: https://github.com/cve-search/git-vuln-finder")
parser.add_argument("-v", help="increase output verbosity", action="store_true")
@ -32,63 +37,6 @@ parser.add_argument("-t", help="Include tags matching a specific commit", action
args = parser.parse_args()
def _read_optional(path):
    """Return the full contents of *path*, or "" if it cannot be opened/read."""
    try:
        with open(path, "r") as fp:
            return fp.read()
    except OSError:
        # The .prefix / .suffix companions are optional: a missing or
        # unreadable one simply contributes nothing to the expression.
        return ""


def build_pattern(pattern_file):
    """Assemble a regular-expression string from a pattern file.

    Every line of *pattern_file* (with trailing whitespace stripped) becomes
    one alternative; the alternatives are joined with ``|``.  If the files
    ``pattern_file + ".prefix"`` and/or ``pattern_file + ".suffix"`` exist,
    their raw contents are placed before / after the joined alternatives.

    :param pattern_file: path of the file holding one alternative per line.
    :returns: the uncompiled regular-expression source as a string.
    :raises OSError: if *pattern_file* itself cannot be opened.
    """
    # Read the main file first so a missing pattern file fails immediately.
    with open(pattern_file, "r") as fp:
        alternatives = [line.rstrip() for line in fp]

    # str.join adds no trailing '|', so an empty pattern file can no longer
    # cause the last character of the prefix to be chopped off.
    return (
        _read_optional(pattern_file + ".prefix")
        + "|".join(alternatives)
        + _read_optional(pattern_file + ".suffix")
    )
def get_patterns(patterns_path=PATTERNS_PATH):
    """Load and compile every pattern file found below *patterns_path*.

    The first two directory levels below *patterns_path* are taken as the
    language and the severity, and the file name is the pattern category:
    ``<patterns_path>/<lang>/<severity>/<category>``.  Files ending in
    ``.prefix`` or ``.suffix`` are not categories of their own; they are
    picked up by ``build_pattern`` for the file they accompany.

    :param patterns_path: root directory of the pattern tree.
    :returns: nested dict ``patterns[lang][severity][category]`` mapping to
        compiled regular expressions.
    """
    patterns = {}
    for root, _dirs, files in os.walk(patterns_path):
        npath = os.path.relpath(root, patterns_path).split(os.sep)
        if len(npath) < 2:
            # Not yet inside a <lang>/<severity> directory: there is no
            # language/severity to file these entries under.
            continue
        lang, severity = npath[0], npath[1]

        for f in files:
            if f.endswith((".prefix", ".suffix")):
                continue
            by_category = patterns.setdefault(lang, {}).setdefault(severity, {})
            # Keep the first pattern seen for a category; never overwrite.
            if f not in by_category:
                rex = build_pattern(os.path.join(root, f))
                by_category[f] = re.compile(rex)
    return patterns
patterns = get_patterns()
vulnpatterns = patterns["en"]["medium"]["vuln"]
cryptopatterns = patterns["en"]["medium"]["crypto"]
@ -114,79 +62,12 @@ else:
found = 0
potential_vulnerabilities = {}
all_potential_vulnerabilities = {}
cve_found = set()
def find_vuln(commit, pattern=vulnpatterns):
    """Search the message of *commit* with *pattern*.

    When the ``-v`` command-line flag is set, the matched text, the whole
    commit message and a ``---`` separator are written to stderr.

    :param commit: object exposing the commit text as ``.message``.
    :param pattern: compiled regular expression to search with.
    :returns: ``{'commit': commit, 'match': <captured groups>}`` on a match,
        otherwise ``None``.
    """
    m = pattern.search(commit.message)
    if not m:
        return None

    if args.v:
        for line in ("Match found: {}".format(m.group(0)), commit.message, "---"):
            print(line, file=sys.stderr)

    return {'commit': commit, 'match': m.groups()}
def main():
    """Placeholder entry point; it currently performs no work."""
    return None
def summary(commit, branch, pattern, origin=None):
    """Record *commit* in the module-level ``potential_vulnerabilities`` dict.

    A commit already recorded (same ``hexsha``, seen on another branch) only
    gets *branch* appended to its ``branches`` list.  Otherwise a new entry is
    created with the commit metadata, the pattern that matched, the origin,
    optional tags (``-t`` flag) and any CVE identifiers found in the message.

    :param commit: commit object (``message``, ``hexsha``, ``summary``,
        ``stats``, ``author``, ``authored_date``, ``committed_date``).
    :param branch: name of the branch the commit was found on.
    :param pattern: compiled regular expression that matched the commit.
    :param origin: URL of the git remote, or ``None`` when unknown.
    :returns: the ``hexsha`` of the commit.
    """
    sha = commit.hexsha
    cve = extract_cve(commit.message)

    # Always bound, so the check further down cannot raise NameError when the
    # origin is unknown or is not a GitHub remote.
    origin_github_api = None
    if origin is None:
        origin = 'git origin unknown'
    else:
        # Accept both "git@github.com:org/repo(.git)" and
        # "https://github.com/org/repo(.git)" remotes.
        gh = re.search(r'github\.com[:/]([^/]+)/(.+?)(?:\.git)?/?$', origin)
        if gh:
            origin_github_api = 'https://api.github.com/repos/{}/{}/commits/{}'.format(
                gh.group(1), gh.group(2), sha)

    # Deduplication if similar commits appear on different branches.
    if sha in potential_vulnerabilities:
        potential_vulnerabilities[sha]['branches'].append(branch)
        return sha

    # Key insertion order is kept identical to the previous implementation so
    # the JSON output layout does not change.
    entry = {
        'message': commit.message,
        'language': langdetect(commit.message),
        'commit-id': sha,
        'summary': commit.summary,
        'stats': commit.stats.total,
        'author': commit.author.name,
        'author-email': commit.author.email,
        'authored_date': commit.authored_date,
        'committed_date': commit.committed_date,
        'branches': [branch],
        'pattern-selected': pattern.pattern,
        # NOTE(review): `ret` is the module-level result of the most recent
        # find_vuln() call made by the caller loop, not a parameter.
        'pattern-matches': ret['match'],
        'origin': origin,
    }
    if origin_github_api:
        entry['origin-github-api'] = origin_github_api

    entry['tags'] = []
    if args.t:
        resolved = repo.commit(commit).hexsha
        if resolved in tagmap:
            entry['tags'] = tagmap[resolved]

    if cve:
        entry['cve'] = cve
        entry['state'] = "cve-assigned"
    else:
        entry['state'] = args.s

    potential_vulnerabilities[sha] = entry
    return sha
def extract_cve(commit):
    """Find CVE identifiers in the text *commit*.

    Every identifier found is also added to the module-level ``cve_found``
    set.

    :param commit: text to scan (the caller passes a commit message).
    :returns: list of matched identifiers, or ``None`` when there are none.
    """
    matches = re.findall(r'CVE-[1-2]\d{1,4}-\d{1,7}', commit, re.IGNORECASE)
    if not matches:
        return None
    cve_found.update(matches)
    return matches
repo_heads = repo.heads
repo_heads_names = [h.name for h in repo_heads]
@ -202,20 +83,34 @@ for branch in repo_heads_names:
defaultpattern
for commit in commits:
if isinstance(defaultpattern, typing.Pattern):
ret = find_vuln(commit, pattern=defaultpattern)
ret = find_vuln(commit, pattern=defaultpattern, versbose=args.v)
if ret:
rcommit = ret['commit']
summary(rcommit, branch, defaultpattern, origin=origin)
_, potential_vulnerabilities = summary(rcommit,
branch,
defaultpattern,
origin=origin,
vuln_match=ret['match'],
tags_matching=args.t,
commit_state=args.s)
all_potential_vulnerabilities.update(potential_vulnerabilities)
found += 1
elif isinstance(defaultpattern, list):
for p in defaultpattern:
ret = find_vuln(commit, pattern=p)
ret = find_vuln(commit, pattern=p, versbose=args.v)
if ret:
rcommit = ret['commit']
summary(rcommit, branch, p, origin=origin)
_, potential_vulnerabilities = summary(rcommit,
branch,
p,
origin=origin,
vuln_match=ret['match'],
tags_matching=args.t,
commit_state=args.s)
all_potential_vulnerabilities.update(potential_vulnerabilities)
found += 1
if not args.c:
print(json.dumps(potential_vulnerabilities))
print(json.dumps(all_potential_vulnerabilities))
elif args.c:
print(json.dumps(list(cve_found)))

View file

@ -1,4 +0,0 @@
double[-| ]free
buffer overflow
double free
race[-| ]condition

View file

@ -1 +0,0 @@
(?i)(

View file

@ -1 +0,0 @@
)

View file

@ -1,55 +0,0 @@
assessment
lack of
bad
vulnerable
missing
unproper
unsuitable
breakable
broken
weak
incorrect
replace
assessment
pen([\s-]?)test
pentest
penetration([\s-]?)test
report
vulnerablity
replace
fix
issue
fixes
add
remove
check){s1,}
(crypto
cryptographic
cryptography
encipherement
encryption
ciphers
cipher
AES
DES
3DES
cipher
GPG
PGP
OpenSSL
SSH
wireguard
VPN
CBC
ECB
CTR
key[.|,|\s]
private([\s-]?)key
public([\s-]?)key size
length
strenght
generation
randomness
entropy
prng
rng

View file

@ -1 +0,0 @@
.*(

View file

@ -1 +0,0 @@
){1,}

View file

@ -1,30 +0,0 @@
denial of service
\bXXE\b
remote code execution
\bopen redirect
OSVDB
\bvuln
\bCVE\b
\bXSS\b
\bReDoS\b
\bNVD\b
malicious
xframeoptions
attack
cross site
exploit
malicious
directory traversal
\bRCE\b
\bdos\b
\bXSRF \b
\bXSS\b
clickjack
session.fixation
hijack
\badvisory
\binsecure
security
\bcrossorigin\b
unauthori[z|s]ed
infinite loop

View file

@ -1 +0,0 @@
(?i)(

View file

@ -1 +0,0 @@
)