mirror of
https://github.com/cve-search/git-vuln-finder.git
synced 2024-11-22 14:57:20 +00:00
wip, but seems to be working as a cli
This commit is contained in:
parent
01cedee91d
commit
dcb0fcc0a5
13 changed files with 55 additions and 238 deletions
26
README.md
26
README.md
|
@ -2,7 +2,11 @@
|
|||
|
||||
![git-vuln-finder logo](https://raw.githubusercontent.com/cve-search/git-vuln-finder/f22077452c37e110bff0564e1f7b34637dc726c3/doc/logos/git-vuln-finder-small.png)
|
||||
|
||||
Finding potential software vulnerabilities from git commit messages. The output format is a JSON with the associated commit which could contain a fix regarding a software vulnerability. The search is based on a set of regular expressions against the commit messages only. If CVE IDs are present, those are added automatically in the output.
|
||||
Finding potential software vulnerabilities from git commit messages.
|
||||
The output format is a JSON with the associated commit which could contain a
|
||||
fix regarding a software vulnerability. The search is based on a set of regular
|
||||
expressions against the commit messages only. If CVE IDs are present, those are
|
||||
added automatically in the output.
|
||||
|
||||
# Requirements
|
||||
|
||||
|
@ -10,6 +14,24 @@ Finding potential software vulnerabilities from git commit messages. The output
|
|||
- GitPython
|
||||
- langdetect
|
||||
|
||||
|
||||
# Installation
|
||||
|
||||
## Use in your Python software
|
||||
|
||||
~~~bash
|
||||
$ poetry add git-vuln-finder
|
||||
$ poetry shell
|
||||
~~~
|
||||
|
||||
## Use as a command line tool
|
||||
|
||||
~~~bash
|
||||
$ pipx install git-vuln-finder
|
||||
$ finder --help
|
||||
~~~
|
||||
|
||||
|
||||
# Usage
|
||||
|
||||
~~~bash
|
||||
|
@ -166,5 +188,3 @@ collectively own this open source software. The contributors acknowledge the [De
|
|||
- [Notes](https://gist.github.com/adulau/dce5a6ca5c65017869bb01dfee576303#file-finding-vuln-git-commit-messages-md)
|
||||
- https://csce.ucmss.com/cr/books/2017/LFS/CSREA2017/ICA2077.pdf (mainly using CVE referenced in the commit message) - archive (http://archive.is/xep9o)
|
||||
- https://asankhaya.github.io/pdf/automated-identification-of-security-issues-from-commit-messages-and-bug-reports.pdf (2 main regexps)
|
||||
|
||||
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
gitpython
|
||||
langdetect
|
|
@ -1 +0,0 @@
|
|||
theme: jekyll-theme-minimal
|
169
bin/finder.py
169
bin/finder.py
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env python3
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Finding potential software vulnerabilities from git commit messages
|
||||
|
@ -10,16 +10,21 @@
|
|||
# Copyright (c) 2019 Alexandre Dulaunoy - a@foo.be
|
||||
|
||||
|
||||
import os
|
||||
import re
|
||||
import git
|
||||
import json
|
||||
import sys
|
||||
import argparse
|
||||
import typing
|
||||
from langdetect import detect as langdetect
|
||||
|
||||
PATTERNS_PATH="../patterns"
|
||||
from git_vuln_finder import (
|
||||
build_pattern,
|
||||
get_patterns,
|
||||
find_vuln,
|
||||
summary,
|
||||
extract_cve
|
||||
)
|
||||
|
||||
PATTERNS_PATH="./git_vuln_finder/patterns"
|
||||
|
||||
parser = argparse.ArgumentParser(description = "Finding potential software vulnerabilities from git commit messages.", epilog = "More info: https://github.com/cve-search/git-vuln-finder")
|
||||
parser.add_argument("-v", help="increase output verbosity", action="store_true")
|
||||
|
@ -32,63 +37,6 @@ parser.add_argument("-t", help="Include tags matching a specific commit", action
|
|||
args = parser.parse_args()
|
||||
|
||||
|
||||
def _read_optional(path):
    """Return the text of *path*, or an empty string when the file is absent."""
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()
    except FileNotFoundError:
        # Prefix/suffix files are optional companions of a pattern file.
        return ""


def build_pattern(pattern_file):
    """Assemble a regular-expression source string from a pattern file.

    Every line of *pattern_file* (trailing whitespace stripped) becomes one
    alternative; the alternatives are joined with ``|``. When the sibling
    files ``<pattern_file>.prefix`` and/or ``<pattern_file>.suffix`` exist,
    their contents are placed verbatim before / after the alternation.

    Args:
        pattern_file: Path of the file holding one alternative per line.

    Returns:
        The regex source as a ``str`` (not compiled).

    Raises:
        OSError: If *pattern_file* itself cannot be opened.
    """
    with open(pattern_file, "r", encoding="utf-8") as fp:
        alternatives = [line.rstrip() for line in fp]

    # Joining (instead of appending "|" and trimming the last character)
    # keeps the prefix intact when the pattern file has no lines at all.
    return (
        _read_optional(pattern_file + ".prefix")
        + "|".join(alternatives)
        + _read_optional(pattern_file + ".suffix")
    )
|
||||
|
||||
def get_patterns(patterns_path=PATTERNS_PATH):
    """Compile every pattern file found below *patterns_path*.

    The first two directory levels under *patterns_path* are used as the
    language and severity keys, and the file name is used as the pattern
    category. Files ending in ``.prefix`` or ``.suffix`` are skipped here;
    they are read by ``build_pattern`` while assembling a category's regex.

    Args:
        patterns_path: Root directory to walk.

    Returns:
        A nested dict ``{lang: {severity: {category: compiled_regex}}}``.
    """
    patterns = {}
    for root, _dirs, files in os.walk(patterns_path):
        relative = os.path.relpath(root, patterns_path)
        parts = [] if relative == os.curdir else relative.split(os.sep)
        if len(parts) < 2:
            # Files above the <lang>/<severity>/ level cannot be classified;
            # skip them instead of failing with an IndexError.
            continue
        lang, severity = parts[0], parts[1]

        for name in files:
            if name.endswith((".prefix", ".suffix")):
                continue
            by_category = patterns.setdefault(lang, {}).setdefault(severity, {})
            # The first file seen for a category wins; later ones are ignored.
            if name not in by_category:
                rex = build_pattern(os.path.join(root, name))
                by_category[name] = re.compile(rex)

    return patterns
|
||||
|
||||
patterns = get_patterns()
|
||||
vulnpatterns = patterns["en"]["medium"]["vuln"]
|
||||
cryptopatterns = patterns["en"]["medium"]["crypto"]
|
||||
|
@ -114,79 +62,12 @@ else:
|
|||
|
||||
|
||||
found = 0
|
||||
potential_vulnerabilities = {}
|
||||
all_potential_vulnerabilities = {}
|
||||
cve_found = set()
|
||||
|
||||
def find_vuln(commit, pattern=vulnpatterns):
    """Search a commit message for *pattern*.

    Args:
        commit: Object exposing the commit text as ``commit.message``.
        pattern: Compiled regular expression to search with.

    Returns:
        ``None`` when the message does not match; otherwise a dict with the
        commit under ``'commit'`` and the regex groups under ``'match'``.
        When the module-level ``args.v`` flag is set, the match, the message
        and a separator are echoed to stderr.
    """
    hit = pattern.search(commit.message)
    if not hit:
        return None

    if args.v:
        for text in ("Match found: {}".format(hit.group(0)), commit.message, "---"):
            print(text, file=sys.stderr)

    return {'commit': commit, 'match': hit.groups()}
|
||||
def main():
    """Placeholder entry point: performs no work and returns ``None``."""
|
||||
|
||||
def summary(commit, branch, pattern, origin=None):
    """Record *commit* as a potential vulnerability and return its hex SHA.

    The entry is stored in the module-level ``potential_vulnerabilities``
    dict, keyed by commit SHA. If the commit was already recorded (for
    example the same commit reached through another branch), only *branch*
    is appended to the existing entry.

    This function also reads the module-level names ``args`` (``t`` and
    ``s``), ``repo``, ``tagmap`` and ``ret`` (the latest ``find_vuln``
    result, used for ``'pattern-matches'``).

    Args:
        commit: Commit object (``message``, ``hexsha``, ``summary``,
            ``stats``, ``author``, ``authored_date``, ``committed_date``).
        branch: Name of the branch the commit was found on.
        pattern: Compiled regex that matched; its source is stored.
        origin: Remote URL of the repository, or ``None`` when unknown.

    Returns:
        The commit's hex SHA.
    """
    sha = commit.hexsha
    cve = extract_cve(commit.message)

    # Only GitHub remotes get an API link. Accept both the scp-like form
    # (git@github.com:org/repo.git) and URL forms (https://github.com/org/repo).
    origin_github_api = None
    if origin is None:
        origin = 'git origin unknown'
    else:
        github = re.search(r'github\.com[:/]([^/]+)/(.+?)(?:\.git)?/?$', origin)
        if github:
            origin_github_api = 'https://api.github.com/repos/{}/{}/commits/{}'.format(
                github.group(1), github.group(2), sha)

    # Deduplication if similar commits on different branches.
    if sha in potential_vulnerabilities:
        potential_vulnerabilities[sha]['branches'].append(branch)
        return sha

    entry = {
        'message': commit.message,
        'language': langdetect(commit.message),
        'commit-id': sha,
        'summary': commit.summary,
        'stats': commit.stats.total,
        'author': commit.author.name,
        'author-email': commit.author.email,
        'authored_date': commit.authored_date,
        'committed_date': commit.committed_date,
        'branches': [branch],
        'pattern-selected': pattern.pattern,
        'pattern-matches': ret['match'],
        'origin': origin,
    }
    if origin_github_api:
        entry['origin-github-api'] = origin_github_api

    entry['tags'] = []
    if args.t:
        resolved_sha = repo.commit(commit).hexsha
        if resolved_sha in tagmap:
            entry['tags'] = tagmap[resolved_sha]

    if cve:
        entry['cve'] = cve
        entry['state'] = "cve-assigned"
    else:
        entry['state'] = args.s

    # Store the entry only once it is complete, so a failure above does not
    # leave a half-filled record behind.
    potential_vulnerabilities[sha] = entry
    return sha
|
||||
|
||||
def extract_cve(commit):
    """Collect the CVE identifiers mentioned in a text.

    Args:
        commit: Text to scan (the caller in this file passes a commit
            message).

    Returns:
        The list of identifiers found, in order of appearance and matched
        case-insensitively, or ``None`` when there are none. Every identifier
        found is also added to the module-level ``cve_found`` set.
    """
    identifiers = re.findall(r'CVE-[1-2]\d{1,4}-\d{1,7}', commit, re.IGNORECASE)
    if not identifiers:
        return None
    cve_found.update(identifiers)
    return identifiers
|
||||
|
||||
repo_heads = repo.heads
|
||||
repo_heads_names = [h.name for h in repo_heads]
|
||||
|
@ -202,20 +83,34 @@ for branch in repo_heads_names:
|
|||
defaultpattern
|
||||
for commit in commits:
|
||||
if isinstance(defaultpattern, typing.Pattern):
|
||||
ret = find_vuln(commit, pattern=defaultpattern)
|
||||
ret = find_vuln(commit, pattern=defaultpattern, versbose=args.v)
|
||||
if ret:
|
||||
rcommit = ret['commit']
|
||||
summary(rcommit, branch, defaultpattern, origin=origin)
|
||||
_, potential_vulnerabilities = summary(rcommit,
|
||||
branch,
|
||||
defaultpattern,
|
||||
origin=origin,
|
||||
vuln_match=ret['match'],
|
||||
tags_matching=args.t,
|
||||
commit_state=args.s)
|
||||
all_potential_vulnerabilities.update(potential_vulnerabilities)
|
||||
found += 1
|
||||
elif isinstance(defaultpattern, list):
|
||||
for p in defaultpattern:
|
||||
ret = find_vuln(commit, pattern=p)
|
||||
ret = find_vuln(commit, pattern=p, versbose=args.v)
|
||||
if ret:
|
||||
rcommit = ret['commit']
|
||||
summary(rcommit, branch, p, origin=origin)
|
||||
_, potential_vulnerabilities = summary(rcommit,
|
||||
branch,
|
||||
p,
|
||||
origin=origin,
|
||||
vuln_match=ret['match'],
|
||||
tags_matching=args.t,
|
||||
commit_state=args.s)
|
||||
all_potential_vulnerabilities.update(potential_vulnerabilities)
|
||||
found += 1
|
||||
if not args.c:
|
||||
print(json.dumps(potential_vulnerabilities))
|
||||
print(json.dumps(all_potential_vulnerabilities))
|
||||
elif args.c:
|
||||
print(json.dumps(list(cve_found)))
|
||||
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
double[-| ]free
|
||||
buffer overflow
|
||||
double free
|
||||
race[-| ]condition
|
|
@ -1 +0,0 @@
|
|||
(?i)(
|
|
@ -1 +0,0 @@
|
|||
)
|
|
@ -1,55 +0,0 @@
|
|||
assessment
|
||||
lack of
|
||||
bad
|
||||
vulnerable
|
||||
missing
|
||||
unproper
|
||||
unsuitable
|
||||
breakable
|
||||
broken
|
||||
weak
|
||||
incorrect
|
||||
replace
|
||||
assessment
|
||||
pen([\s-]?)test
|
||||
pentest
|
||||
penetration([\s-]?)test
|
||||
report
|
||||
vulnerablity
|
||||
replace
|
||||
fix
|
||||
issue
|
||||
fixes
|
||||
add
|
||||
remove
|
||||
check){s1,}
|
||||
(crypto
|
||||
cryptographic
|
||||
cryptography
|
||||
encipherement
|
||||
encryption
|
||||
ciphers
|
||||
cipher
|
||||
AES
|
||||
DES
|
||||
3DES
|
||||
cipher
|
||||
GPG
|
||||
PGP
|
||||
OpenSSL
|
||||
SSH
|
||||
wireguard
|
||||
VPN
|
||||
CBC
|
||||
ECB
|
||||
CTR
|
||||
key[.|,|\s]
|
||||
private([\s-]?)key
|
||||
public([\s-]?)key size
|
||||
length
|
||||
strenght
|
||||
generation
|
||||
randomness
|
||||
entropy
|
||||
prng
|
||||
rng
|
|
@ -1 +0,0 @@
|
|||
.*(
|
|
@ -1 +0,0 @@
|
|||
){1,}
|
|
@ -1,30 +0,0 @@
|
|||
denial of service
|
||||
\bXXE\b
|
||||
remote code execution
|
||||
\bopen redirect
|
||||
OSVDB
|
||||
\bvuln
|
||||
\bCVE\b
|
||||
\bXSS\b
|
||||
\bReDoS\b
|
||||
\bNVD\b
|
||||
malicious
|
||||
x−frame−options
|
||||
attack
|
||||
cross site
|
||||
exploit
|
||||
malicious
|
||||
directory traversal
|
||||
\bRCE\b
|
||||
\bdos\b
|
||||
\bXSRF \b
|
||||
\bXSS\b
|
||||
clickjack
|
||||
session.fixation
|
||||
hijack
|
||||
\badvisory
|
||||
\binsecure
|
||||
security
|
||||
\bcross−origin\b
|
||||
unauthori[z|s]ed
|
||||
infinite loop
|
|
@ -1 +0,0 @@
|
|||
(?i)(
|
|
@ -1 +0,0 @@
|
|||
)
|
Loading…
Reference in a new issue