From df6c4c163eab777f063973abcd2ebae4c719f6be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bonhomme?= Date: Mon, 6 Jan 2020 22:30:14 +0100 Subject: [PATCH] moved find function used for the cli in a file run.py --- .gitignore | 2 +- README.md | 8 ++- bin/finder.py | 7 ++- git_vuln_finder/__init__.py | 2 +- git_vuln_finder/run.py | 97 ++++++++++++++++++++++++++++++++ git_vuln_finder/vulnerability.py | 80 -------------------------- tests/test_finder.py | 2 +- 7 files changed, 111 insertions(+), 87 deletions(-) create mode 100644 git_vuln_finder/run.py diff --git a/.gitignore b/.gitignore index 88e45aa..4a23fa9 100644 --- a/.gitignore +++ b/.gitignore @@ -19,7 +19,7 @@ dist/* .coverage .mypy_cache/ .cache/ -.test_repos/ +test_repos/ # sphinx docs/_build diff --git a/README.md b/README.md index c1d67bb..befd4c7 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ Type "help", "copyright", "credits" or "license" for more information. ['9069838b30fb3b48af0123e39f664cea683254a5', 'facb0e4662415b5f28163e853dc6742ac5fafb3d', ... snap ... '8a75dbeb2305297640453029b7905ef51b87e8dd', '1dc43de0dccc2ea7da6dddb7b98f8d7dcf323914', '192c4f788d48f82c03e9cef40013f34370e90737', '2eb8dcf26cb37f09cffe26909a646e702dbcab66', 'fa1ae0abcde5df8d0b3283299e3f246bedf7692c', 'c11c30a8c8d727dcf5634fa0cc6ee0b4b77ddc3d', '75ca568fa1c19de4c5358fed246686de8467c238', 'a20daf90e358c1476a325ea665d533f7a27e3364', '042cc1f69ec0878f542667cb684378869f859911'] + >>> print(json.dumps(all_potential_vulnerabilities['9069838b30fb3b48af0123e39f664cea683254a5'], sort_keys=True, indent=4, separators=(",", ": "))) { "author": "Daniel Stenberg", @@ -79,8 +80,9 @@ $ pipx install git-vuln-finder $ git-vuln-finder --help ~~~ -``pipx`` installs scripts (system wide available) provided by Python packages into -separate virtualenvs to shield them from your system and each other. +You can also use pip. 
+``pipx`` installs scripts (system wide available) provided by Python packages +into separate virtualenvs to shield them from your system and each other. ### Usage @@ -106,6 +108,7 @@ optional arguments: More info: https://github.com/cve-search/git-vuln-finder ~~~ + # Patterns git-vuln-finder comes with 3 default patterns which can be selected to find the potential vulnerabilities described in the commit messages such as: @@ -114,6 +117,7 @@ git-vuln-finder comes with 3 default patterns which can be selected to find the - [`cryptopatterns`](https://github.com/cve-search/git-vuln-finder/blob/master/patterns/en/medium/crypto) is a vulnerability pattern for cryptographic errors mentioned in commit messages. - [`cpatterns`](https://github.com/cve-search/git-vuln-finder/blob/master/patterns/en/medium/c) is a set of standard vulnerability patterns see for C/C++-like languages. + ## A sample partial output from Curl git repository ~~~bash diff --git a/bin/finder.py b/bin/finder.py index f8c692c..581a67b 100644 --- a/bin/finder.py +++ b/bin/finder.py @@ -56,8 +56,11 @@ def main(): parser.exit() all_potential_vulnerabilities, all_cve_found, found = find( - args.r, tags_matching=args.t, commit_state=args.s, verbose=args.v, - defaultpattern=args.p + args.r, + tags_matching=args.t, + commit_state=args.s, + verbose=args.v, + defaultpattern=args.p, ) if not args.c: diff --git a/git_vuln_finder/__init__.py b/git_vuln_finder/__init__.py index 8dd73fc..a2572bc 100644 --- a/git_vuln_finder/__init__.py +++ b/git_vuln_finder/__init__.py @@ -1,6 +1,6 @@ from git_vuln_finder.pattern import build_pattern from git_vuln_finder.pattern import get_patterns -from git_vuln_finder.vulnerability import find from git_vuln_finder.vulnerability import find_vuln from git_vuln_finder.vulnerability import summary from git_vuln_finder.vulnerability import extract_cve +from git_vuln_finder.run import find diff --git a/git_vuln_finder/run.py b/git_vuln_finder/run.py new file mode 100644 index 
0000000..181633b
--- /dev/null
+++ b/git_vuln_finder/run.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Finding potential software vulnerabilities from git commit messages
+#
+# Software is free software released under the "GNU Affero General Public License v3.0"
+#
+# This software is part of cve-search.org
+#
+# Copyright (c) 2019-2020 Alexandre Dulaunoy - a@foo.be
+
+import sys
+import git
+import typing
+from git_vuln_finder import get_patterns, find_vuln, summary
+
+
+def find(
+    repo,
+    tags_matching=False,
+    commit_state="under-review",
+    verbose=False,
+    defaultpattern="all",
+):
+    """Scan every local branch of a git repository for suspicious commits.
+
+    Each commit message is tested with ``find_vuln`` against the selected
+    pattern(s); every hit is expanded by ``summary`` and merged into the
+    returned collections.
+
+    :param repo: path of the repository, handed to ``git.Repo``.
+    :param tags_matching: when true, map each tagged commit sha to its tag
+        names and hand that map to ``summary``.
+    :param commit_state: forwarded unchanged to ``summary``.
+    :param verbose: forwarded unchanged to ``find_vuln``.
+    :param defaultpattern: ``"vulnpatterns"``, ``"cryptopatterns"``,
+        ``"cpatterns"`` or ``"all"``; a compiled pattern or a list of
+        patterns is used as given. Any other value matches nothing.
+    :return: ``(all_potential_vulnerabilities, all_cve_found, found)``: a
+        dict merged from the ``summary`` results, a set accumulating the
+        ``cve_found`` values, and the number of hits (counted once per
+        branch and per matching pattern).
+    """
+    # Initialization of the variables for the results
+    repo = git.Repo(repo)
+    found = 0
+    all_potential_vulnerabilities = {}
+    all_cve_found = set()
+
+    # Initialization of the patterns
+    patterns = get_patterns()
+    vulnpatterns = patterns["en"]["medium"]["vuln"]
+    cryptopatterns = patterns["en"]["medium"]["crypto"]
+    cpatterns = patterns["en"]["medium"]["c"]
+
+    if defaultpattern == "vulnpatterns":
+        defaultpattern = vulnpatterns
+    elif defaultpattern == "cryptopatterns":
+        defaultpattern = cryptopatterns
+    elif defaultpattern == "cpatterns":
+        defaultpattern = cpatterns
+    elif defaultpattern == "all":
+        defaultpattern = [vulnpatterns, cryptopatterns, cpatterns]
+
+    # Normalise to a list so one loop serves both the single-pattern and the
+    # multi-pattern case; anything unrecognised yields an empty list.
+    if isinstance(defaultpattern, typing.Pattern):
+        active_patterns = [defaultpattern]
+    elif isinstance(defaultpattern, list):
+        active_patterns = defaultpattern
+    else:
+        active_patterns = []
+
+    repo_heads = repo.heads
+    repo_heads_names = [h.name for h in repo_heads]
+    print(repo_heads_names, file=sys.stderr)
+    # A repository may have no remote named "origin"; fall back to None
+    # instead of failing on the attribute lookup.
+    # NOTE(review): confirm summary() copes with origin=None.
+    origin = next((r.url for r in repo.remotes if r.name == "origin"), None)
+    tagmap = {}
+    if tags_matching:
+        for t in repo.tags:
+            tagmap.setdefault(repo.commit(t).hexsha, []).append(str(t))
+
+    for branch in repo_heads_names:
+        for commit in repo.iter_commits(branch):
+            for pattern in active_patterns:
+                ret = find_vuln(commit, pattern=pattern, verbose=verbose)
+                if not ret:
+                    continue
+                _, potential_vulnerabilities, cve_found = summary(
+                    repo,
+                    ret["commit"],
+                    branch,
+                    tagmap,
+                    pattern,
+                    origin=origin,
+                    vuln_match=ret["match"],
+                    tags_matching=tags_matching,
+                    commit_state=commit_state,
+                )
+                all_potential_vulnerabilities.update(potential_vulnerabilities)
+                all_cve_found.update(cve_found)
+                found += 1
+
+    return all_potential_vulnerabilities, all_cve_found, found
diff --git a/git_vuln_finder/vulnerability.py b/git_vuln_finder/vulnerability.py
index 616f4e8..4581618 100644
--- a/git_vuln_finder/vulnerability.py
+++ b/git_vuln_finder/vulnerability.py
@@ -9,90 +9,10 @@
 #
 # Copyright (c) 2019-2020 Alexandre Dulaunoy - a@foo.be
 
-import git
 import re
 import sys
-import typing
 from langdetect import detect as langdetect
-from git_vuln_finder import get_patterns
-
-
-def find(repo, tags_matching=False, commit_state="under-review", verbose=False, defaultpattern="all"):
-    # Initialization of the variables for the results
-    repo = git.Repo(repo)
-    found = 0
-    all_potential_vulnerabilities = {}
-    all_cve_found = set()
-
-    # Initialization of the patterns
-    patterns = get_patterns()
-    vulnpatterns = patterns["en"]["medium"]["vuln"]
-    cryptopatterns = patterns["en"]["medium"]["crypto"]
-    cpatterns = patterns["en"]["medium"]["c"]
-
-    if defaultpattern == "vulnpatterns":
-        defaultpattern = vulnpatterns
-    elif defaultpattern == "cryptopatterns":
-        defaultpattern = cryptopatterns
-    elif defaultpattern == "cpatterns":
-        defaultpattern = cpatterns
-    elif defaultpattern == "all":
-        defaultpattern = [vulnpatterns, cryptopatterns, cpatterns]
-
-    repo_heads = repo.heads
-    repo_heads_names = [h.name for h in repo_heads]
-    print(repo_heads_names,
file=sys.stderr) - origin = repo.remotes.origin.url - tagmap = {} - if tags_matching: - for t in repo.tags: - tagmap.setdefault(repo.commit(t).hexsha, []).append(str(t)) - - for branch in repo_heads_names: - commits = list(repo.iter_commits(branch)) - defaultpattern - for commit in commits: - if isinstance(defaultpattern, typing.Pattern): - ret = find_vuln(commit, pattern=defaultpattern, verbose=verbose) - if ret: - rcommit = ret["commit"] - _, potential_vulnerabilities, cve_found = summary( - repo, - rcommit, - branch, - tagmap, - defaultpattern, - origin=origin, - vuln_match=ret["match"], - tags_matching=tags_matching, - commit_state=commit_state, - ) - all_potential_vulnerabilities.update(potential_vulnerabilities) - all_cve_found.update(cve_found) - found += 1 - elif isinstance(defaultpattern, list): - for p in defaultpattern: - ret = find_vuln(commit, pattern=p, verbose=verbose) - if ret: - rcommit = ret["commit"] - _, potential_vulnerabilities, cve_found = summary( - repo, - rcommit, - branch, - tagmap, - p, - origin=origin, - vuln_match=ret["match"], - tags_matching=tags_matching, - commit_state=commit_state, - ) - all_potential_vulnerabilities.update(potential_vulnerabilities) - all_cve_found.update(cve_found) - found += 1 - - return all_potential_vulnerabilities, all_cve_found, found - def find_vuln(commit, pattern, verbose=False): """Find a potential vulnerability from a commit message thanks to a regex diff --git a/tests/test_finder.py b/tests/test_finder.py index 4625ea2..a4f36d0 100644 --- a/tests/test_finder.py +++ b/tests/test_finder.py @@ -3,7 +3,7 @@ from git_vuln_finder import find -def test_find_vuln(clone_curl): +def test_find_vuln(): all_potential_vulnerabilities, all_cve_found, found = find("./test_repos/curl/") #assert len(list(all_cve_found)) == 64