#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Finding potential software vulnerabilities from git commit messages
#
# Software is free software released under the "GNU Affero General Public License v3.0"
#
# This software is part of cve-search.org
#
# Copyright (c) 2019-2020 Alexandre Dulaunoy - a@foo.be

import re
import sys

try:
    from langdetect import detect as langdetect
except ImportError:
    # Language detection is best effort everywhere in this module: when the
    # optional dependency is missing the language is reported as "unknown"
    # instead of making the whole module unimportable.
    langdetect = None

# Pattern used to spot CVE identifiers inside commit messages. Kept exactly as
# it has always been so that callers see the same matches.
_CVE_PATTERN = re.compile(r"CVE-[1-2]\d{1,4}-\d{1,7}", re.IGNORECASE)

# Recognises both scp-style (git@github.com:org/repo.git) and URL-style
# (https://github.com/org/repo[.git]) GitHub remotes.
_GITHUB_ORIGIN = re.compile(
    r"(?:^|[/@])github\.com[:/]+(?P<org>[^/]+)/(?P<repo>.+?)(?:\.git)?/?$"
)


def _search(message, pattern, verbose):
    """Search *message* with *pattern*; echo the hit to stderr when verbose."""
    found = pattern.search(message)
    if found and verbose:
        print("Match found: {}".format(found.group(0)), file=sys.stderr)
        print(message, file=sys.stderr)
        print("---", file=sys.stderr)
    return found


def _detect_language(message):
    """Return the language code guessed for *message*, or "unknown"."""
    if langdetect is None:
        return "unknown"
    try:
        return langdetect(message)
    except Exception:
        # The detector fails on text it cannot classify (for instance text
        # without any letters); that must not abort the analysis of a commit.
        return "unknown"


def _github_api_url(origin, sha):
    """Build the GitHub API commit URL for *origin*, or None if not GitHub."""
    parsed = _GITHUB_ORIGIN.search(origin.strip())
    if parsed is None:
        return None
    return "https://api.github.com/repos/{}/{}/commits/{}".format(
        parsed.group("org"), parsed.group("repo"), sha
    )


def find_vuln(commit, pattern, verbose=False):
    """Find a potential vulnerability in a commit object.

    Args:
        commit: object exposing the commit text as ``commit.message``.
        pattern: compiled regular expression searched in the message.
        verbose: when true, print the match and the message to stderr.

    Returns:
        ``{"commit": commit, "match": <captured groups>}`` when the pattern
        matches, otherwise ``None``.
    """
    found = _search(commit.message, pattern, verbose)
    if found is None:
        return None
    return {"commit": commit, "match": found.groups()}


def find_vuln_event(commit_msg, pattern, verbose=False):
    """Find a potential vulnerability in a raw commit message string.

    Args:
        commit_msg: the commit message text.
        pattern: compiled regular expression searched in the message.
        verbose: when true, print the match and the message to stderr.

    Returns:
        ``{"commit": commit_msg, "match": <captured groups>}`` when the
        pattern matches, otherwise ``None``.
    """
    found = _search(commit_msg, pattern, verbose)
    if found is None:
        return None
    return {"commit": commit_msg, "match": found.groups()}


def summary(
    repo,
    commit,
    branch,
    tagmap,
    pattern,
    origin=None,
    vuln_match=None,
    tags_matching=False,
    commit_state="under-review",
):
    """Describe one matching commit as a potential-vulnerability record.

    Args:
        repo: repository object; only used (through ``repo.commit``) when
            *tags_matching* is true.
        commit: commit object; ``message``, ``hexsha``, ``summary``,
            ``stats.total``, ``author.name``, ``author.email``,
            ``authored_date`` and ``committed_date`` are read from it.
        branch: branch name recorded in the ``branches`` list.
        tagmap: mapping of commit hexsha to tags, consulted when
            *tags_matching* is true.
        pattern: compiled regular expression that selected the commit; its
            source text is stored under ``pattern-selected``.
        origin: remote location of the repository, or ``None`` if unknown.
        vuln_match: what the pattern captured, stored under
            ``pattern-matches``.
        tags_matching: when true, attach the tags found in *tagmap*.
        commit_state: state recorded when the message carries no CVE id.

    Returns:
        A tuple ``(hexsha, {hexsha: record}, cve_ids)`` where *cve_ids* is
        the set of CVE identifiers found in the commit message.
    """
    sha = commit.hexsha
    cve, cve_found = extract_cve(commit.message)

    # Must be defined even without an origin; it is tested further down.
    origin_github_api = None
    if origin is not None:
        origin_github_api = _github_api_url(origin, sha)
    else:
        origin = "git origin unknown"

    record = {}
    record["message"] = commit.message
    record["language"] = _detect_language(commit.message)
    record["commit-id"] = sha
    record["summary"] = commit.summary
    record["stats"] = commit.stats.total
    record["author"] = commit.author.name
    record["author-email"] = commit.author.email
    record["authored_date"] = commit.authored_date
    record["committed_date"] = commit.committed_date
    record["branches"] = [branch]
    record["pattern-selected"] = pattern.pattern
    record["pattern-matches"] = vuln_match
    record["origin"] = origin
    if origin_github_api:
        record["origin-github-api"] = origin_github_api

    record["tags"] = []
    if tags_matching:
        resolved_sha = repo.commit(commit).hexsha
        if resolved_sha in tagmap:
            record["tags"] = tagmap[resolved_sha]

    if cve:
        record["cve"] = cve
        record["state"] = "cve-assigned"
    else:
        record["state"] = commit_state

    return sha, {sha: record}, cve_found


def summary_event(
    commit, pattern, element, vuln_match=None, commit_state="under-review"
):
    """Describe one commit taken from an event payload.

    Args:
        commit: dict with the keys ``sha``, ``message``, ``url`` and
            ``author`` (itself holding ``name`` and ``email``).
        pattern: compiled regular expression that selected the commit.
        element: enclosing event dict; ``repo.name``, ``created_at`` and
            ``payload.ref`` are read from it.
        vuln_match: what the pattern captured, stored under
            ``pattern-matches``.
        commit_state: state recorded when the message carries no CVE id.

    Returns:
        A tuple ``({sha: record}, cve_ids)`` where *cve_ids* is the set of
        CVE identifiers found in the commit message.
    """
    sha = commit["sha"]
    message = commit["message"]
    cve, cve_found = extract_cve(message)

    record = {}
    record["repo_name"] = element["repo"]["name"]
    record["message"] = message
    # Best effort, like summary(): an unclassifiable message yields "unknown".
    record["language"] = _detect_language(message)
    record["commit-id"] = sha
    record["author"] = commit["author"]["name"]
    record["author-email"] = commit["author"]["email"]
    record["authored_date"] = element["created_at"]
    record["branches"] = element["payload"]["ref"]
    record["pattern-selected"] = pattern.pattern
    record["pattern-matches"] = vuln_match
    record["origin-github-api"] = commit["url"]
    if cve:
        record["cve"] = cve
        record["state"] = "cve-assigned"
    else:
        record["state"] = commit_state

    return {sha: record}, cve_found


def extract_cve(commit):
    """Extract CVE identifiers from a commit message.

    Args:
        commit: the commit message text.

    Returns:
        ``(matches, unique)`` where *matches* is the list of identifiers in
        order of appearance (duplicates and original casing kept) and
        *unique* is the same identifiers as a set; ``(None, set())`` when the
        message holds no identifier.
    """
    matches = _CVE_PATTERN.findall(commit)
    if not matches:
        return None, set()
    return matches, set(matches)