mirror of
https://github.com/MISP/misp-galaxy.git
synced 2024-11-22 23:07:19 +00:00
Add [generator] statistics
This commit is contained in:
parent
590554cb0f
commit
29b39f55d7
1 changed files with 235 additions and 82 deletions
|
@ -1,39 +1,29 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
import json
|
||||
import operator
|
||||
import os
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
import validators
|
||||
|
||||
CLUSTER_PATH = '../../clusters'
|
||||
SITE_PATH = './site/docs'
|
||||
PROJECTS_PATH = './site/projects'
|
||||
# PROJECTS_PATH = './site/projects'
|
||||
|
||||
galaxies_fnames = []
|
||||
FILES_TO_IGNORE = [] # if you want to skip a specific cluster in the generation
|
||||
|
||||
for f in os.listdir(CLUSTER_PATH):
|
||||
if '.json' in f and f not in FILES_TO_IGNORE:
|
||||
galaxies_fnames.append(f)
|
||||
|
||||
galaxies_fnames.sort()
|
||||
galaxy_output = {}
|
||||
|
||||
# Variables for statistics
|
||||
public_relations_count = 0
|
||||
private_relations_count = 0
|
||||
|
||||
private_clusters = []
|
||||
public_clusters = []
|
||||
|
||||
public_clusters_dict = {}
|
||||
relation_count_dict = {}
|
||||
synonyms_count_dict = {}
|
||||
|
||||
empty_uuids_dict = {}
|
||||
|
||||
|
||||
intro = """
|
||||
INTRO = """
|
||||
# MISP Galaxy
|
||||
|
||||
The MISP galaxy offers a streamlined approach for representing large entities, known as clusters, which can be linked to MISP events or attributes. Each cluster consists of one or more elements, represented as key-value pairs. MISP galaxy comes with a default knowledge base, encompassing areas like Threat Actors, Tools, Ransomware, and ATT&CK matrices. However, users have the flexibility to modify, update, replace, or share these elements according to their needs.
|
||||
|
@ -51,7 +41,15 @@ Clusters serve as an open and freely accessible knowledge base, which can be uti
|
|||
## Publicly available clusters
|
||||
|
||||
"""
|
||||
contributing = """
|
||||
|
||||
STATISTICS= """
|
||||
## Statistics
|
||||
|
||||
You can find some statistics about MISP galaxies [here](./statistics.md).
|
||||
|
||||
"""
|
||||
|
||||
CONTRIBUTING = """
|
||||
|
||||
# Contributing
|
||||
|
||||
|
@ -95,6 +93,7 @@ class Galaxy():
|
|||
self.entry += f' |{author}|\n'
|
||||
|
||||
def _create_clusters(self):
|
||||
# global public_clusters_dict
|
||||
clusters = []
|
||||
for cluster in self.cluster_list:
|
||||
clusters.append(Cluster(
|
||||
|
@ -104,22 +103,30 @@ class Galaxy():
|
|||
date=cluster.get('date', None),
|
||||
related_list=cluster.get('related', None),
|
||||
meta=cluster.get('meta', None),
|
||||
galaxie=self.name
|
||||
galaxie=self.json_file_name
|
||||
))
|
||||
return clusters
|
||||
|
||||
def _create_clusters_entry(self):
|
||||
def _create_clusters_entry(self, cluster_dict):
|
||||
for cluster in self.clusters:
|
||||
self.entry += cluster.create_entry()
|
||||
self.entry += cluster.create_entry(cluster_dict)
|
||||
|
||||
def create_entry(self):
|
||||
def create_entry(self, cluster_dict):
|
||||
self._create_metadata_entry()
|
||||
self._create_title_entry()
|
||||
self._create_description_entry()
|
||||
self._create_authors_entry()
|
||||
self._create_clusters_entry()
|
||||
self._create_clusters_entry(cluster_dict)
|
||||
return self.entry
|
||||
|
||||
def write_entry(self, path, cluster_dict):
|
||||
self.create_entry(cluster_dict)
|
||||
galaxy_path = os.path.join(path, self.json_file_name)
|
||||
if not os.path.exists(galaxy_path):
|
||||
os.mkdir(galaxy_path)
|
||||
with open(os.path.join(galaxy_path, 'index.md'), "w") as index:
|
||||
index.write(self.entry)
|
||||
|
||||
class Cluster():
|
||||
def __init__(self, description, uuid, date, value, related_list, meta, galaxie):
|
||||
self.description = description
|
||||
|
@ -131,6 +138,10 @@ class Cluster():
|
|||
self.entry = ""
|
||||
self.galaxie = galaxie
|
||||
|
||||
global public_clusters_dict
|
||||
if self.galaxie:
|
||||
public_clusters_dict[self.uuid] = self.galaxie
|
||||
|
||||
def _create_title_entry(self):
|
||||
self.entry += f'## {self.value}\n'
|
||||
self.entry += f'\n'
|
||||
|
@ -189,10 +200,9 @@ class Cluster():
|
|||
self.entry += f' | {meta} | {self.meta[meta]} |\n'
|
||||
|
||||
|
||||
def get_related_clusters(self, depth=-1, visited=None):
|
||||
def get_related_clusters(self, cluster_dict, depth=-1, visited=None):
|
||||
global public_relations_count
|
||||
global private_relations_count
|
||||
global public_clusters
|
||||
global private_clusters
|
||||
global empty_uuids_dict
|
||||
empty_uuids = 0
|
||||
|
@ -224,14 +234,12 @@ class Cluster():
|
|||
related_cluster = cluster_dict[dest_uuid]
|
||||
|
||||
public_relations_count += 1
|
||||
if dest_uuid not in public_clusters:
|
||||
public_clusters.append(dest_uuid)
|
||||
|
||||
related_clusters.append((self, related_cluster))
|
||||
|
||||
if (depth > 1 or depth == -1) and related_cluster.uuid not in visited:
|
||||
new_depth = depth - 1 if depth > 1 else -1
|
||||
related_clusters += related_cluster.get_related_clusters(depth=new_depth, visited=visited)
|
||||
related_clusters += related_cluster.get_related_clusters(depth=new_depth, visited=visited, cluster_dict=cluster_dict)
|
||||
|
||||
if empty_uuids > 0:
|
||||
empty_uuids_dict[self.value] = empty_uuids
|
||||
|
@ -242,38 +250,198 @@ class Cluster():
|
|||
|
||||
return related_clusters
|
||||
|
||||
def _create_related_entry(self):
|
||||
if self.related_list and cluster_dict:
|
||||
related_clusters = self.get_related_clusters()
|
||||
self.entry += f'\n'
|
||||
self.entry += f'??? info "Related clusters"\n'
|
||||
self.entry += f'\n'
|
||||
self.entry += f' ```mermaid\n'
|
||||
self.entry += f' graph TD\n'
|
||||
# def _create_related_entry(self, cluster_dict):
|
||||
# if self.related_list and cluster_dict:
|
||||
# related_clusters = self.get_related_clusters(cluster_dict)
|
||||
# self.entry += f'\n'
|
||||
# self.entry += f'??? info "Related clusters"\n'
|
||||
# self.entry += f'\n'
|
||||
# self.entry += f' ```mermaid\n'
|
||||
# self.entry += f' graph TD\n'
|
||||
|
||||
global relation_count_dict
|
||||
relation_count = 0
|
||||
# global relation_count_dict
|
||||
# relation_count = 0
|
||||
|
||||
for relation in related_clusters:
|
||||
relation_count += 1
|
||||
# print(self.value)
|
||||
# print(relation)
|
||||
# print(relation[0].value)
|
||||
# print(relation[1].value)
|
||||
self.entry += f' {relation[0].uuid}[{relation[0].value}] --- {relation[1].uuid}[{relation[1].value}]\n'
|
||||
self.entry += f' ```\n'
|
||||
relation_count_dict[self.value] = relation_count
|
||||
# for relation in related_clusters:
|
||||
# relation_count += 1
|
||||
# self.entry += f' {relation[0].uuid}[{relation[0].value}] --- {relation[1].uuid}[{relation[1].value}]\n'
|
||||
# self.entry += f' ```\n'
|
||||
# relation_count_dict[self.value] = relation_count
|
||||
|
||||
def create_entry(self):
|
||||
def _get_related_entry(self, relations):
|
||||
output = ""
|
||||
output += f'## Related clusters for {self.value}\n'
|
||||
output += f'\n'
|
||||
return output
|
||||
|
||||
def create_entry(self, cluster_dict):
|
||||
self._create_title_entry()
|
||||
self._create_description_entry()
|
||||
self._create_synonyms_entry()
|
||||
self._create_uuid_entry()
|
||||
self._create_refs_entry()
|
||||
self._create_associated_metadata_entry()
|
||||
self._create_related_entry()
|
||||
# self._create_related_entry(cluster_dict)
|
||||
if self.related_list:
|
||||
self._write_relations(cluster_dict, SITE_PATH)
|
||||
return self.entry
|
||||
|
||||
def _write_relations(self, cluster_dict, path):
|
||||
related_clusters = self.get_related_clusters(cluster_dict)
|
||||
global relation_count_dict
|
||||
relation_count_dict[self.value] = len(related_clusters)
|
||||
galaxy_path = os.path.join(path, self.galaxie)
|
||||
if not os.path.exists(galaxy_path):
|
||||
os.mkdir(galaxy_path)
|
||||
with open(os.path.join(galaxy_path, f'{self.uuid}.md'), "w") as index:
|
||||
index.write(self._get_related_entry(related_clusters))
|
||||
|
||||
def create_index(galaxies):
|
||||
index_output = INTRO
|
||||
index_output += STATISTICS
|
||||
for galaxie in galaxies:
|
||||
index_output += f'- [{galaxie.name}](./{galaxie.json_file_name}/index.md)\n'
|
||||
index_output += CONTRIBUTING
|
||||
return index_output
|
||||
|
||||
def create_galaxies(galaxies, cluster_dict):
|
||||
galaxy_output = {}
|
||||
for galaxie in galaxies:
|
||||
galaxy_output[galaxie.json_file_name] = galaxie.create_entry(cluster_dict)
|
||||
return galaxy_output
|
||||
|
||||
def create_xy_chart(title, width, height, x_axis, y_axis, bar):
|
||||
output = ""
|
||||
output += f'```mermaid\n'
|
||||
output += f'---\n'
|
||||
output += f'config:\n'
|
||||
output += f' xyChart:\n'
|
||||
output += f' width: {width}\n'
|
||||
output += f' height: {height}\n'
|
||||
output += f'---\n'
|
||||
output += f'xychart-beta\n'
|
||||
output += f' title "{title}"\n'
|
||||
output += f' x-axis [{x_axis}]\n'
|
||||
output += f' y-axis "{y_axis}"\n'
|
||||
output += f' bar {bar}\n'
|
||||
output += f'```\n'
|
||||
output += f'\n'
|
||||
return output
|
||||
|
||||
def create_statistics():
|
||||
statistic_output = ""
|
||||
|
||||
statistic_output += f'# Cluster statistics\n'
|
||||
statistic_output += f'## Number of clusters\n'
|
||||
statistic_output += f'```mermaid\n'
|
||||
statistic_output += f"pie showData\n"
|
||||
statistic_output += f' title Number of clusters\n'
|
||||
statistic_output += f' "Public clusters" : {len(public_clusters_dict)}\n'
|
||||
statistic_output += f' "Private clusters" : {len(private_clusters)}\n'
|
||||
statistic_output += f'```\n'
|
||||
statistic_output += f'\n'
|
||||
|
||||
statistic_output += f'## Galaxies with the most clusters\n'
|
||||
galaxy_counts = {}
|
||||
for galaxy in public_clusters_dict.values():
|
||||
galaxy_counts[galaxy] = galaxy_counts.get(galaxy, 0) + 1
|
||||
sorted_galaxies = sorted(galaxy_counts, key=galaxy_counts.get, reverse=True)[:15]
|
||||
top_15_galaxies = [re.sub(r"[^A-Za-z0-9 ]", "", galaxy) for galaxy in sorted_galaxies]
|
||||
top_15_galaxies = ", ".join(top_15_galaxies)
|
||||
top_15_galaxies_values = sorted(galaxy_counts.values(), reverse=True)[:15]
|
||||
statistic_output += create_xy_chart("Galaxies with the most clusters", 1800, 500, top_15_galaxies, "Number of clusters", top_15_galaxies_values)
|
||||
|
||||
statistic_output += f'## Galaxies with the least clusters\n'
|
||||
sorted_galaxies = sorted(galaxy_counts, key=galaxy_counts.get)[:15]
|
||||
top_15_galaxies = [re.sub(r"[^A-Za-z0-9 ]", "", galaxy) for galaxy in sorted_galaxies]
|
||||
top_15_galaxies = ", ".join(top_15_galaxies)
|
||||
top_15_galaxies_values = sorted(galaxy_counts.values())[:15]
|
||||
statistic_output += create_xy_chart("Galaxies with the least clusters", 1800, 500, top_15_galaxies, "Number of clusters", top_15_galaxies_values)
|
||||
|
||||
galaxy_number = 0
|
||||
for galaxy in public_clusters_dict.values():
|
||||
galaxy_number += 1
|
||||
statistic_output += f'**Average number of clusters per galaxy**: {len(public_clusters_dict) / galaxy_number}\n'
|
||||
|
||||
statistic_output += f'# Relation statistics\n'
|
||||
statistic_output += f'## Number of relations\n'
|
||||
statistic_output += f'```mermaid\n'
|
||||
statistic_output += f"pie showData\n"
|
||||
statistic_output += f' title Number of relations\n'
|
||||
statistic_output += f' "Public relations" : {public_relations_count}\n'
|
||||
statistic_output += f' "Private relations" : {private_relations_count}\n'
|
||||
statistic_output += f'```\n'
|
||||
statistic_output += f'\n'
|
||||
|
||||
statistic_output += f'**Average number of relations per cluster**: {sum(relation_count_dict.values()) / len(relation_count_dict)}\n'
|
||||
|
||||
statistic_output += f'## Cluster with the most relations\n'
|
||||
sorted_relation = sorted(relation_count_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
|
||||
top_15_relation = [re.sub(r"[^A-Za-z0-9 ]", "", key) for key, value in sorted_relation]
|
||||
top_15_relation = ", ".join(top_15_relation)
|
||||
top_15_relation_values = sorted(relation_count_dict.values(), reverse=True)[:15]
|
||||
statistic_output += create_xy_chart("Cluster with the most relations", 2000, 500, top_15_relation, "Number of relations", top_15_relation_values)
|
||||
|
||||
statistic_output += f'## Cluster with the least relations\n'
|
||||
sorted_relation = sorted(relation_count_dict.items(), key=operator.itemgetter(1))[:15]
|
||||
top_15_relation = [re.sub(r"[^A-Za-z0-9 ]", "", key) for key, value in sorted_relation]
|
||||
top_15_relation = ", ".join(top_15_relation)
|
||||
top_15_relation_values = sorted(relation_count_dict.values())[:15]
|
||||
statistic_output += create_xy_chart("Cluster with the least relations", 2000, 500, top_15_relation, "Number of relations", top_15_relation_values)
|
||||
|
||||
statistic_output += f'# Synonyms statistics\n'
|
||||
statistic_output += f'## Cluster with the most synonyms\n'
|
||||
sorted_synonyms = sorted(synonyms_count_dict.items(), key=operator.itemgetter(1), reverse=True)[:15]
|
||||
top_15_synonyms = [re.sub(r"[^A-Za-z0-9 ]", "", key) for key, value in sorted_synonyms]
|
||||
top_15_synonyms = ", ".join(top_15_synonyms)
|
||||
top_15_synonyms_values = sorted(synonyms_count_dict.values(), reverse=True)[:15]
|
||||
statistic_output += create_xy_chart("Cluster with the most synonyms", 1800, 500, top_15_synonyms, "Number of synonyms", top_15_synonyms_values)
|
||||
|
||||
statistic_output += f'## Cluster with the least synonyms\n'
|
||||
sorted_synonyms = sorted(synonyms_count_dict.items(), key=operator.itemgetter(1))[:15]
|
||||
top_15_synonyms = [re.sub(r"[^A-Za-z0-9 ]", "", key) for key, value in sorted_synonyms]
|
||||
top_15_synonyms = ", ".join(top_15_synonyms)
|
||||
top_15_synonyms_values = sorted(synonyms_count_dict.values())[:15]
|
||||
statistic_output += create_xy_chart("Cluster with the least synonyms", 1800, 500, top_15_synonyms, "Number of synonyms", top_15_synonyms_values)
|
||||
|
||||
statistic_output += f'# Empty UUIDs statistics\n'
|
||||
statistic_output += f'**Number of empty UUIDs**: {sum(empty_uuids_dict.values())}\n'
|
||||
statistic_output += f'\n'
|
||||
statistic_output += f'**Empty UUIDs per cluster**: {empty_uuids_dict}\n'
|
||||
|
||||
|
||||
print(f"Public relations: {public_relations_count}")
|
||||
print(f"Private relations: {private_relations_count}")
|
||||
print(f"Total relations: {public_relations_count + private_relations_count}")
|
||||
print(f"Percetage of private relations: {private_relations_count / (public_relations_count + private_relations_count) * 100}%")
|
||||
print(f"Private clusters: {len(private_clusters)}")
|
||||
print(f"Public clusters: {len(public_clusters_dict)}")
|
||||
print(f"Total clusters: {len(private_clusters) + len(public_clusters_dict)}")
|
||||
print(f"Percentage of private clusters: {len(private_clusters) / (len(private_clusters) + len(public_clusters_dict)) * 100}%")
|
||||
print(f"Average number of relations per cluster: {sum(relation_count_dict.values()) / len(relation_count_dict)}")
|
||||
print(f"Max number of relations per cluster: {max(relation_count_dict.values())} from {max(relation_count_dict, key=relation_count_dict.get)}")
|
||||
print(f"Min number of relations per cluster: {min(relation_count_dict.values())} from {min(relation_count_dict, key=relation_count_dict.get)}")
|
||||
print(f"Average number of synonyms per cluster: {sum(synonyms_count_dict.values()) / len(synonyms_count_dict)}")
|
||||
print(f"Max number of synonyms per cluster: {max(synonyms_count_dict.values())} from {max(synonyms_count_dict, key=synonyms_count_dict.get)}")
|
||||
print(f"Min number of synonyms per cluster: {min(synonyms_count_dict.values())} from {min(synonyms_count_dict, key=synonyms_count_dict.get)}")
|
||||
print(f"Number of empty UUIDs: {sum(empty_uuids_dict.values())}")
|
||||
print(f"Empty UUIDs per cluster: {empty_uuids_dict}")
|
||||
print(sorted(relation_count_dict.items(), key=operator.itemgetter(1), reverse=True)[:30])
|
||||
lol = [re.sub(r"[^A-Za-z0-9 ]", "", key) for key, value in sorted_relation]
|
||||
print(", ".join(lol))
|
||||
|
||||
|
||||
return statistic_output
|
||||
|
||||
def main():
|
||||
galaxies_fnames = []
|
||||
for f in os.listdir(CLUSTER_PATH):
|
||||
if '.json' in f and f not in FILES_TO_IGNORE:
|
||||
galaxies_fnames.append(f)
|
||||
|
||||
galaxies_fnames.sort()
|
||||
galaxy_output = {}
|
||||
|
||||
galaxies = []
|
||||
for galaxy in galaxies_fnames:
|
||||
with open(os.path.join(CLUSTER_PATH, galaxy)) as fr:
|
||||
|
@ -285,52 +453,37 @@ for galaxy in galaxies:
|
|||
for cluster in galaxy.clusters:
|
||||
cluster_dict[cluster.uuid] = cluster
|
||||
|
||||
def create_index(intro, contributing, galaxies):
|
||||
index_output = intro
|
||||
for galaxie in galaxies:
|
||||
index_output += f'- [{galaxie.name}](./{galaxie.json_file_name}/index.md)\n'
|
||||
index_output += contributing
|
||||
return index_output
|
||||
|
||||
def create_galaxies(galaxies):
|
||||
galaxy_output = {}
|
||||
for galaxie in galaxies:
|
||||
galaxy_output[galaxie.json_file_name] = galaxie.create_entry()
|
||||
return galaxy_output
|
||||
|
||||
if __name__ == "__main__":
|
||||
index_output = create_index(intro, contributing, galaxies)
|
||||
galaxy_output = create_galaxies(galaxies)
|
||||
|
||||
# Write files
|
||||
if not os.path.exists(SITE_PATH):
|
||||
os.mkdir(SITE_PATH)
|
||||
|
||||
for galaxy in galaxies:
|
||||
galaxy.write_entry(SITE_PATH, cluster_dict)
|
||||
|
||||
index_output = create_index(galaxies)
|
||||
# galaxy_output = create_galaxies(galaxies, cluster_dict)
|
||||
statistic_output = create_statistics()
|
||||
|
||||
with open(os.path.join(SITE_PATH, 'index.md'), "w") as index:
|
||||
index.write(index_output)
|
||||
|
||||
for f in galaxies_fnames:
|
||||
cluster_filename = f.split('.')[0]
|
||||
pathSiteCluster = os.path.join(SITE_PATH, cluster_filename)
|
||||
if not os.path.exists(pathSiteCluster):
|
||||
os.mkdir(pathSiteCluster)
|
||||
with open(os.path.join(pathSiteCluster, 'index.md'), "w") as index:
|
||||
index.write(galaxy_output[cluster_filename])
|
||||
with open(os.path.join(SITE_PATH, 'statistics.md'), "w") as index:
|
||||
index.write(statistic_output)
|
||||
|
||||
print(f"Public relations: {public_relations_count}")
|
||||
print(f"Private relations: {private_relations_count}")
|
||||
print(f"Total relations: {public_relations_count + private_relations_count}")
|
||||
print(f"Percetage of private relations: {private_relations_count / (public_relations_count + private_relations_count) * 100}%")
|
||||
print(f"Private clusters: {len(private_clusters)}")
|
||||
print(f"Public clusters: {len(public_clusters)}")
|
||||
print(f"Total clusters: {len(private_clusters) + len(public_clusters)}")
|
||||
print(f"Percentage of private clusters: {len(private_clusters) / (len(private_clusters) + len(public_clusters)) * 100}%")
|
||||
print(f"Average number of relations per cluster: {sum(relation_count_dict.values()) / len(relation_count_dict)}")
|
||||
print(f"Max number of relations per cluster: {max(relation_count_dict.values())} from {max(relation_count_dict, key=relation_count_dict.get)}")
|
||||
print(f"Min number of relations per cluster: {min(relation_count_dict.values())} from {min(relation_count_dict, key=relation_count_dict.get)}")
|
||||
print(f"Average number of synonyms per cluster: {sum(synonyms_count_dict.values()) / len(synonyms_count_dict)}")
|
||||
print(f"Max number of synonyms per cluster: {max(synonyms_count_dict.values())} from {max(synonyms_count_dict, key=synonyms_count_dict.get)}")
|
||||
print(f"Min number of synonyms per cluster: {min(synonyms_count_dict.values())} from {min(synonyms_count_dict, key=synonyms_count_dict.get)}")
|
||||
print(f"Number of empty UUIDs: {sum(empty_uuids_dict.values())}")
|
||||
print(f"Empty UUIDs per cluster: {empty_uuids_dict}")
|
||||
# for f in galaxies_fnames:
|
||||
# cluster_filename = f.split('.')[0]
|
||||
# pathSiteCluster = os.path.join(SITE_PATH, cluster_filename)
|
||||
# if not os.path.exists(pathSiteCluster):
|
||||
# os.mkdir(pathSiteCluster)
|
||||
# with open(os.path.join(pathSiteCluster, 'index.md'), "w") as index:
|
||||
# index.write(galaxy_output[cluster_filename])
|
||||
|
||||
|
||||
# for cluster in cluster_dict.values():
|
||||
# print(cluster.uuid)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
# test = cluster_dict['f0ec2df5-2e38-4df3-970d-525352006f2e']
|
||||
# test = cluster_dict['d7247cf9-13b6-4781-b789-a5f33521633b']
|
||||
|
|
Loading…
Reference in a new issue