chg [tool] code formatting

This commit is contained in:
niclas 2024-03-05 11:01:51 +01:00
parent 58bdd6c155
commit ab5a95ffc6
6 changed files with 168 additions and 53 deletions

View file

@ -12,25 +12,30 @@ import os
import time
import sys
# Raise the interpreter recursion limit once for the whole script.
sys.setrecursionlimit(10000)

# File names to ignore; starts out empty.
FILES_TO_IGNORE = []
# Directory the galaxy cluster JSON files are opened from.
CLUSTER_PATH = "../../clusters"
# Output directory the generated site pages are written to.
SITE_PATH = "./site/docs"
# Directory listed when looking for deprecated galaxy files.
GALAXY_PATH = "../../galaxies"
def write_relations_table(cluster):
    """Write the relations table of ``cluster`` to ``<uuid>.md``.

    Nothing is written when ``cluster.relationships`` is empty. The target
    directory is read from the module-level ``relation_path``, which the
    caller must have set before this function runs.
    """
    if not cluster.relationships:
        return

    print(f"Writing {cluster.uuid}.md")
    target = os.path.join(relation_path, f"{cluster.uuid}.md")
    # The table embeds cluster values, which may contain non-ASCII text.
    # Pin the encoding so the output does not depend on the platform's
    # locale default (which can raise UnicodeEncodeError).
    with open(target, "w", encoding="utf-8") as index:
        index.write(generate_relations_table(cluster.relationships))
def get_cluster_relationships(cluster_data):
    """Compute the levelled relationships for a single cluster.

    Args:
        cluster_data: ``(galaxy, cluster)`` pair of keys. ``galaxy`` indexes
            ``universe.galaxies`` and ``cluster`` indexes that galaxy's
            ``clusters`` mapping.

    Returns:
        A ``(cluster, galaxy, relationships)`` tuple, where ``relationships``
        is whatever ``universe.get_relationships_with_levels`` returns for
        the selected cluster.

    Relies on the module-level ``universe`` object.
    """
    galaxy, cluster = cluster_data
    start_cluster = universe.galaxies[galaxy].clusters[cluster]
    # Compute the relationships exactly once; repeating the traversal only
    # recomputes a value that is immediately overwritten.
    relationships = universe.get_relationships_with_levels(start_cluster)
    print(f"Processed {galaxy}, {cluster}")
    return cluster, galaxy, relationships
def get_deprecated_galaxy_files():
deprecated_galaxy_files = []
for f in os.listdir(GALAXY_PATH):
@ -41,6 +46,7 @@ def get_deprecated_galaxy_files():
return deprecated_galaxy_files
if __name__ == "__main__":
start_time = time.time()
universe = Universe()
@ -56,15 +62,20 @@ if __name__ == "__main__":
for galaxy in galaxies_fnames:
with open(os.path.join(CLUSTER_PATH, galaxy)) as fr:
galaxy_json = json.load(fr)
universe.add_galaxy(galaxy_name=galaxy_json["name"], json_file_name=galaxy, authors=galaxy_json["authors"], description=galaxy_json["description"])
universe.add_galaxy(
galaxy_name=galaxy_json["name"],
json_file_name=galaxy,
authors=galaxy_json["authors"],
description=galaxy_json["description"],
)
for cluster in galaxy_json["values"]:
universe.add_cluster(
galaxy_name=galaxy_json.get("name", None),
uuid=cluster.get("uuid", None),
description=cluster.get("description", None),
value=cluster.get("value", None),
meta=cluster.get("meta", None)
)
galaxy_name=galaxy_json.get("name", None),
uuid=cluster.get("uuid", None),
description=cluster.get("description", None),
value=cluster.get("value", None),
meta=cluster.get("meta", None),
)
# Define the relationships between clusters
for galaxy in galaxies_fnames:
@ -73,7 +84,9 @@ if __name__ == "__main__":
for cluster in galaxy_json["values"]:
if "related" in cluster:
for related in cluster["related"]:
universe.define_relationship(cluster["uuid"], related["dest-uuid"])
universe.define_relationship(
cluster["uuid"], related["dest-uuid"]
)
tasks = []
for galaxy_name, galaxy in universe.galaxies.items():
@ -93,26 +106,56 @@ if __name__ == "__main__":
# Write output
if not os.path.exists(SITE_PATH):
os.mkdir(SITE_PATH)
index = IndexSite(SITE_PATH)
index.add_content("# MISP Galaxy\n\nThe MISP galaxy offers a streamlined approach for representing large entities, known as clusters, which can be linked to MISP events or attributes. Each cluster consists of one or more elements, represented as key-value pairs. MISP galaxy comes with a default knowledge base, encompassing areas like Threat Actors, Tools, Ransomware, and ATT&CK matrices. However, users have the flexibility to modify, update, replace, or share these elements according to their needs.\n\nClusters and vocabularies within MISP galaxy can be utilized in their original form or as a foundational knowledge base. The distribution settings for each cluster can be adjusted, allowing for either restricted or wide dissemination.\n\nAdditionally, MISP galaxies enable the representation of existing standards like the MITRE ATT&CK™ framework, as well as custom matrices.\n\nThe aim is to provide a core set of clusters for organizations embarking on analysis, which can be further tailored to include localized, private information or additional, shareable data.\n\nClusters serve as an open and freely accessible knowledge base, which can be utilized and expanded within [MISP](https://www.misp-project.org/) or other threat intelligence platforms.\n\n![Overview of the integration of MISP galaxy in the MISP Threat Intelligence Sharing Platform](https://raw.githubusercontent.com/MISP/misp-galaxy/aa41337fd78946a60aef3783f58f337d2342430a/doc/images/galaxy.png)\n\n## Publicly available clusters\n")
index.add_content(
"# MISP Galaxy\n\nThe MISP galaxy offers a streamlined approach for representing large entities, known as clusters, which can be linked to MISP events or attributes. Each cluster consists of one or more elements, represented as key-value pairs. MISP galaxy comes with a default knowledge base, encompassing areas like Threat Actors, Tools, Ransomware, and ATT&CK matrices. However, users have the flexibility to modify, update, replace, or share these elements according to their needs.\n\nClusters and vocabularies within MISP galaxy can be utilized in their original form or as a foundational knowledge base. The distribution settings for each cluster can be adjusted, allowing for either restricted or wide dissemination.\n\nAdditionally, MISP galaxies enable the representation of existing standards like the MITRE ATT&CK™ framework, as well as custom matrices.\n\nThe aim is to provide a core set of clusters for organizations embarking on analysis, which can be further tailored to include localized, private information or additional, shareable data.\n\nClusters serve as an open and freely accessible knowledge base, which can be utilized and expanded within [MISP](https://www.misp-project.org/) or other threat intelligence platforms.\n\n![Overview of the integration of MISP galaxy in the MISP Threat Intelligence Sharing Platform](https://raw.githubusercontent.com/MISP/misp-galaxy/aa41337fd78946a60aef3783f58f337d2342430a/doc/images/galaxy.png)\n\n## Publicly available clusters\n"
)
index.add_toc(universe.galaxies.values())
index.add_content("## Statistics\n\nYou can find some statistics about MISP galaxies [here](./statistics.md).\n\n")
index.add_content("# Contributing\n\nIn the dynamic realm of threat intelligence, a variety of models and approaches exist to systematically organize, categorize, and delineate threat actors, hazards, or activity groups. We embrace innovative methodologies for articulating threat intelligence. The galaxy model is particularly versatile, enabling you to leverage and integrate methodologies that you trust and are already utilizing within your organization or community.\n\nWe encourage collaboration and contributions to the [MISP Galaxy JSON files](https://github.com/MISP/misp-galaxy/). Feel free to fork the project, enhance existing elements or clusters, or introduce new ones. Your insights are valuable - share them with us through a pull-request.\n")
index.add_content(
"## Statistics\n\nYou can find some statistics about MISP galaxies [here](./statistics.md).\n\n"
)
index.add_content(
"# Contributing\n\nIn the dynamic realm of threat intelligence, a variety of models and approaches exist to systematically organize, categorize, and delineate threat actors, hazards, or activity groups. We embrace innovative methodologies for articulating threat intelligence. The galaxy model is particularly versatile, enabling you to leverage and integrate methodologies that you trust and are already utilizing within your organization or community.\n\nWe encourage collaboration and contributions to the [MISP Galaxy JSON files](https://github.com/MISP/misp-galaxy/). Feel free to fork the project, enhance existing elements or clusters, or introduce new ones. Your insights are valuable - share them with us through a pull-request.\n"
)
index.write_entry()
statistics = StatisticsSite(SITE_PATH)
statistics.add_cluster_statistics(len([cluster for galaxy in universe.galaxies.values() for cluster in galaxy.clusters.values()]), len(universe.private_clusters))
statistics.add_content("# MISP Galaxy Statistics\n\n")
statistics.add_cluster_statistics(
len(
[
cluster
for galaxy in universe.galaxies.values()
for cluster in galaxy.clusters.values()
]
),
len(universe.private_clusters),
)
statistics.add_galaxy_statistics(universe.galaxies.values())
statistics.add_relation_statistics([cluster for galaxy in universe.galaxies.values() for cluster in galaxy.clusters.values()])
statistics.add_synonym_statistics([cluster for galaxy in universe.galaxies.values() for cluster in galaxy.clusters.values()])
statistics.add_relation_statistics(
[
cluster
for galaxy in universe.galaxies.values()
for cluster in galaxy.clusters.values()
]
)
statistics.add_synonym_statistics(
[
cluster
for galaxy in universe.galaxies.values()
for cluster in galaxy.clusters.values()
]
)
statistics.write_entry()
for galaxy in universe.galaxies.values():
galaxy.write_entry(SITE_PATH)
for galaxy in universe.galaxies.values():
galaxy_path = os.path.join(SITE_PATH, f"{galaxy.json_file_name}".replace(".json", ""))
galaxy_path = os.path.join(
SITE_PATH, f"{galaxy.json_file_name}".replace(".json", "")
)
if not os.path.exists(galaxy_path):
os.mkdir(galaxy_path)
relation_path = os.path.join(galaxy_path, "relations")
@ -121,8 +164,9 @@ if __name__ == "__main__":
with open(os.path.join(relation_path, ".pages"), "w") as index:
index.write(f"hide: true\n")
with ThreadPoolExecutor(max_workers=(multiprocessing.cpu_count() * 4)) as executor:
with ThreadPoolExecutor(
max_workers=(multiprocessing.cpu_count() * 4)
) as executor:
executor.map(write_relations_table, galaxy.clusters.values())
print(f"Finished in {time.time() - start_time} seconds")

View file

@ -1,5 +1,6 @@
import validators
class Cluster:
def __init__(self, uuid, galaxy, description=None, value=None, meta=None):
self.uuid = uuid
@ -8,9 +9,9 @@ class Cluster:
self.meta = meta
self.galaxy = galaxy # Reference to the Galaxy object this cluster belongs to
self.outbound_relationships = set()
self.inbound_relationships = set()
self.relationships = set()
self.outbound_relationships = set()
self.inbound_relationships = set()
self.relationships = set()
def add_outbound_relationship(self, cluster):
self.outbound_relationships.add(cluster)
@ -32,7 +33,7 @@ class Cluster:
if self.relationships:
entry += self._create_related_entry()
return entry
def _create_title_entry(self):
entry = ""
entry += f"## {self.value}\n"
@ -106,4 +107,4 @@ class Cluster:
entry += f'??? info "Related clusters"\n'
entry += f"\n"
entry += f" To see the related clusters, click [here](./relations/{self.uuid}.md).\n"
return entry
return entry

View file

@ -2,8 +2,15 @@ from modules.cluster import Cluster
from typing import List
import os
class Galaxy:
def __init__(self, galaxy_name: str, json_file_name: str, authors: List[str], description: str):
def __init__(
self,
galaxy_name: str,
json_file_name: str,
authors: List[str],
description: str,
):
self.galaxy_name = galaxy_name
self.json_file_name = json_file_name
self.authors = authors
@ -13,7 +20,9 @@ class Galaxy:
def add_cluster(self, uuid, description, value, meta):
    """Register a new ``Cluster`` under ``uuid`` unless one already exists.

    An existing entry with the same ``uuid`` is left untouched. The new
    cluster keeps a back-reference to this galaxy.
    """
    if uuid in self.clusters:
        return
    # Build the cluster a single time; a second construction would only
    # create an object that replaces an identical one.
    self.clusters[uuid] = Cluster(
        uuid=uuid, galaxy=self, description=description, value=value, meta=meta
    )
def write_entry(self, path):
galaxy_path = os.path.join(path, f"{self.json_file_name}".replace(".json", ""))
@ -30,7 +39,7 @@ class Galaxy:
entry += self._create_authors_entry()
entry += self._create_clusters_entry()
return entry
def _create_metadata_entry(self):
entry = ""
entry += "---\n"
@ -66,4 +75,4 @@ class Galaxy:
entry = ""
for cluster in self.clusters.values():
entry += cluster.generate_entry()
return entry
return entry

View file

@ -2,6 +2,7 @@ import os
from utils.helper import create_bar_chart, get_top_x, create_pie_chart
class Site:
def __init__(self, path, name) -> None:
self.path = path
@ -17,6 +18,7 @@ class Site:
with open(os.path.join(self.path, self.name), "w") as index:
index.write(self.content)
class IndexSite(Site):
def __init__(self, path) -> None:
super().__init__(path=path, name="index.md")
@ -27,6 +29,7 @@ class IndexSite(Site):
self.add_content(f"- [{galaxy.galaxy_name}](./{galaxy_folder}/index.md)\n")
self.add_content("\n")
class StatisticsSite(Site):
def __init__(self, path) -> None:
super().__init__(path=path, name="statistics.md")
@ -37,15 +40,28 @@ class StatisticsSite(Site):
flop_20 = get_top_x(galaxy_cluster_count, 20, False)
self.add_content(f"# Galaxy statistics\n")
self.add_content(f"## Galaxies with the most clusters\n\n")
self.add_content(create_bar_chart(x_axis="Galaxy", y_axis="Count", values=top_20, galaxy=True))
self.add_content(
create_bar_chart(
x_axis="Galaxy", y_axis="Count", values=top_20, galaxy=True
)
)
self.add_content(f"## Galaxies with the least clusters\n\n")
self.add_content(create_bar_chart(x_axis="Galaxy", y_axis="Count", values=flop_20, galaxy=True))
self.add_content(
create_bar_chart(
x_axis="Galaxy", y_axis="Count", values=flop_20, galaxy=True
)
)
def add_cluster_statistics(self, public_clusters, private_clusters):
    """Append the "Cluster statistics" section to the page content.

    Adds the section headings, one explanatory paragraph, and a pie chart
    comparing the two counts.

    Args:
        public_clusters: Count shown under "Public clusters".
        private_clusters: Count shown under "Private clusters".
    """
    values = {
        "Public clusters": public_clusters,
        "Private clusters": private_clusters,
    }
    self.add_content("# Cluster statistics\n")
    self.add_content("## Number of clusters\n")
    # The explanatory paragraph is added exactly once; adding it again would
    # repeat the same text on the generated page.
    self.add_content(
        "Here you can find the total number of clusters including public and private clusters.The number of public clusters has been calculated based on the number of unique Clusters in the MISP galaxy JSON files. The number of private clusters could only be approximated based on the number of relations to non-existing clusters. Therefore the number of private clusters is not accurate and only an approximation.\n\n"
    )
    self.add_content(create_pie_chart(sector="Type", unit="Count", values=values))
def add_relation_statistics(self, clusters):
@ -62,14 +78,31 @@ class StatisticsSite(Site):
top_20 = get_top_x(cluster_relations, 20)
flop_20 = get_top_x(cluster_relations, 20, False)
self.add_content(f"# Relation statistics\n")
self.add_content(f"Here you can find the total number of relations including public and private relations. The number includes relations between public clusters and relations between public and private clusters. Therefore relatons between private clusters are not included in the statistics.\n\n")
self.add_content(
f"Here you can find the total number of relations including public and private relations. The number includes relations between public clusters and relations between public and private clusters. Therefore relatons between private clusters are not included in the statistics.\n\n"
)
self.add_content(f"## Number of relations\n\n")
self.add_content(create_pie_chart(sector="Type", unit="Count", values={"Public relations": public_relations, "Private relations": private_relations}))
self.add_content(f"**Average number of relations per cluster**: {int(sum(cluster_relations.values()) / len(cluster_relations))}\n")
self.add_content(
create_pie_chart(
sector="Type",
unit="Count",
values={
"Public relations": public_relations,
"Private relations": private_relations,
},
)
)
self.add_content(
f"**Average number of relations per cluster**: {int(sum(cluster_relations.values()) / len(cluster_relations))}\n"
)
self.add_content(f"## Cluster with the most relations\n\n")
self.add_content(create_bar_chart(x_axis="Cluster", y_axis="Count", values=top_20))
self.add_content(
create_bar_chart(x_axis="Cluster", y_axis="Count", values=top_20)
)
self.add_content(f"## Cluster with the least relations\n\n")
self.add_content(create_bar_chart(x_axis="Cluster", y_axis="Count", values=flop_20))
self.add_content(
create_bar_chart(x_axis="Cluster", y_axis="Count", values=flop_20)
)
def add_synonym_statistics(self, clusters):
synonyms = {}
@ -79,4 +112,6 @@ class StatisticsSite(Site):
top_20 = get_top_x(synonyms, 20)
self.add_content(f"# Synonym statistics\n")
self.add_content(f"## Cluster with the most synonyms\n\n")
self.add_content(create_bar_chart(x_axis="Cluster", y_axis="Count", values=top_20))
self.add_content(
create_bar_chart(x_axis="Cluster", y_axis="Count", values=top_20)
)

View file

@ -12,11 +12,18 @@ class Universe:
def add_galaxy(self, galaxy_name, json_file_name, authors, description):
    """Create and register a ``Galaxy`` under ``galaxy_name`` if not yet known.

    An existing entry with the same name is left untouched.
    """
    if galaxy_name in self.galaxies:
        return
    # Construct the galaxy a single time; building it twice would only
    # replace the first object with an identical one.
    self.galaxies[galaxy_name] = Galaxy(
        galaxy_name=galaxy_name,
        json_file_name=json_file_name,
        authors=authors,
        description=description,
    )
def add_cluster(self, galaxy_name, uuid, description, value, meta):
    """Forward a cluster to the galaxy registered as ``galaxy_name``.

    Does nothing when no galaxy with that name is registered.
    """
    if galaxy_name not in self.galaxies:
        return
    # Forward exactly once; a repeated call with the same uuid would be
    # redundant work.
    self.galaxies[galaxy_name].add_cluster(
        uuid=uuid, description=description, value=value, meta=meta
    )
def define_relationship(self, cluster_a_id, cluster_b_id):
cluster_a = None
@ -41,17 +48,25 @@ class Universe:
else:
if cluster_a:
# private_cluster = self.add_cluster(uuid=cluster_b_id, galaxy_name="Unknown", description=None, value="Private Cluster", meta=None)
private_cluster = Cluster(uuid=cluster_b_id, galaxy=None, description=None, value="Private Cluster", meta=None)
private_cluster = Cluster(
uuid=cluster_b_id,
galaxy=None,
description=None,
value="Private Cluster",
meta=None,
)
self.private_clusters[cluster_b_id] = private_cluster
cluster_a.add_outbound_relationship(private_cluster)
else:
raise ValueError(f"Cluster {cluster_a} not found in any galaxy")
def get_relationships_with_levels(self, start_cluster):
def bfs_with_undirected_relationships(start_cluster):
visited = set() # Tracks whether a cluster has been visited
relationships = defaultdict(lambda: float('inf')) # Tracks the lowest level for each cluster pair
relationships = defaultdict(
lambda: float("inf")
) # Tracks the lowest level for each cluster pair
queue = deque([(start_cluster, 0)]) # Queue of (cluster, level)
@ -62,22 +77,27 @@ class Universe:
# Process all relationships regardless of direction
if self.add_inbound_relationship:
neighbors = current_cluster.outbound_relationships.union(current_cluster.inbound_relationships)
neighbors = current_cluster.outbound_relationships.union(
current_cluster.inbound_relationships
)
else:
neighbors = current_cluster.outbound_relationships
for neighbor in neighbors:
link = frozenset([current_cluster, neighbor])
if level + 1 < relationships[link]:
relationships[link] = level + 1
if neighbor not in visited and neighbor.value != "Private Cluster":
if (
neighbor not in visited
and neighbor.value != "Private Cluster"
):
queue.append((neighbor, level + 1))
# Convert the defaultdict to a list of tuples, ignoring direction
processed_relationships = []
for link, lvl in relationships.items():
# Extract clusters from the frozenset; direction is irrelevant
clusters = list(link)
# Arbitrarily choose the first cluster as 'source' for consistency
if clusters[0].value == "Private Cluster":
processed_relationships.append((clusters[1], clusters[0], lvl))
@ -86,4 +106,4 @@ class Universe:
return processed_relationships
return bfs_with_undirected_relationships(start_cluster)
return bfs_with_undirected_relationships(start_cluster)

View file

@ -1,5 +1,6 @@
import operator
def get_top_x(dict, x, big_to_small=True):
sorted_dict = sorted(
dict.items(), key=operator.itemgetter(1), reverse=big_to_small
@ -7,6 +8,7 @@ def get_top_x(dict, x, big_to_small=True):
top_x = {key: value for key, value in sorted_dict}
return top_x
def name_to_section(name):
placeholder = "__TMP__"
return (
@ -18,6 +20,7 @@ def name_to_section(name):
.replace(placeholder, "-")
) # Replace the placeholder with "-"
def create_bar_chart(x_axis, y_axis, values, log=False, galaxy=False):
if not log:
chart = f"| No. | {x_axis} | {y_axis} {{ .bar-chart }}|\n"
@ -32,6 +35,7 @@ def create_bar_chart(x_axis, y_axis, values, log=False, galaxy=False):
chart += "\n"
return chart
def create_pie_chart(sector, unit, values):
chart = f"| No. | {sector} | {unit} {{ .pie-chart }}|\n"
chart += f"|----|--------|-------|\n"
@ -40,11 +44,11 @@ def create_pie_chart(sector, unit, values):
chart += "\n"
return chart
def cluster_transform_to_link(cluster, uuid=False):
placeholder = "__TMP__"
section = (
cluster
.value.lower()
cluster.value.lower()
.replace(" - ", placeholder) # Replace " - " first
.replace(" ", "-")
.replace("/", "")
@ -57,10 +61,12 @@ def cluster_transform_to_link(cluster, uuid=False):
else:
return f"[{cluster.value}](../../{galaxy_folder}/index.md#{section})"
def galaxy_transform_to_link(galaxy):
    """Return a Markdown link to the index page of ``galaxy``.

    The link text is ``galaxy.galaxy_name``. The target folder is
    ``galaxy.json_file_name`` with every ".json" occurrence removed.
    """
    folder = galaxy.json_file_name.replace(".json", "")
    return "[{}](../../{}/index.md)".format(galaxy.galaxy_name, folder)
def generate_relations_table(relationships):
markdown = "|Cluster A | Galaxy A | Cluster B | Galaxy B | Level { .graph } |\n"
markdown += "| --- | --- | --- | --- | --- |\n"
@ -71,4 +77,4 @@ def generate_relations_table(relationships):
markdown += f"{cluster_transform_to_link(from_cluster, uuid=True)} | {galaxy_transform_to_link(from_galaxy)} | {cluster_transform_to_link(to_cluster, uuid=True)} | {galaxy_transform_to_link(to_galaxy)} | {level}\n"
else:
markdown += f"{cluster_transform_to_link(from_cluster, uuid=True)} | {galaxy_transform_to_link(from_galaxy)} | {to_cluster.value} | Unknown | {level}\n"
return markdown
return markdown