mirror of
https://github.com/MISP/misp-galaxy.git
synced 2024-11-26 16:57:18 +00:00
Formatted code using black
This commit is contained in:
parent
8f93eb9ed6
commit
0e1f0db3f7
1 changed files with 213 additions and 152 deletions
|
@ -8,10 +8,10 @@ from typing import List
|
||||||
|
|
||||||
import validators
|
import validators
|
||||||
|
|
||||||
CLUSTER_PATH = '../../clusters'
|
CLUSTER_PATH = "../../clusters"
|
||||||
SITE_PATH = './site/docs'
|
SITE_PATH = "./site/docs"
|
||||||
|
|
||||||
FILES_TO_IGNORE = [] # if you want to skip a specific cluster in the generation
|
FILES_TO_IGNORE = [] # if you want to skip a specific cluster in the generation
|
||||||
|
|
||||||
# Variables for statistics
|
# Variables for statistics
|
||||||
public_relations_count = 0
|
public_relations_count = 0
|
||||||
|
@ -41,7 +41,7 @@ Clusters serve as an open and freely accessible knowledge base, which can be uti
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
STATISTICS= """
|
STATISTICS = """
|
||||||
## Statistics
|
## Statistics
|
||||||
|
|
||||||
You can find some statistics about MISP galaxies [here](./statistics.md).
|
You can find some statistics about MISP galaxies [here](./statistics.md).
|
||||||
|
@ -58,8 +58,11 @@ We encourage collaboration and contributions to the [MISP Galaxy JSON files](htt
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class Galaxy():
|
|
||||||
def __init__(self, cluster_list: List[dict], authors, description, name, json_file_name):
|
class Galaxy:
|
||||||
|
def __init__(
|
||||||
|
self, cluster_list: List[dict], authors, description, name, json_file_name
|
||||||
|
):
|
||||||
self.cluster_list = cluster_list
|
self.cluster_list = cluster_list
|
||||||
self.authors = authors
|
self.authors = authors
|
||||||
self.description = description
|
self.description = description
|
||||||
|
@ -70,39 +73,41 @@ class Galaxy():
|
||||||
|
|
||||||
def _create_metadata_entry(self):
|
def _create_metadata_entry(self):
|
||||||
self.entry += "---\n"
|
self.entry += "---\n"
|
||||||
self.entry += f'title: {self.name}\n'
|
self.entry += f"title: {self.name}\n"
|
||||||
meta_description = self.description.replace("\"", "-")
|
meta_description = self.description.replace('"', "-")
|
||||||
self.entry += f'description: {meta_description}\n'
|
self.entry += f"description: {meta_description}\n"
|
||||||
self.entry += "---\n"
|
self.entry += "---\n"
|
||||||
|
|
||||||
def _create_title_entry(self):
|
def _create_title_entry(self):
|
||||||
self.entry += f'# {self.name}\n'
|
self.entry += f"# {self.name}\n"
|
||||||
|
|
||||||
def _create_description_entry(self):
|
def _create_description_entry(self):
|
||||||
self.entry += f'{self.description}\n'
|
self.entry += f"{self.description}\n"
|
||||||
|
|
||||||
def _create_authors_entry(self):
|
def _create_authors_entry(self):
|
||||||
if self.authors:
|
if self.authors:
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
self.entry += f'??? info "Authors"\n'
|
self.entry += f'??? info "Authors"\n'
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
self.entry += f' | Authors and/or Contributors|\n'
|
self.entry += f" | Authors and/or Contributors|\n"
|
||||||
self.entry += f' |----------------------------|\n'
|
self.entry += f" |----------------------------|\n"
|
||||||
for author in self.authors:
|
for author in self.authors:
|
||||||
self.entry += f' |{author}|\n'
|
self.entry += f" |{author}|\n"
|
||||||
|
|
||||||
def _create_clusters(self):
|
def _create_clusters(self):
|
||||||
clusters = []
|
clusters = []
|
||||||
for cluster in self.cluster_list:
|
for cluster in self.cluster_list:
|
||||||
clusters.append(Cluster(
|
clusters.append(
|
||||||
value=cluster.get('value', None),
|
Cluster(
|
||||||
description=cluster.get('description', None),
|
value=cluster.get("value", None),
|
||||||
uuid=cluster.get('uuid', None),
|
description=cluster.get("description", None),
|
||||||
date=cluster.get('date', None),
|
uuid=cluster.get("uuid", None),
|
||||||
related_list=cluster.get('related', None),
|
date=cluster.get("date", None),
|
||||||
meta=cluster.get('meta', None),
|
related_list=cluster.get("related", None),
|
||||||
galaxie=self
|
meta=cluster.get("meta", None),
|
||||||
))
|
galaxie=self,
|
||||||
|
)
|
||||||
|
)
|
||||||
return clusters
|
return clusters
|
||||||
|
|
||||||
def _create_clusters_entry(self, cluster_dict):
|
def _create_clusters_entry(self, cluster_dict):
|
||||||
|
@ -122,10 +127,11 @@ class Galaxy():
|
||||||
galaxy_path = os.path.join(path, self.json_file_name)
|
galaxy_path = os.path.join(path, self.json_file_name)
|
||||||
if not os.path.exists(galaxy_path):
|
if not os.path.exists(galaxy_path):
|
||||||
os.mkdir(galaxy_path)
|
os.mkdir(galaxy_path)
|
||||||
with open(os.path.join(galaxy_path, 'index.md'), "w") as index:
|
with open(os.path.join(galaxy_path, "index.md"), "w") as index:
|
||||||
index.write(self.entry)
|
index.write(self.entry)
|
||||||
|
|
||||||
class Cluster():
|
|
||||||
|
class Cluster:
|
||||||
def __init__(self, description, uuid, date, value, related_list, meta, galaxie):
|
def __init__(self, description, uuid, date, value, related_list, meta, galaxie):
|
||||||
self.description = description
|
self.description = description
|
||||||
self.uuid = uuid
|
self.uuid = uuid
|
||||||
|
@ -141,61 +147,61 @@ class Cluster():
|
||||||
public_clusters_dict[self.uuid] = self.galaxie
|
public_clusters_dict[self.uuid] = self.galaxie
|
||||||
|
|
||||||
def _create_title_entry(self):
|
def _create_title_entry(self):
|
||||||
self.entry += f'## {self.value}\n'
|
self.entry += f"## {self.value}\n"
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
|
|
||||||
def _create_description_entry(self):
|
def _create_description_entry(self):
|
||||||
if self.description:
|
if self.description:
|
||||||
self.entry += f'{self.description}\n'
|
self.entry += f"{self.description}\n"
|
||||||
|
|
||||||
def _create_synonyms_entry(self):
|
def _create_synonyms_entry(self):
|
||||||
if isinstance(self.meta, dict) and self.meta.get('synonyms'):
|
if isinstance(self.meta, dict) and self.meta.get("synonyms"):
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
self.entry += f'??? info "Synonyms"\n'
|
self.entry += f'??? info "Synonyms"\n'
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
self.entry += f' "synonyms" in the meta part typically refer to alternate names or labels that are associated with a particular {self.value}.\n\n'
|
self.entry += f' "synonyms" in the meta part typically refer to alternate names or labels that are associated with a particular {self.value}.\n\n'
|
||||||
self.entry += f' | Known Synonyms |\n'
|
self.entry += f" | Known Synonyms |\n"
|
||||||
self.entry += f' |---------------------|\n'
|
self.entry += f" |---------------------|\n"
|
||||||
global synonyms_count_dict
|
global synonyms_count_dict
|
||||||
synonyms_count = 0
|
synonyms_count = 0
|
||||||
for synonym in sorted(self.meta['synonyms']):
|
for synonym in sorted(self.meta["synonyms"]):
|
||||||
synonyms_count += 1
|
synonyms_count += 1
|
||||||
self.entry += f' | `{synonym}` |\n'
|
self.entry += f" | `{synonym}` |\n"
|
||||||
synonyms_count_dict[self.uuid] = synonyms_count
|
synonyms_count_dict[self.uuid] = synonyms_count
|
||||||
|
|
||||||
def _create_uuid_entry(self):
|
def _create_uuid_entry(self):
|
||||||
if self.uuid:
|
if self.uuid:
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
self.entry += f'??? tip "Internal MISP references"\n'
|
self.entry += f'??? tip "Internal MISP references"\n'
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
self.entry += f' UUID `{self.uuid}` which can be used as unique global reference for `{self.value}` in MISP communities and other software using the MISP galaxy\n'
|
self.entry += f" UUID `{self.uuid}` which can be used as unique global reference for `{self.value}` in MISP communities and other software using the MISP galaxy\n"
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
|
|
||||||
def _create_refs_entry(self):
|
def _create_refs_entry(self):
|
||||||
if isinstance(self.meta, dict) and self.meta.get('refs'):
|
if isinstance(self.meta, dict) and self.meta.get("refs"):
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
self.entry += f'??? info "External references"\n'
|
self.entry += f'??? info "External references"\n'
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
|
|
||||||
for ref in self.meta['refs']:
|
for ref in self.meta["refs"]:
|
||||||
if validators.url(ref):
|
if validators.url(ref):
|
||||||
self.entry += f' - [{ref}]({ref}) - :material-archive: :material-arrow-right: [webarchive](https://web.archive.org/web/*/{ref})\n'
|
self.entry += f" - [{ref}]({ref}) - :material-archive: :material-arrow-right: [webarchive](https://web.archive.org/web/*/{ref})\n"
|
||||||
else:
|
else:
|
||||||
self.entry += f' - {ref}\n'
|
self.entry += f" - {ref}\n"
|
||||||
|
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
|
|
||||||
def _create_associated_metadata_entry(self):
|
def _create_associated_metadata_entry(self):
|
||||||
if isinstance(self.meta, dict):
|
if isinstance(self.meta, dict):
|
||||||
excluded_meta = ['synonyms', 'refs']
|
excluded_meta = ["synonyms", "refs"]
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
self.entry += f'??? info "Associated metadata"\n'
|
self.entry += f'??? info "Associated metadata"\n'
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
self.entry += f' |Metadata key {{ .no-filter }} |Value|\n'
|
self.entry += f" |Metadata key {{ .no-filter }} |Value|\n"
|
||||||
self.entry += f' |-----------------------------------|-----|\n'
|
self.entry += f" |-----------------------------------|-----|\n"
|
||||||
for meta in sorted(self.meta.keys()):
|
for meta in sorted(self.meta.keys()):
|
||||||
if meta not in excluded_meta:
|
if meta not in excluded_meta:
|
||||||
self.entry += f' | {meta} | {self.meta[meta]} |\n'
|
self.entry += f" | {meta} | {self.meta[meta]} |\n"
|
||||||
|
|
||||||
def get_related_clusters(self, cluster_dict, depth=-1, visited=None, level=1):
|
def get_related_clusters(self, cluster_dict, depth=-1, visited=None, level=1):
|
||||||
global public_relations_count
|
global public_relations_count
|
||||||
|
@ -228,7 +234,21 @@ class Cluster():
|
||||||
private_relations_count += 1
|
private_relations_count += 1
|
||||||
if dest_uuid not in private_clusters:
|
if dest_uuid not in private_clusters:
|
||||||
private_clusters.append(dest_uuid)
|
private_clusters.append(dest_uuid)
|
||||||
related_clusters.append((self, Cluster(value="Private Cluster", uuid=dest_uuid, date=None, description=None, related_list=None, meta=None, galaxie=None), level))
|
related_clusters.append(
|
||||||
|
(
|
||||||
|
self,
|
||||||
|
Cluster(
|
||||||
|
value="Private Cluster",
|
||||||
|
uuid=dest_uuid,
|
||||||
|
date=None,
|
||||||
|
description=None,
|
||||||
|
related_list=None,
|
||||||
|
meta=None,
|
||||||
|
galaxie=None,
|
||||||
|
),
|
||||||
|
level,
|
||||||
|
)
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
related_cluster = cluster_dict[dest_uuid]
|
related_cluster = cluster_dict[dest_uuid]
|
||||||
|
@ -237,10 +257,15 @@ class Cluster():
|
||||||
|
|
||||||
related_clusters.append((self, related_cluster, level))
|
related_clusters.append((self, related_cluster, level))
|
||||||
|
|
||||||
if (depth > 1 or depth == -1) and (cluster["dest-uuid"] not in visited or visited[cluster["dest-uuid"]] > level + 1):
|
if (depth > 1 or depth == -1) and (
|
||||||
|
cluster["dest-uuid"] not in visited
|
||||||
|
or visited[cluster["dest-uuid"]] > level + 1
|
||||||
|
):
|
||||||
new_depth = depth - 1 if depth > 1 else -1
|
new_depth = depth - 1 if depth > 1 else -1
|
||||||
if cluster["dest-uuid"] in cluster_dict:
|
if cluster["dest-uuid"] in cluster_dict:
|
||||||
related_clusters += cluster_dict[cluster["dest-uuid"]].get_related_clusters(cluster_dict, new_depth, visited, level+1)
|
related_clusters += cluster_dict[
|
||||||
|
cluster["dest-uuid"]
|
||||||
|
].get_related_clusters(cluster_dict, new_depth, visited, level + 1)
|
||||||
|
|
||||||
if empty_uuids > 0:
|
if empty_uuids > 0:
|
||||||
empty_uuids_dict[self.value] = empty_uuids
|
empty_uuids_dict[self.value] = empty_uuids
|
||||||
|
@ -268,43 +293,51 @@ class Cluster():
|
||||||
|
|
||||||
else:
|
else:
|
||||||
cluster_dict[key1] = cluster
|
cluster_dict[key1] = cluster
|
||||||
related_clusters = [cluster for cluster in related_clusters if cluster not in to_remove]
|
related_clusters = [
|
||||||
|
cluster for cluster in related_clusters if cluster not in to_remove
|
||||||
|
]
|
||||||
|
|
||||||
return related_clusters
|
return related_clusters
|
||||||
|
|
||||||
def _create_related_entry(self):
|
def _create_related_entry(self):
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
self.entry += f'??? info "Related clusters"\n'
|
self.entry += f'??? info "Related clusters"\n'
|
||||||
self.entry += f'\n'
|
self.entry += f"\n"
|
||||||
self.entry += f' To see the related clusters, click [here](./relations/{self.uuid}.md).\n'
|
self.entry += f" To see the related clusters, click [here](./relations/{self.uuid}.md).\n"
|
||||||
|
|
||||||
def _get_related_entry(self, relations):
|
def _get_related_entry(self, relations):
|
||||||
output = ""
|
output = ""
|
||||||
output += f'## Related clusters for {self.value}\n'
|
output += f"## Related clusters for {self.value}\n"
|
||||||
output += f'\n'
|
output += f"\n"
|
||||||
output += f'| Cluster A | Cluster B | Level {{ .graph }} |\n'
|
output += f"| Cluster A | Cluster B | Level {{ .graph }} |\n"
|
||||||
output += f'|-----------|-----------|-------|\n'
|
output += f"|-----------|-----------|-------|\n"
|
||||||
for relation in relations:
|
for relation in relations:
|
||||||
placeholder = "__TMP__"
|
placeholder = "__TMP__"
|
||||||
|
|
||||||
cluster_a_section = (relation[0].value.lower()
|
cluster_a_section = (
|
||||||
.replace(" - ", placeholder) # Replace " - " first
|
relation[0]
|
||||||
.replace(" ", "-")
|
.value.lower()
|
||||||
.replace("/", "")
|
.replace(" - ", placeholder) # Replace " - " first
|
||||||
.replace(":", "")
|
.replace(" ", "-")
|
||||||
.replace(placeholder, "-")) # Replace the placeholder with "-"
|
.replace("/", "")
|
||||||
|
.replace(":", "")
|
||||||
|
.replace(placeholder, "-")
|
||||||
|
) # Replace the placeholder with "-"
|
||||||
|
|
||||||
cluster_b_section = (relation[1].value.lower()
|
cluster_b_section = (
|
||||||
.replace(" - ", placeholder) # Replace " - " first
|
relation[1]
|
||||||
.replace(" ", "-")
|
.value.lower()
|
||||||
.replace("/", "")
|
.replace(" - ", placeholder) # Replace " - " first
|
||||||
.replace(":", "")
|
.replace(" ", "-")
|
||||||
.replace(placeholder, "-")) # Replace the placeholder with "-"
|
.replace("/", "")
|
||||||
|
.replace(":", "")
|
||||||
|
.replace(placeholder, "-")
|
||||||
|
) # Replace the placeholder with "-"
|
||||||
|
|
||||||
if cluster_b_section != "private-cluster":
|
if cluster_b_section != "private-cluster":
|
||||||
output += f'| [{relation[0].value} ({relation[0].uuid})](../../{relation[0].galaxie.json_file_name}/index.md#{cluster_a_section}) | [{relation[1].value} ({relation[1].uuid})](../../{relation[1].galaxie.json_file_name}/index.md#{cluster_b_section}) | {relation[2]} |\n'
|
output += f"| [{relation[0].value} ({relation[0].uuid})](../../{relation[0].galaxie.json_file_name}/index.md#{cluster_a_section}) | [{relation[1].value} ({relation[1].uuid})](../../{relation[1].galaxie.json_file_name}/index.md#{cluster_b_section}) | {relation[2]} |\n"
|
||||||
else:
|
else:
|
||||||
output += f'| [{relation[0].value} ({relation[0].uuid})](../../{relation[0].galaxie.json_file_name}/index.md#{cluster_a_section}) | {relation[1].value} ({relation[1].uuid}) | {relation[2]} |\n'
|
output += f"| [{relation[0].value} ({relation[0].uuid})](../../{relation[0].galaxie.json_file_name}/index.md#{cluster_a_section}) | {relation[1].value} ({relation[1].uuid}) | {relation[2]} |\n"
|
||||||
return output
|
return output
|
||||||
|
|
||||||
def create_entry(self, cluster_dict):
|
def create_entry(self, cluster_dict):
|
||||||
|
@ -326,115 +359,134 @@ class Cluster():
|
||||||
galaxy_path = os.path.join(path, self.galaxie.json_file_name)
|
galaxy_path = os.path.join(path, self.galaxie.json_file_name)
|
||||||
if not os.path.exists(galaxy_path):
|
if not os.path.exists(galaxy_path):
|
||||||
os.mkdir(galaxy_path)
|
os.mkdir(galaxy_path)
|
||||||
relation_path = os.path.join(galaxy_path, 'relations')
|
relation_path = os.path.join(galaxy_path, "relations")
|
||||||
if not os.path.exists(relation_path):
|
if not os.path.exists(relation_path):
|
||||||
os.mkdir(relation_path)
|
os.mkdir(relation_path)
|
||||||
with open(os.path.join(relation_path, ".pages"), "w") as index:
|
with open(os.path.join(relation_path, ".pages"), "w") as index:
|
||||||
index.write(f'hide: true\n')
|
index.write(f"hide: true\n")
|
||||||
with open(os.path.join(relation_path, f'{self.uuid}.md'), "w") as index:
|
with open(os.path.join(relation_path, f"{self.uuid}.md"), "w") as index:
|
||||||
index.write(self._get_related_entry(related_clusters))
|
index.write(self._get_related_entry(related_clusters))
|
||||||
|
|
||||||
|
|
||||||
def create_index(galaxies):
|
def create_index(galaxies):
|
||||||
index_output = INTRO
|
index_output = INTRO
|
||||||
index_output += STATISTICS
|
index_output += STATISTICS
|
||||||
for galaxie in galaxies:
|
for galaxie in galaxies:
|
||||||
index_output += f'- [{galaxie.name}](./{galaxie.json_file_name}/index.md)\n'
|
index_output += f"- [{galaxie.name}](./{galaxie.json_file_name}/index.md)\n"
|
||||||
index_output += CONTRIBUTING
|
index_output += CONTRIBUTING
|
||||||
return index_output
|
return index_output
|
||||||
|
|
||||||
|
|
||||||
def get_top_x(dict, x, big_to_small=True):
|
def get_top_x(dict, x, big_to_small=True):
|
||||||
sorted_dict = sorted(dict.items(), key=operator.itemgetter(1), reverse=big_to_small)[:x]
|
sorted_dict = sorted(
|
||||||
|
dict.items(), key=operator.itemgetter(1), reverse=big_to_small
|
||||||
|
)[:x]
|
||||||
top_x = [key for key, value in sorted_dict]
|
top_x = [key for key, value in sorted_dict]
|
||||||
top_x_values = sorted(dict.values(), reverse=big_to_small)[:x]
|
top_x_values = sorted(dict.values(), reverse=big_to_small)[:x]
|
||||||
return top_x, top_x_values
|
return top_x, top_x_values
|
||||||
|
|
||||||
|
|
||||||
def name_to_section(name):
|
def name_to_section(name):
|
||||||
placeholder = "__TMP__"
|
placeholder = "__TMP__"
|
||||||
return (name.lower()
|
return (
|
||||||
.replace(" - ", placeholder) # Replace " - " first
|
name.lower()
|
||||||
.replace(" ", "-")
|
.replace(" - ", placeholder) # Replace " - " first
|
||||||
.replace("/", "")
|
.replace(" ", "-")
|
||||||
.replace(":", "")
|
.replace("/", "")
|
||||||
.replace(placeholder, "-")) # Replace the placeholder with "-"
|
.replace(":", "")
|
||||||
|
.replace(placeholder, "-")
|
||||||
|
) # Replace the placeholder with "-"
|
||||||
|
|
||||||
|
|
||||||
def create_statistics(cluster_dict):
|
def create_statistics(cluster_dict):
|
||||||
statistic_output = ""
|
statistic_output = ""
|
||||||
statistic_output += f'# MISP Galaxy statistics\n'
|
statistic_output += f"# MISP Galaxy statistics\n"
|
||||||
statistic_output +='The MISP galaxy statistics are automatically generated based on the MISP galaxy JSON files. Therefore the statistics only include detailed infomration about public clusters and relations. Some statistics about private clusters and relations is included but only as an approximation based on the information gathered from the public clusters.\n'
|
statistic_output += "The MISP galaxy statistics are automatically generated based on the MISP galaxy JSON files. Therefore the statistics only include detailed infomration about public clusters and relations. Some statistics about private clusters and relations is included but only as an approximation based on the information gathered from the public clusters.\n"
|
||||||
|
|
||||||
statistic_output += f'# Cluster statistics\n'
|
statistic_output += f"# Cluster statistics\n"
|
||||||
statistic_output += f'## Number of clusters\n'
|
statistic_output += f"## Number of clusters\n"
|
||||||
statistic_output += f'Here you can find the total number of clusters including public and private clusters. The number of public clusters has been calculated based on the number of unique Clusters in the MISP galaxy JSON files. The number of private clusters could only be approximated based on the number of relations to non-existing clusters. Therefore the number of private clusters is not accurate and only an approximation.\n'
|
statistic_output += f"Here you can find the total number of clusters including public and private clusters. The number of public clusters has been calculated based on the number of unique Clusters in the MISP galaxy JSON files. The number of private clusters could only be approximated based on the number of relations to non-existing clusters. Therefore the number of private clusters is not accurate and only an approximation.\n"
|
||||||
statistic_output += f'\n'
|
statistic_output += f"\n"
|
||||||
statistic_output += f'| No. | Type | Count {{ .pie-chart }}|\n'
|
statistic_output += f"| No. | Type | Count {{ .pie-chart }}|\n"
|
||||||
statistic_output += f'|----|------|-------|\n'
|
statistic_output += f"|----|------|-------|\n"
|
||||||
statistic_output += f'| 1 | Public clusters | {len(public_clusters_dict)} |\n'
|
statistic_output += f"| 1 | Public clusters | {len(public_clusters_dict)} |\n"
|
||||||
statistic_output += f'| 2 | Private clusters | {len(private_clusters)} |\n'
|
statistic_output += f"| 2 | Private clusters | {len(private_clusters)} |\n"
|
||||||
statistic_output += f'\n'
|
statistic_output += f"\n"
|
||||||
|
|
||||||
statistic_output += f'## Galaxies with the most clusters\n'
|
statistic_output += f"## Galaxies with the most clusters\n"
|
||||||
galaxy_counts = {}
|
galaxy_counts = {}
|
||||||
for galaxy in public_clusters_dict.values():
|
for galaxy in public_clusters_dict.values():
|
||||||
galaxy_counts[galaxy] = galaxy_counts.get(galaxy, 0) + 1
|
galaxy_counts[galaxy] = galaxy_counts.get(galaxy, 0) + 1
|
||||||
top_galaxies, top_galaxies_values = get_top_x(galaxy_counts, 20)
|
top_galaxies, top_galaxies_values = get_top_x(galaxy_counts, 20)
|
||||||
statistic_output += f' | No. | Galaxy | Count {{ .log-bar-chart }}|\n'
|
statistic_output += f" | No. | Galaxy | Count {{ .log-bar-chart }}|\n"
|
||||||
statistic_output += f' |----|--------|-------|\n'
|
statistic_output += f" |----|--------|-------|\n"
|
||||||
for i, galaxy in enumerate(top_galaxies, 1):
|
for i, galaxy in enumerate(top_galaxies, 1):
|
||||||
galaxy_section = name_to_section(galaxy.json_file_name)
|
galaxy_section = name_to_section(galaxy.json_file_name)
|
||||||
statistic_output += f' | {i} | [{galaxy.name}](../{galaxy_section}) | {top_galaxies_values[i-1]} |\n'
|
statistic_output += f" | {i} | [{galaxy.name}](../{galaxy_section}) | {top_galaxies_values[i-1]} |\n"
|
||||||
statistic_output += f'\n'
|
statistic_output += f"\n"
|
||||||
|
|
||||||
statistic_output += f'## Galaxies with the least clusters\n'
|
statistic_output += f"## Galaxies with the least clusters\n"
|
||||||
flop_galaxies, flop_galaxies_values = get_top_x(galaxy_counts, 20, False)
|
flop_galaxies, flop_galaxies_values = get_top_x(galaxy_counts, 20, False)
|
||||||
statistic_output += f' | No. | Galaxy | Count {{ .bar-chart }}|\n'
|
statistic_output += f" | No. | Galaxy | Count {{ .bar-chart }}|\n"
|
||||||
statistic_output += f' |----|--------|-------|\n'
|
statistic_output += f" |----|--------|-------|\n"
|
||||||
for i, galaxy in enumerate(flop_galaxies, 1):
|
for i, galaxy in enumerate(flop_galaxies, 1):
|
||||||
galaxy_section = name_to_section(galaxy.json_file_name)
|
galaxy_section = name_to_section(galaxy.json_file_name)
|
||||||
statistic_output += f' | {i} | [{galaxy.name}](../{galaxy_section}) | {flop_galaxies_values[i-1]} |\n'
|
statistic_output += f" | {i} | [{galaxy.name}](../{galaxy_section}) | {flop_galaxies_values[i-1]} |\n"
|
||||||
statistic_output += f'\n'
|
statistic_output += f"\n"
|
||||||
|
|
||||||
statistic_output += f'# Relation statistics\n'
|
statistic_output += f"# Relation statistics\n"
|
||||||
statistic_output += f'Here you can find the total number of relations including public and private relations. The number includes relations between public clusters and relations between public and private clusters. Therefore relatons between private clusters are not included in the statistics.\n'
|
statistic_output += f"Here you can find the total number of relations including public and private relations. The number includes relations between public clusters and relations between public and private clusters. Therefore relatons between private clusters are not included in the statistics.\n"
|
||||||
statistic_output += f'\n'
|
statistic_output += f"\n"
|
||||||
statistic_output += f'## Number of relations\n'
|
statistic_output += f"## Number of relations\n"
|
||||||
statistic_output += f'| No. | Type | Count {{ .pie-chart }}|\n'
|
statistic_output += f"| No. | Type | Count {{ .pie-chart }}|\n"
|
||||||
statistic_output += f'|----|------|-------|\n'
|
statistic_output += f"|----|------|-------|\n"
|
||||||
statistic_output += f'| 1 | Public relations | {public_relations_count} |\n'
|
statistic_output += f"| 1 | Public relations | {public_relations_count} |\n"
|
||||||
statistic_output += f'| 2 | Private relations | {private_relations_count} |\n'
|
statistic_output += f"| 2 | Private relations | {private_relations_count} |\n"
|
||||||
statistic_output += f'\n'
|
statistic_output += f"\n"
|
||||||
|
|
||||||
statistic_output += f'**Average number of relations per cluster**: {int(sum(relation_count_dict.values()) / len(relation_count_dict))}\n'
|
statistic_output += f"**Average number of relations per cluster**: {int(sum(relation_count_dict.values()) / len(relation_count_dict))}\n"
|
||||||
|
|
||||||
statistic_output += f'## Cluster with the most relations\n'
|
statistic_output += f"## Cluster with the most relations\n"
|
||||||
relation_count_dict_names = {cluster_dict[uuid].value: count for uuid, count in relation_count_dict.items()}
|
relation_count_dict_names = {
|
||||||
|
cluster_dict[uuid].value: count for uuid, count in relation_count_dict.items()
|
||||||
|
}
|
||||||
top_25_relation, top_25_relation_values = get_top_x(relation_count_dict_names, 20)
|
top_25_relation, top_25_relation_values = get_top_x(relation_count_dict_names, 20)
|
||||||
statistic_output += f' | No. | Cluster | Count {{ .bar-chart }}|\n'
|
statistic_output += f" | No. | Cluster | Count {{ .bar-chart }}|\n"
|
||||||
statistic_output += f' |----|--------|-------|\n'
|
statistic_output += f" |----|--------|-------|\n"
|
||||||
relation_count_dict_galaxies = {cluster_dict[uuid].value: cluster_dict[uuid].galaxie.json_file_name for uuid in relation_count_dict.keys()}
|
relation_count_dict_galaxies = {
|
||||||
|
cluster_dict[uuid].value: cluster_dict[uuid].galaxie.json_file_name
|
||||||
|
for uuid in relation_count_dict.keys()
|
||||||
|
}
|
||||||
for i, cluster in enumerate(top_25_relation, 1):
|
for i, cluster in enumerate(top_25_relation, 1):
|
||||||
cluster_section = name_to_section(cluster)
|
cluster_section = name_to_section(cluster)
|
||||||
statistic_output += f' | {i} | [{cluster}](../{relation_count_dict_galaxies[cluster]}/#{cluster_section}) | {top_25_relation_values[i-1]} |\n'
|
statistic_output += f" | {i} | [{cluster}](../{relation_count_dict_galaxies[cluster]}/#{cluster_section}) | {top_25_relation_values[i-1]} |\n"
|
||||||
statistic_output += f'\n'
|
statistic_output += f"\n"
|
||||||
|
|
||||||
statistic_output += f'# Synonyms statistics\n'
|
statistic_output += f"# Synonyms statistics\n"
|
||||||
statistic_output += f'## Cluster with the most synonyms\n'
|
statistic_output += f"## Cluster with the most synonyms\n"
|
||||||
synonyms_count_dict_names = {cluster_dict[uuid].value: count for uuid, count in synonyms_count_dict.items()}
|
synonyms_count_dict_names = {
|
||||||
|
cluster_dict[uuid].value: count for uuid, count in synonyms_count_dict.items()
|
||||||
|
}
|
||||||
top_synonyms, top_synonyms_values = get_top_x(synonyms_count_dict_names, 20)
|
top_synonyms, top_synonyms_values = get_top_x(synonyms_count_dict_names, 20)
|
||||||
statistic_output += f' | No. | Cluster | Count {{ .bar-chart }}|\n'
|
statistic_output += f" | No. | Cluster | Count {{ .bar-chart }}|\n"
|
||||||
statistic_output += f' |----|--------|-------|\n'
|
statistic_output += f" |----|--------|-------|\n"
|
||||||
synonyms_count_dict_galaxies = {cluster_dict[uuid].value: cluster_dict[uuid].galaxie.json_file_name for uuid in synonyms_count_dict.keys()}
|
synonyms_count_dict_galaxies = {
|
||||||
|
cluster_dict[uuid].value: cluster_dict[uuid].galaxie.json_file_name
|
||||||
|
for uuid in synonyms_count_dict.keys()
|
||||||
|
}
|
||||||
for i, cluster in enumerate(top_synonyms, 1):
|
for i, cluster in enumerate(top_synonyms, 1):
|
||||||
cluster_section = name_to_section(cluster)
|
cluster_section = name_to_section(cluster)
|
||||||
statistic_output += f' | {i} | [{cluster}](../{synonyms_count_dict_galaxies[cluster]}/#{cluster_section}) | {top_synonyms_values[i-1]} |\n'
|
statistic_output += f" | {i} | [{cluster}](../{synonyms_count_dict_galaxies[cluster]}/#{cluster_section}) | {top_synonyms_values[i-1]} |\n"
|
||||||
statistic_output += f'\n'
|
statistic_output += f"\n"
|
||||||
|
|
||||||
return statistic_output
|
return statistic_output
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
galaxies_fnames = []
|
galaxies_fnames = []
|
||||||
for f in os.listdir(CLUSTER_PATH):
|
for f in os.listdir(CLUSTER_PATH):
|
||||||
if '.json' in f and f not in FILES_TO_IGNORE:
|
if ".json" in f and f not in FILES_TO_IGNORE:
|
||||||
galaxies_fnames.append(f)
|
galaxies_fnames.append(f)
|
||||||
galaxies_fnames.sort()
|
galaxies_fnames.sort()
|
||||||
|
|
||||||
|
@ -442,7 +494,15 @@ def main():
|
||||||
for galaxy in galaxies_fnames:
|
for galaxy in galaxies_fnames:
|
||||||
with open(os.path.join(CLUSTER_PATH, galaxy)) as fr:
|
with open(os.path.join(CLUSTER_PATH, galaxy)) as fr:
|
||||||
galaxie_json = json.load(fr)
|
galaxie_json = json.load(fr)
|
||||||
galaxies.append(Galaxy(galaxie_json['values'], galaxie_json['authors'], galaxie_json['description'], galaxie_json['name'], galaxy.split('.')[0]))
|
galaxies.append(
|
||||||
|
Galaxy(
|
||||||
|
galaxie_json["values"],
|
||||||
|
galaxie_json["authors"],
|
||||||
|
galaxie_json["description"],
|
||||||
|
galaxie_json["name"],
|
||||||
|
galaxy.split(".")[0],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
cluster_dict = {}
|
cluster_dict = {}
|
||||||
for galaxy in galaxies:
|
for galaxy in galaxies:
|
||||||
|
@ -459,13 +519,14 @@ def main():
|
||||||
index_output = create_index(galaxies)
|
index_output = create_index(galaxies)
|
||||||
statistic_output = create_statistics(cluster_dict=cluster_dict)
|
statistic_output = create_statistics(cluster_dict=cluster_dict)
|
||||||
|
|
||||||
with open(os.path.join(SITE_PATH, 'index.md'), "w") as index:
|
with open(os.path.join(SITE_PATH, "index.md"), "w") as index:
|
||||||
index.write(index_output)
|
index.write(index_output)
|
||||||
|
|
||||||
with open(os.path.join(SITE_PATH, 'statistics.md'), "w") as index:
|
with open(os.path.join(SITE_PATH, "statistics.md"), "w") as index:
|
||||||
index.write(statistic_output)
|
index.write(statistic_output)
|
||||||
|
|
||||||
print(f"Finished file creation in {time.time() - start_time} seconds")
|
print(f"Finished file creation in {time.time() - start_time} seconds")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
Loading…
Reference in a new issue