mirror of
https://github.com/MISP/misp-galaxy.git
synced 2024-11-29 18:27:19 +00:00
Improve [statistics] graphs for statistics
This commit is contained in:
parent
710837770f
commit
71d90c2c77
7 changed files with 243 additions and 53 deletions
|
@ -1,4 +0,0 @@
|
|||
validators
|
||||
|
||||
mkdocs-git-committers-plugin
|
||||
mkdocs-rss-plugin
|
|
@ -266,7 +266,10 @@ class Cluster():
|
|||
for relation in relations:
|
||||
cluster_a_section = relation[0].value.lower().replace(" ", "+").replace("/", "").replace(":", "")
|
||||
cluster_b_section = relation[1].value.lower().replace(" ", "+").replace("/", "").replace(":", "")
|
||||
output += f'| [{relation[0].value} ({relation[0].uuid})](../../{relation[0].galaxie_file_name}/index.md#{cluster_a_section}) | [{relation[1].value} ({relation[1].uuid})](../../{relation[1].galaxie_file_name}/index.md#{cluster_b_section}) | {relation[2]} |\n'
|
||||
if cluster_b_section != "private+cluster":
|
||||
output += f'| [{relation[0].value} ({relation[0].uuid})](../../{relation[0].galaxie_file_name}/index.md#{cluster_a_section}) | [{relation[1].value} ({relation[1].uuid})](../../{relation[1].galaxie_file_name}/index.md#{cluster_b_section}) | {relation[2]} |\n'
|
||||
else:
|
||||
output += f'| [{relation[0].value} ({relation[0].uuid})](../../{relation[0].galaxie_file_name}/index.md#{cluster_a_section}) | {relation[1].value} ({relation[1].uuid}) | {relation[2]} |\n'
|
||||
return output
|
||||
|
||||
def create_entry(self, cluster_dict):
|
||||
|
@ -348,81 +351,78 @@ def get_top_x(dict, x, big_to_small=True):
|
|||
|
||||
def create_statistics():
|
||||
statistic_output = ""
|
||||
statistic_output += f'# MISP Galaxy statistics\n'
|
||||
statistic_output +='The MISP galaxy statistics are automatically generated based on the MISP galaxy JSON files. Therefore the statistics only include detailed infomration about public clusters and relations.\n'
|
||||
|
||||
statistic_output += f'# Cluster statistics\n'
|
||||
statistic_output += f'## Number of clusters\n'
|
||||
statistic_output += create_pie_chart("Number of clusters", [("Public clusters", len(public_clusters_dict)), ("Private clusters", len(private_clusters))])
|
||||
statistic_output += f'Here you can find the total number of clusters including public and private clusters. The number of public clusters has been calculated based on the number of unique Clusters in the MISP galaxy JSON files. The number of private clusters could only be approximated based on the number of relations to non-existing clusters. Therefore the number of private clusters is not accurate and only an approximation.\n'
|
||||
statistic_output += f'\n'
|
||||
statistic_output += f'| No. | Type | Count {{ .pie-chart }}|\n'
|
||||
statistic_output += f'|----|------|-------|\n'
|
||||
statistic_output += f'| 1 | Public clusters | {len(public_clusters_dict)} |\n'
|
||||
statistic_output += f'| 2 | Private clusters | {len(private_clusters)} |\n'
|
||||
statistic_output += f'\n'
|
||||
|
||||
statistic_output += f'## Galaxies with the most clusters\n'
|
||||
galaxy_counts = {}
|
||||
for galaxy in public_clusters_dict.values():
|
||||
galaxy_counts[galaxy] = galaxy_counts.get(galaxy, 0) + 1
|
||||
top_galaxies, top_galaxies_values = get_top_x(galaxy_counts, 25)
|
||||
statistic_output += create_xy_chart("Galaxies with the most clusters", 3000, 1000, top_galaxies, "Number of clusters", top_galaxies_values)
|
||||
top_galaxies, top_galaxies_values = get_top_x(galaxy_counts, 20)
|
||||
# statistic_output += create_xy_chart("Galaxies with the most clusters", 3000, 1000, top_galaxies, "Number of clusters", top_galaxies_values)
|
||||
statistic_output += f' | No. | Galaxy | Count {{ .bar-chart }}|\n'
|
||||
statistic_output += f' |----|--------|-------|\n'
|
||||
for i, galaxy in enumerate(top_galaxies.split(", "), 1):
|
||||
statistic_output += f'{i}. [{galaxy}](./{galaxy}/index.md)\n'
|
||||
# statistic_output += f'{i}. [{galaxy}](./{galaxy}/index.md)\n'
|
||||
statistic_output += f' | {i} | [{galaxy}](./{galaxy}/index.md) | {top_galaxies_values[i-1]} |\n'
|
||||
statistic_output += f'\n'
|
||||
|
||||
statistic_output += f'## Galaxies with the least clusters\n'
|
||||
flop_galaxies, flop_galaxies_values = get_top_x(galaxy_counts, 25, False)
|
||||
statistic_output += create_xy_chart("Galaxies with the least clusters", 3000, 1000, flop_galaxies, "Number of clusters", flop_galaxies_values)
|
||||
flop_galaxies, flop_galaxies_values = get_top_x(galaxy_counts, 20, False)
|
||||
# statistic_output += create_xy_chart("Galaxies with the least clusters", 3000, 1000, flop_galaxies, "Number of clusters", flop_galaxies_values)
|
||||
statistic_output += f' | No. | Galaxy | Count {{ .bar-chart }}|\n'
|
||||
statistic_output += f' |----|--------|-------|\n'
|
||||
for i, galaxy in enumerate(flop_galaxies.split(", "), 1):
|
||||
statistic_output += f'{i}. [{galaxy}](./{galaxy}/index.md)\n'
|
||||
# statistic_output += f'{i}. [{galaxy}](./{galaxy}/index.md)\n'
|
||||
statistic_output += f' | {i} | [{galaxy}](./{galaxy}/index.md) | {flop_galaxies_values[i-1]} |\n'
|
||||
statistic_output += f'\n'
|
||||
|
||||
# galaxy_number = 0
|
||||
# global galaxies
|
||||
# for galaxy in galaxies:
|
||||
# galaxy_number += 1
|
||||
# statistic_output += f'**Average number of clusters per galaxy**: {len(public_clusters_dict) / galaxy_number}\n'
|
||||
|
||||
statistic_output += f'# Relation statistics\n'
|
||||
statistic_output += f'## Number of relations\n'
|
||||
statistic_output += create_pie_chart("Number of relations", [("Public relations", public_relations_count), ("Private relations", private_relations_count)])
|
||||
statistic_output += f'| No. | Type | Count {{ .pie-chart }}|\n'
|
||||
statistic_output += f'|----|------|-------|\n'
|
||||
statistic_output += f'| 1 | Public relations | {public_relations_count} |\n'
|
||||
statistic_output += f'| 2 | Private relations | {private_relations_count} |\n'
|
||||
statistic_output += f'\n'
|
||||
|
||||
statistic_output += f'**Average number of relations per cluster**: {sum(relation_count_dict.values()) / len(relation_count_dict)}\n'
|
||||
statistic_output += f'**Average number of relations per cluster**: {int(sum(relation_count_dict.values()) / len(relation_count_dict))}\n'
|
||||
|
||||
statistic_output += f'## Cluster with the most relations\n'
|
||||
top_25_relation, top_25_relation_values = get_top_x(relation_count_dict, 25)
|
||||
statistic_output += create_xy_chart("Cluster with the most relations", 3000, 1000, top_25_relation, "Number of relations", top_25_relation_values)
|
||||
|
||||
statistic_output += f'## Cluster with the least relations\n'
|
||||
top_25_relation, top_25_relation_values = get_top_x(relation_count_dict, 25, False)
|
||||
statistic_output += create_xy_chart("Cluster with the least relations", 3000, 1000, top_25_relation, "Number of relations", top_25_relation_values)
|
||||
top_25_relation, top_25_relation_values = get_top_x(relation_count_dict, 20)
|
||||
# statistic_output += create_xy_chart("Cluster with the most relations", 3000, 1000, top_25_relation, "Number of relations", top_25_relation_values)
|
||||
statistic_output += f' | No. | Cluster | Count {{ .bar-chart }}|\n'
|
||||
statistic_output += f' |----|--------|-------|\n'
|
||||
for i, cluster in enumerate(top_25_relation.split(", "), 1):
|
||||
# statistic_output += f'{i}. [{cluster}](./{cluster}/index.md)\n'
|
||||
statistic_output += f' | {i} | [{cluster}](./{cluster}/index.md) | {top_25_relation_values[i-1]} |\n'
|
||||
statistic_output += f'\n'
|
||||
|
||||
statistic_output += f'# Synonyms statistics\n'
|
||||
statistic_output += f'## Cluster with the most synonyms\n'
|
||||
top_25_synonyms, top_25_synonyms_values = get_top_x(synonyms_count_dict, 25)
|
||||
statistic_output += create_xy_chart("Cluster with the most synonyms", 3000, 1000, top_25_synonyms, "Number of synonyms", top_25_synonyms_values)
|
||||
|
||||
statistic_output += f'## Cluster with the least synonyms\n'
|
||||
top_25_synonyms, top_25_synonyms_values = get_top_x(synonyms_count_dict, 25, False)
|
||||
statistic_output += create_xy_chart("Cluster with the least synonyms", 3000, 1000, top_25_synonyms, "Number of synonyms", top_25_synonyms_values)
|
||||
top_synonyms, top_synonyms_values = get_top_x(synonyms_count_dict, 20)
|
||||
# statistic_output += create_xy_chart("Cluster with the most synonyms", 3000, 1000, top_synonyms, "Number of synonyms", top_synonyms_values)
|
||||
statistic_output += f' | No. | Cluster | Count {{ .bar-chart }}|\n'
|
||||
statistic_output += f' |----|--------|-------|\n'
|
||||
for i, cluster in enumerate(top_synonyms.split(", "), 1):
|
||||
# statistic_output += f'{i}. [{cluster}](./{cluster}/index.md)\n'
|
||||
statistic_output += f' | {i} | [{cluster}](./{cluster}/index.md) | {top_synonyms_values[i-1]} |\n'
|
||||
statistic_output += f'\n'
|
||||
|
||||
statistic_output += f'# Empty UUIDs statistics\n'
|
||||
statistic_output += f'**Number of empty UUIDs**: {sum(empty_uuids_dict.values())}\n'
|
||||
statistic_output += f'\n'
|
||||
statistic_output += f'**Empty UUIDs per cluster**: {empty_uuids_dict}\n'
|
||||
|
||||
|
||||
print(f"Public relations: {public_relations_count}")
|
||||
print(f"Private relations: {private_relations_count}")
|
||||
print(f"Total relations: {public_relations_count + private_relations_count}")
|
||||
print(f"Percetage of private relations: {private_relations_count / (public_relations_count + private_relations_count) * 100}%")
|
||||
print(f"Private clusters: {len(private_clusters)}")
|
||||
print(f"Public clusters: {len(public_clusters_dict)}")
|
||||
print(f"Total clusters: {len(private_clusters) + len(public_clusters_dict)}")
|
||||
print(f"Percentage of private clusters: {len(private_clusters) / (len(private_clusters) + len(public_clusters_dict)) * 100}%")
|
||||
print(f"Average number of relations per cluster: {sum(relation_count_dict.values()) / len(relation_count_dict)}")
|
||||
print(f"Max number of relations per cluster: {max(relation_count_dict.values())} from {max(relation_count_dict, key=relation_count_dict.get)}")
|
||||
print(f"Min number of relations per cluster: {min(relation_count_dict.values())} from {min(relation_count_dict, key=relation_count_dict.get)}")
|
||||
print(f"Average number of synonyms per cluster: {sum(synonyms_count_dict.values()) / len(synonyms_count_dict)}")
|
||||
print(f"Max number of synonyms per cluster: {max(synonyms_count_dict.values())} from {max(synonyms_count_dict, key=synonyms_count_dict.get)}")
|
||||
print(f"Min number of synonyms per cluster: {min(synonyms_count_dict.values())} from {min(synonyms_count_dict, key=synonyms_count_dict.get)}")
|
||||
print(f"Number of empty UUIDs: {sum(empty_uuids_dict.values())}")
|
||||
print(f"Empty UUIDs per cluster: {empty_uuids_dict}")
|
||||
print(sorted(relation_count_dict.items(), key=operator.itemgetter(1), reverse=True)[:30])
|
||||
|
||||
return statistic_output
|
||||
|
||||
def main():
|
||||
|
|
48
tools/mkdocs/requirements.txt
Normal file
48
tools/mkdocs/requirements.txt
Normal file
|
@ -0,0 +1,48 @@
|
|||
Babel==2.14.0
|
||||
bracex==2.4
|
||||
certifi==2023.11.17
|
||||
cffi==1.16.0
|
||||
charset-normalizer==3.3.2
|
||||
click==8.1.7
|
||||
colorama==0.4.6
|
||||
cryptography==42.0.1
|
||||
Deprecated==1.2.14
|
||||
ghp-import==2.1.0
|
||||
gitdb==4.0.11
|
||||
GitPython==3.1.41
|
||||
graphviz==0.20.1
|
||||
idna==3.6
|
||||
Jinja2==3.1.3
|
||||
Markdown==3.5.2
|
||||
MarkupSafe==2.1.4
|
||||
mergedeep==1.3.4
|
||||
mkdocs==1.5.3
|
||||
mkdocs-awesome-pages-plugin==2.9.2
|
||||
mkdocs-git-committers-plugin==0.2.3
|
||||
mkdocs-material==9.5.6
|
||||
mkdocs-material-extensions==1.3.1
|
||||
mkdocs-rss-plugin==1.12.0
|
||||
natsort==8.4.0
|
||||
packaging==23.2
|
||||
paginate==0.5.6
|
||||
pathspec==0.12.1
|
||||
platformdirs==4.1.0
|
||||
pycparser==2.21
|
||||
PyGithub==2.2.0
|
||||
Pygments==2.17.2
|
||||
PyJWT==2.8.0
|
||||
pymdown-extensions==10.7
|
||||
PyNaCl==1.5.0
|
||||
python-dateutil==2.8.2
|
||||
PyYAML==6.0.1
|
||||
pyyaml_env_tag==0.1
|
||||
regex==2023.12.25
|
||||
requests==2.31.0
|
||||
six==1.16.0
|
||||
smmap==5.0.1
|
||||
typing_extensions==4.9.0
|
||||
urllib3==2.1.0
|
||||
validators==0.22.0
|
||||
watchdog==3.0.0
|
||||
wcmatch==8.5
|
||||
wrapt==1.16.0
|
145
tools/mkdocs/site/docs/01_attachements/javascripts/statistics.js
Normal file
145
tools/mkdocs/site/docs/01_attachements/javascripts/statistics.js
Normal file
|
@ -0,0 +1,145 @@
|
|||
document$.subscribe(function () {
|
||||
|
||||
/**
 * Extract chart data from a rendered statistics table.
 *
 * The first row is treated as the header and skipped. For every other row the
 * second cell supplies the label and the third cell the numeric value. Rows
 * that do not have at least three <td> cells, or whose third cell is not a
 * finite number, are ignored instead of aborting the whole script.
 *
 * @param {Element} table - Table element to read.
 * @returns {Array<{name: string, value: number}>} One entry per usable row,
 *     in document order.
 */
function parseTable(table) {
    var data = [];
    table.querySelectorAll("tr").forEach((row, i) => {
        if (i === 0) {
            return; // header row
        }
        var cells = row.querySelectorAll("td");
        if (cells.length < 3) {
            return; // not a data row; indexing cells[1] / cells[2] would throw
        }
        var value = Number(cells[2].textContent);
        if (!Number.isFinite(value)) {
            return; // a non-numeric count cannot be drawn
        }
        data.push({ name: cells[1].textContent.trim(), value: value });
    });
    return data;
}
|
||||
|
||||
/**
 * Draw a pie chart with one labelled slice per data entry and append it to a
 * container element.
 *
 * @param {Array<{name: string, value: number}>} data - Slices to draw; `name`
 *     is used as the label and colour key, `value` as the slice size.
 * @param {string} elementId - Selector handed to d3.select for the container
 *     (for example "#graph0").
 */
function createPieChart(data, elementId) {
    // Fixed canvas size; the radius leaves a 20px margin inside the SVG
    var width = 500, height = 500;
    var radius = Math.min(width, height) / 2 - 20;

    // Append SVG for the graph
    var svg = d3.select(elementId).append("svg")
        .attr("width", width)
        .attr("height", height);

    // Group centred in the SVG so the arcs can be drawn around (0, 0)
    var g = svg.append("g")
        .attr("transform", "translate(" + width / 2 + "," + height / 2 + ")");

    // d3.quantize samples the interpolator at i / (n - 1), which is 0 / 0 for
    // a single entry, so pick the middle of the ramp explicitly in that case.
    var palette = data.length === 1
        ? [d3.interpolateSpectral(0.5)]
        : d3.quantize(t => d3.interpolateSpectral(t * 0.8 + 0.1), data.length).reverse();

    // Colour scale keyed by slice name
    var color = d3.scaleOrdinal()
        .domain(data.map(d => d.name))
        .range(palette);

    // Compute the start/end angle of each slice
    var pie = d3.pie()
        .value(d => d.value);
    var slices = pie(data);

    // One arc generator shared by the slice paths and the label positions
    var arc = d3.arc()
        .innerRadius(0)
        .outerRadius(radius);

    // Build the pie chart
    g.selectAll("path")
        .data(slices)
        .enter()
        .append("path")
        .attr("d", arc)
        .attr("fill", d => color(d.data.name))
        .attr("stroke", "black")
        .style("stroke-width", "2px")
        .style("opacity", 0.7);

    // Add labels at the centroid of each slice
    g.selectAll("text")
        .data(slices)
        .enter()
        .append("text")
        .text(d => d.data.name)
        .attr("transform", d => "translate(" + arc.centroid(d) + ")")
        .style("text-anchor", "middle")
        // CSS lengths need a unit; a bare number is not a valid font-size value
        .style("font-size", "17px");
}
|
||||
|
||||
/**
 * Draw a vertical bar chart with one bar per data entry and append it to a
 * container element.
 *
 * @param {Array<{name: string, value: number}>} data - Bars to draw; `name`
 *     is the x-axis category and colour key, `value` the bar height.
 * @param {string} elementId - Selector handed to d3.select for the container
 *     (for example "#graph3").
 */
function createBarChart(data, elementId) {
    // Set up the dimensions of the graph
    var svgWidth = 1000, svgHeight = 1500;
    // Large bottom margin leaves room for the rotated x-axis labels
    var margin = { top: 20, right: 200, bottom: 400, left: 60 },
        width = svgWidth - margin.left - margin.right,
        height = svgHeight - margin.top - margin.bottom;

    // Append SVG for the graph; `svg` is the inner group shifted by the margins
    var svg = d3.select(elementId).append("svg")
        .attr("width", svgWidth)
        .attr("height", svgHeight)
        .append("g")
        .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

    // Set up the scales
    var x = d3.scaleBand()
        .range([0, width])
        .padding(0.1)
        .domain(data.map(d => d.name));

    // d3.max returns undefined for empty data, and a [0, 0] domain makes the
    // linear scale degenerate, so fall back to [0, 1] when there is no
    // positive value. Otherwise add 10% headroom above the tallest bar.
    var maxYValue = d3.max(data, d => d.value);
    var yMax = maxYValue > 0 ? maxYValue + maxYValue * 0.1 : 1;
    var y = d3.scaleLinear()
        .range([height, 0])
        .domain([0, yMax]);

    // Set up the color scale
    var color = d3.scaleOrdinal()
        .range(d3.schemeCategory10);

    // Set up the axes
    var xAxis = d3.axisBottom(x)
        .tickSize(0)
        .tickPadding(6);

    var yAxis = d3.axisLeft(y);

    // Add the bars
    svg.selectAll(".bar")
        .data(data)
        .enter().append("rect")
        .attr("class", "bar")
        .attr("x", d => x(d.name))
        .attr("y", d => y(d.value))
        .attr("width", x.bandwidth())
        .attr("height", d => height - y(d.value))
        .attr("fill", d => color(d.name));

    // Add and rotate x-axis labels
    svg.append("g")
        .attr("transform", "translate(0," + height + ")")
        .call(xAxis)
        .selectAll("text")
        .style("text-anchor", "end")
        .attr("dx", "-.8em")
        .attr("dy", ".15em")
        .attr("transform", "rotate(-65)"); // Rotate the labels

    // Add the y-axis
    svg.append("g")
        .call(yAxis);
}
|
||||
|
||||
|
||||
// Render a chart above every table whose header cell is tagged with the
// `pie-chart` or `bar-chart` class; all other tables are left untouched.
document.querySelectorAll("table").forEach((table, index) => {
    var pieChart = table.querySelector("th.pie-chart");
    var barChart = table.querySelector("th.bar-chart");
    if (!pieChart && !barChart) {
        return; // no chart requested: do not insert an empty container
    }

    // Declared locally (an undeclared assignment would create a global and
    // throws in strict mode). The id is derived from the table's index.
    var graphId = "graph" + index;
    var div = document.createElement("div");
    div.id = graphId;
    table.parentNode.insertBefore(div, table);

    var data = parseTable(table);
    if (pieChart) {
        createPieChart(data, "#" + graphId);
    }
    if (barChart) {
        createBarChart(data, "#" + graphId);
    }
});
|
||||
|
||||
});
|
2
tools/mkdocs/site/docs/package-lock.json
generated
2
tools/mkdocs/site/docs/package-lock.json
generated
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"name": "mkdocs",
|
||||
"name": "docs",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
|
|
|
@ -69,7 +69,8 @@ extra_javascript:
|
|||
# - javascripts/tablefilter.js
|
||||
# - "https://unpkg.com/tablefilter@0.7.3/dist/tablefilter/tablefilter.js"
|
||||
# - "https://d3js.org/d3.v6.min.js"
|
||||
- 01_attachements/javascripts/d3.js
|
||||
- 01_attachements/javascripts/graph.js
|
||||
- 01_attachements/javascripts/statistics.js
|
||||
- node_modules/tablefilter/dist/tablefilter/tablefilter.js
|
||||
- node_modules/d3/dist/d3.min.js
|
||||
|
||||
|
|
Loading…
Reference in a new issue