2024-08-22 16:46:56 +02:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# A simple converter script to generate the galaxy and cluster of the FIRST CSIRT Services Framework
# https://www.first.org/standards/frameworks/csirts/csirt_services_framework_v2.1
2024-08-23 15:49:44 +02:00
# Copyright (C) 2024 Jean-Louis Huynen
2024-09-02 11:13:10 +02:00
# Copyright (C) 2024 Déborah Servili
2024-08-22 16:46:56 +02:00
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2024-08-23 15:36:38 +02:00
import pdb
2024-08-22 16:46:56 +02:00
import requests
import json
import os
import uuid
import re
from bs4 import BeautifulSoup
# uuidv4 generated to be concatenated in v5: 43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0
# Galaxy metadata; serialised as-is to the galaxies/ output file at the end
# of the script.
galaxy = {
    "namespace": "first",
    "type": "first-csirt-services-framework",
    "name": "FIRST CSIRT Services Framework",
    "description": (
        "The Computer Security Incident Response Team (CSIRT) Services "
        "Framework is a high-level document describing in a structured way a "
        "collection of cyber security services and associated functions that "
        "Computer Security Incident Response Teams and other teams providing "
        "incident management related services may provide"
    ),
    "uuid": "4a72488f-ef5b-4895-a5d9-c625dee663cb",
    "version": 1,
    "icon": "user",
}
# Cluster skeleton; "values" starts empty and is filled with one entry per
# scraped service and function before the cluster is written out.
cluster = {
    "authors": ["FIRST", "CIRCL", "Jean-Louis Huynen"],
    "category": "csirt",
    "type": "first-csirt-services-framework",
    "name": "FIRST CSIRT Services Framework",
    "description": (
        "The Computer Security Incident Response Team (CSIRT) Services "
        "Framework is a high-level document describing in a structured way a "
        "collection of cyber security services and associated functions that "
        "Computer Security Incident Response Teams and other teams providing "
        "incident management related services may provide"
    ),
    "uuid": "4a72488f-ef5b-4895-a5d9-c625dee663cb",
    "source": "https://www.first.org/standards/frameworks/csirts/csirt_services_framework_v2.1",
    "values": [],
    "version": 1,
}
# URL to download. The fragment is the id of the <h2> section header that the
# parsing code further down looks up in the page.
url = (
    "https://www.first.org/standards/frameworks/csirts/"
    "csirt_services_framework_v2.1"
    "#5-Service-Area-Information-Security-Event-Management"
)
# Send a GET request to the webpage. requests applies no timeout unless one
# is given, so without it an unresponsive server would block the script
# forever; with it the call raises requests.exceptions.Timeout instead.
response = requests.get(url, timeout=30)
2024-08-23 15:36:38 +02:00
def extract_nostrong_content(element):
    """Extract purpose, description and outcome from the paragraphs after *element*.

    The first three ``<p>`` siblings following *element* are taken, in that
    order, as the "Purpose:", "Description:" and "Outcome:" paragraphs. Each
    text is extended with the stripped text of the siblings that follow its
    paragraph, up to the start of the next part.

    Args:
        element: Tag whose following siblings hold the three paragraphs
            (the script passes a service heading).

    Returns:
        dict with the keys "purpose", "description" and "outcome". Each value
        is the paragraph text without its leading label, followed by the
        continuation texts, all joined with single spaces.

    Raises:
        ValueError: If fewer than three ``<p>`` siblings follow *element*.
    """
    paragraphs = element.find_next_siblings("p", limit=3)
    if len(paragraphs) < 3:
        raise ValueError(
            "Expected Purpose, Description and Outcome paragraphs, "
            f"found only {len(paragraphs)} <p> sibling(s)"
        )

    def strip_label(text, label):
        # Remove the label only when it is really there, then trim the
        # whitespace that separated it from the text.
        text = text.strip()
        if text.startswith(label):
            text = text[len(label):]
        return text.strip()

    def gather(paragraph, label, is_boundary):
        # Paragraph text plus every following sibling up to the boundary.
        parts = [strip_label(paragraph.text, label)]
        for sibling in paragraph.find_next_siblings():
            if is_boundary(sibling):
                break
            parts.append(sibling.text.strip())
        return " ".join(parts)

    def ends_outcome(sibling):
        # The outcome stops at the next heading or at the sentence that
        # introduces the list of functions.
        if sibling.name in ["h2", "h3", "h4"]:
            return True
        return any(
            marker in sibling.text
            for marker in ["The following functions", "List of functions"]
        )

    purpose_p, description_p, outcome_p = paragraphs
    return {
        "purpose": gather(
            purpose_p, "Purpose:", lambda sibling: "Description:" in sibling.text
        ),
        "description": gather(
            description_p, "Description:", lambda sibling: "Outcome:" in sibling.text
        ),
        "outcome": gather(outcome_p, "Outcome:", ends_outcome),
    }
def extract_content(element):
    """Extract purpose, description and outcome located through ``<em>`` labels.

    For each of "Purpose:", "Description:" and "Outcome:" the next ``<em>``
    after *element* whose string contains the label is looked up. The text of
    that tag's grandparent, with the label removed, starts the corresponding
    value; the stripped texts of the grandparent's following siblings are
    appended until the next part begins.

    Args:
        element: Tag from which the forward search starts (the script passes
            a service or function heading).

    Returns:
        dict with the keys "purpose", "description" and "outcome", each a
        single string whose fragments are joined with single spaces. The
        outcome is cut at the first "The following functions".

    Raises:
        ValueError: If no ``<em>`` containing one of the labels is found
            after *element*.
    """

    def find_container(label):
        # Tag.string is None for an <em> holding more than one child, so the
        # filter must not run a substring test on it.
        title = element.find_next(
            "em", string=lambda text: text is not None and label in text
        )
        if title is None:
            raise ValueError(f"No <em> containing {label!r} found after the element")
        return title.parent.parent

    def gather(container, label, is_boundary):
        # Container text without the label, plus every following sibling up
        # to the boundary.
        parts = [container.get_text(strip=True).replace(label, "").strip()]
        for sibling in container.find_next_siblings():
            if is_boundary(sibling):
                break
            parts.append(sibling.text.strip())
        return " ".join(parts)

    def ends_outcome(sibling):
        # The outcome stops at the next heading or at the sentence that
        # introduces the list of functions.
        if sibling.name in ["h2", "h3", "h4"]:
            return True
        return any(
            marker in sibling.text
            for marker in ["The following functions", "List of functions"]
        )

    description_box = find_container("Description:")
    purpose_box = find_container("Purpose:")
    outcome_box = find_container("Outcome:")

    content = {}
    content["purpose"] = gather(
        purpose_box, "Purpose:", lambda sibling: "Description:" in sibling.text
    )
    content["description"] = gather(
        description_box, "Description:", lambda sibling: "Outcome:" in sibling.text
    )
    outcome = gather(outcome_box, "Outcome:", ends_outcome)
    # The container itself may already include the sentence introducing the
    # functions; keep only what precedes it.
    content["outcome"] = outcome.split("The following functions")[0].strip()
    return content
2024-08-22 16:46:56 +02:00
2024-08-23 15:49:44 +02:00
2024-08-22 16:46:56 +02:00
def remove_heading(input_string):
    """Return *input_string* without a leading section number.

    A leading run of digits, optionally followed by dot-separated digit
    groups (e.g. "5", "5.1", "5.1.2"), is removed together with the
    whitespace after it. Strings that do not start that way are returned
    unchanged.
    """
    numbering = re.match(r"^\d+(\.\d+)*\s+", input_string)
    if numbering is None:
        return input_string
    return input_string[numbering.end():]
# Fixed uuidv4 namespace: every cluster value gets the uuidv5 of its name in
# this namespace, so the same name always maps to the same UUID.
UUID_NAMESPACE = uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0")
# File name shared by the galaxy and the cluster output.
OUTPUT_FILENAME = "first-csirt-services-framework.json"
# id of the <h2> heading after which services and functions are collected.
SECTION_ID = "5-Service-Area-Information-Security-Event-Management"


def _value_uuid(name):
    """Return, as a string, the uuidv5 of *name* in UUID_NAMESPACE."""
    return str(uuid.uuid5(UUID_NAMESPACE, name))


def _cluster_value(name, content, related):
    """Build one entry of the cluster "values" list from extracted content."""
    return {
        "description": content["description"],
        "meta": {
            "purpose": content["purpose"],
            "outcome": content["outcome"],
        },
        "uuid": _value_uuid(name),
        "value": name,
        "related": related,
    }


def _write_json(directory, payload):
    """Write *payload* as JSON to ../<directory>/OUTPUT_FILENAME next to this script."""
    path = os.path.join(os.path.dirname(__file__), "..", directory, OUTPUT_FILENAME)
    # ensure_ascii=False writes non-ASCII characters unescaped, so the file
    # encoding is pinned to UTF-8 instead of depending on the locale default.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2, sort_keys=True, ensure_ascii=False)
        # only needed for the beauty and to be compliant with jq_all_the_things
        f.write("\n")


# Check if the request was successful
if response.status_code == 200:
    # Parse the page content with BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    # Remove in-page anchor links (href starting with "#"); other links stay.
    for a in soup.find_all("a", href=True):
        if a["href"].startswith("#"):
            a.decompose()

    # Locate, by id, the <h2> heading of the section to start from.
    section_header = soup.find("h2", id=SECTION_ID)

    if section_header:
        # <h3> siblings after the header are handled as services, <h4>
        # siblings as functions.
        services = section_header.find_next_siblings("h3")
        functions = section_header.find_next_siblings("h4")

        for service in services:
            # This one service is parsed by paragraph position instead of
            # through <em> labels.
            if "Monitoring and detection" in service.text:
                content = extract_nostrong_content(service)
            else:
                content = extract_content(service)
            name = remove_heading(service.text.strip())
            cluster["values"].append(_cluster_value(name, content, []))

        for function in functions:
            content = extract_content(function)
            # get the parent service: the closest <h3> before this function
            parent_service = function.find_previous("h3")
            relationship = {
                "dest-uuid": _value_uuid(remove_heading(parent_service.text.strip())),
                "type": "part-of",
            }
            name = remove_heading(function.text.strip())
            cluster["values"].append(_cluster_value(name, content, [relationship]))

        _write_json("galaxies", galaxy)
        _write_json("clusters", cluster)
    else:
        print("Couldn't find the section header.")
else:
    print(f"Failed to download the webpage. Status code: {response.status_code}")