mirror of
https://github.com/MISP/misp-galaxy.git
synced 2024-11-25 16:27:19 +00:00
chg [intel] mistakes on wikipedia got fixed
This commit is contained in:
parent
64803fb28c
commit
7885a8fd00
2 changed files with 35 additions and 19 deletions
|
@ -458,6 +458,16 @@
|
||||||
"uuid": "46b43a4e-f9db-5a9f-a65f-c0d444315d26",
|
"uuid": "46b43a4e-f9db-5a9f-a65f-c0d444315d26",
|
||||||
"value": "Financial Intelligence Unit (Bahamas)"
|
"value": "Financial Intelligence Unit (Bahamas)"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"description": "National Crime Intelligence Agency (NCIA)",
|
||||||
|
"meta": {
|
||||||
|
"country": "BS",
|
||||||
|
"country_name": "Bahamas"
|
||||||
|
},
|
||||||
|
"related": [],
|
||||||
|
"uuid": "afc0c983-dd11-50bc-8ab8-6f9879bbddf2",
|
||||||
|
"value": "National Crime Intelligence Agency (NCIA)"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"description": "NSA – National Security Agency",
|
"description": "NSA – National Security Agency",
|
||||||
"meta": {
|
"meta": {
|
||||||
|
@ -2353,6 +2363,29 @@
|
||||||
"uuid": "82947bb1-4702-5c23-8d8a-aed56968e6df",
|
"uuid": "82947bb1-4702-5c23-8d8a-aed56968e6df",
|
||||||
"value": "Intelligence Protection Organization of Army of the Guardians of the Islamic Revolution"
|
"value": "Intelligence Protection Organization of Army of the Guardians of the Islamic Revolution"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"description": "Intelligence org of FARAJA",
|
||||||
|
"meta": {
|
||||||
|
"country": "IR",
|
||||||
|
"country_name": "Iran"
|
||||||
|
},
|
||||||
|
"related": [],
|
||||||
|
"uuid": "0f5e5eed-104d-56d8-a136-50da25ff1211",
|
||||||
|
"value": "Intelligence org of FARAJA"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Intelligence org of the Islamic Republic of Iran[12]",
|
||||||
|
"meta": {
|
||||||
|
"country": "IR",
|
||||||
|
"country_name": "Iran",
|
||||||
|
"refs": [
|
||||||
|
"https://en.wikipedia.org#cite_note-12"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"related": [],
|
||||||
|
"uuid": "fe4ae08b-ee63-5b38-a58c-fd2b3288c826",
|
||||||
|
"value": "Intelligence org of the Islamic Republic of Iran[12]"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"description": "General Security Directorate - (GSD) - (Internal security agency)",
|
"description": "General Security Directorate - (GSD) - (Internal security agency)",
|
||||||
"meta": {
|
"meta": {
|
||||||
|
|
|
@ -44,10 +44,6 @@ def get_notes_on_lower_level(content):
|
||||||
if li.find('ul'):
|
if li.find('ul'):
|
||||||
notes.extend(get_notes_on_lower_level(li.find('ul')))
|
notes.extend(get_notes_on_lower_level(li.find('ul')))
|
||||||
else:
|
else:
|
||||||
|
|
||||||
if li.text in ["Islamic Republic of Iran Army:", "Islamic Revolutionary Guard Corps:", "FARAJA", "Judicial system of the Islamic Republic of Iran", "Intelligence [12]", "Intelligence org"]: # These are not intelligence agencies but Iran's entry is broken
|
|
||||||
continue
|
|
||||||
|
|
||||||
a_tag = li.find('a')
|
a_tag = li.find('a')
|
||||||
|
|
||||||
title = li.text
|
title = li.text
|
||||||
|
@ -71,17 +67,8 @@ def get_notes_on_lower_level(content):
|
||||||
def get_agencies_from_country(heading, current_country):
|
def get_agencies_from_country(heading, current_country):
|
||||||
agencies = []
|
agencies = []
|
||||||
contents = []
|
contents = []
|
||||||
if current_country != "Gambia": # Gambia has a mistake on the wikipedia page
|
contents.append(heading.find_next('ul'))
|
||||||
contents.append(heading.find_next('ul'))
|
|
||||||
else:
|
|
||||||
soup = BeautifulSoup(str(heading), 'html.parser')
|
|
||||||
ul_tag = soup.new_tag('ul')
|
|
||||||
li_tag = soup.new_tag('li')
|
|
||||||
a_tag = heading.find_next('p').find('a')
|
|
||||||
li_tag.append(a_tag)
|
|
||||||
ul_tag.append(li_tag)
|
|
||||||
contents.append(ul_tag)
|
|
||||||
|
|
||||||
current_content = contents[0]
|
current_content = contents[0]
|
||||||
while True:
|
while True:
|
||||||
next_sibling = current_content.find_next_sibling()
|
next_sibling = current_content.find_next_sibling()
|
||||||
|
@ -89,10 +76,6 @@ def get_agencies_from_country(heading, current_country):
|
||||||
if next_sibling is None or next_sibling.name == 'h2':
|
if next_sibling is None or next_sibling.name == 'h2':
|
||||||
break
|
break
|
||||||
|
|
||||||
if current_country == "Bahamas" and next_sibling.name == 'h2': # Bahamas has a mistake on the wikipedia page
|
|
||||||
current_country = None
|
|
||||||
continue
|
|
||||||
|
|
||||||
if next_sibling.name == 'ul':
|
if next_sibling.name == 'ul':
|
||||||
contents.append(next_sibling)
|
contents.append(next_sibling)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue