From 7885a8fd007cfd93fc570de78ca329eb15f46166 Mon Sep 17 00:00:00 2001 From: niclas Date: Wed, 13 Mar 2024 10:10:35 +0100 Subject: [PATCH] chg [intel] mistakes on wikipedia got fixed --- clusters/intelligence-agencies.json | 33 +++++++++++++++++++++++++++++ tools/IntelAgencies/main.py | 21 ++---------------- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/clusters/intelligence-agencies.json b/clusters/intelligence-agencies.json index 4d79f84..0e3952f 100644 --- a/clusters/intelligence-agencies.json +++ b/clusters/intelligence-agencies.json @@ -458,6 +458,16 @@ "uuid": "46b43a4e-f9db-5a9f-a65f-c0d444315d26", "value": "Financial Intelligence Unit (Bahamas)" }, + { + "description": "National Crime Intelligence Agency (NCIA)", + "meta": { + "country": "BS", + "country_name": "Bahamas" + }, + "related": [], + "uuid": "afc0c983-dd11-50bc-8ab8-6f9879bbddf2", + "value": "National Crime Intelligence Agency (NCIA)" + }, { "description": "NSA – National Security Agency", "meta": { @@ -2353,6 +2363,29 @@ "uuid": "82947bb1-4702-5c23-8d8a-aed56968e6df", "value": "Intelligence Protection Organization of Army of the Guardians of the Islamic Revolution" }, + { + "description": "Intelligence org of FARAJA", + "meta": { + "country": "IR", + "country_name": "Iran" + }, + "related": [], + "uuid": "0f5e5eed-104d-56d8-a136-50da25ff1211", + "value": "Intelligence org of FARAJA" + }, + { + "description": "Intelligence org of the Islamic Republic of Iran[12]", + "meta": { + "country": "IR", + "country_name": "Iran", + "refs": [ + "https://en.wikipedia.org#cite_note-12" + ] + }, + "related": [], + "uuid": "fe4ae08b-ee63-5b38-a58c-fd2b3288c826", + "value": "Intelligence org of the Islamic Republic of Iran[12]" + }, { "description": "General Security Directorate - (GSD) - (Internal security agency)", "meta": { diff --git a/tools/IntelAgencies/main.py b/tools/IntelAgencies/main.py index ac35bd0..416e8f3 100644 --- a/tools/IntelAgencies/main.py +++ b/tools/IntelAgencies/main.py @@ -44,10 +44,6 @@ def get_notes_on_lower_level(content): if li.find('ul'): notes.extend(get_notes_on_lower_level(li.find('ul'))) else: - - if li.text in ["Islamic Republic of Iran Army:", "Islamic Revolutionary Guard Corps:", "FARAJA", "Judicial system of the Islamic Republic of Iran", "Intelligence [12]", "Intelligence org"]: # These are not intelligence agencies but Iran's entry is broken - continue - a_tag = li.find('a') title = li.text @@ -71,17 +67,8 @@ def get_notes_on_lower_level(content): def get_agencies_from_country(heading, current_country): agencies = [] contents = [] - if current_country != "Gambia": # Gambia has a mistake on the wikipedia page - contents.append(heading.find_next('ul')) - else: - soup = BeautifulSoup(str(heading), 'html.parser') - ul_tag = soup.new_tag('ul') - li_tag = soup.new_tag('li') - a_tag = heading.find_next('p').find('a') - li_tag.append(a_tag) - ul_tag.append(li_tag) - contents.append(ul_tag) - + contents.append(heading.find_next('ul')) + current_content = contents[0] while True: next_sibling = current_content.find_next_sibling() @@ -89,10 +76,6 @@ def get_agencies_from_country(heading, current_country): if next_sibling is None or next_sibling.name == 'h2': break - if current_country == "Bahamas" and next_sibling.name == 'h2': # Bahamas has a mistake on the wikipedia page - current_country = None - continue - if next_sibling.name == 'ul': contents.append(next_sibling)