Add [agencies] refs

2024-11-26 08:47:18 +00:00 · 2024-03-12 11:22:30 +01:00 · 2024-03-12 11:22:30 +01:00 · bb28408b14
commit bb28408b14
parent 0d26334448
5 changed files with 4751 additions and 33 deletions
--- a/clusters/intelligence-agencies.json
+++ b/clusters/intelligence-agencies.json
--- a/galaxies/intelligence-agencies.json
+++ b/galaxies/intelligence-agencies.json
@@ -0,0 +1,9 @@
+{
+  "description": "List of intelligence agencies",
+  "icon": "ninja",
+  "name": "intelligence-agencies",
+  "namespace": "intelligence-agency",
+  "type": "intelligence-agency",
+  "uuid": "3ef969e7-96cd-4048-aa83-191ac457d0db",
+  "version": 1
+}
--- a/tools/WikipediaAPI/lol.html
+++ b/tools/WikipediaAPI/lol.html
--- a/tools/WikipediaAPI/main.py
+++ b/tools/WikipediaAPI/main.py
@@ -3,19 +3,19 @@ from modules.intel import IntelAgency, Meta, Galaxy, Cluster
 import os
 import uuid
 import json
-import re

 from bs4 import BeautifulSoup

 CLUSTER_PATH = '../../clusters'
 GALAXY_PATH = '../../galaxies'
 GALAXY_NAME = 'intelligence-agencies'
-UUID = str(uuid.uuid4())
+UUID = "3ef969e7-96cd-4048-aa83-191ac457d0db"
+WIKIPEDIA_URL = "https://en.wikipedia.org"

 def get_UUIDs():
-    if GALAXY_NAME in os.listdir(CLUSTER_PATH):
+    if f"{GALAXY_NAME}.json" in os.listdir(CLUSTER_PATH):
        uuids = {}
-        with open(os.path.join(CLUSTER_PATH, GALAXY_NAME)) as fr:
+        with open(os.path.join(CLUSTER_PATH, f"{GALAXY_NAME}.json")) as fr:
            galaxy_json = json.load(fr)
            for cluster in galaxy_json["values"]:
                uuids[cluster["value"]] = cluster["uuid"]
@@ -28,18 +28,29 @@ def get_notes_on_lower_level(content):
        if li.find('ul'):
            notes.extend(get_notes_on_lower_level(li.find('ul')))
        else:
-            notes.append(li.text)
+            a_tag = li.find('a')
+
+            title = li.text
+            link_href = None
+            description = li.text
+
+            if a_tag:
+                title = a_tag.get('title', description)
+                if a_tag.has_attr('href'):
+                    link_href = f'{WIKIPEDIA_URL}{a_tag["href"]}'
+
+            notes.append((title, link_href, description, None))
    return notes

 def get_agencies_from_country(heading, current_country, uuids):
    agencies = []
    content = heading.find_next('ul')
    agency_names = get_notes_on_lower_level(content)
-    for name in agency_names:
+    for name, links, description, synonyms in agency_names:
        if uuids and name in uuids:
-            agencies.append(IntelAgency(value=name, uuid=uuids[name], meta=Meta(country=current_country)))
+            agencies.append(IntelAgency(value=name, uuid=uuids[name], meta=Meta(country=current_country, refs=[links]), description=description))
        else:
-            agencies.append(IntelAgency(value=name, meta=Meta(country=current_country), uuid=str(uuid.uuid4())))
+            agencies.append(IntelAgency(value=name, meta=Meta(country=current_country, refs=[links]), uuid=str(uuid.uuid4()), description=description))
    return agencies
    
 def extract_info(content, uuids):
@@ -93,6 +104,5 @@ if __name__ == '__main__':
    )
    for agency in agencies:
        cluster.add_value(agency)
-    print(cluster.values)
-    print(cluster.uuid)
+
    cluster.save_to_file(os.path.join(CLUSTER_PATH, f'{GALAXY_NAME}.json'))
--- a/tools/WikipediaAPI/modules/intel.py
+++ b/tools/WikipediaAPI/modules/intel.py
@@ -1,9 +1,30 @@
-from dataclasses import dataclass, field, asdict
+from dataclasses import dataclass, field, asdict, is_dataclass
 import json

@dataclass
 class Meta:
    country: str = ""
+    refs: list = field(default_factory=list)
+    synonyms: list = field(default_factory=list)
+
+def custom_asdict(obj):
+    if is_dataclass(obj):
+        result = {}
+        for field_name, field_def in obj.__dataclass_fields__.items():
+            value = getattr(obj, field_name)
+            if field_name == 'meta': 
+                meta_value = custom_asdict(value) 
+                meta_value = {k: v for k, v in meta_value.items() if not (k in ['refs', 'synonyms'] and (not v or all(e is None for e in v)))}
+                value = meta_value
+            elif isinstance(value, (list, tuple)) and all(is_dataclass(i) for i in value):
+                value = [custom_asdict(i) for i in value]
+            elif isinstance(value, list) and all(e is None for e in value): 
+                continue 
+            result[field_name] = value
+        return result
+    else:
+        return obj
+

@dataclass
 class IntelAgency:
@@ -34,31 +55,20 @@ class Galaxy:
            file.write(json.dumps(asdict(self), indent=4))

@dataclass
-class Cluster():
-    def __init__(
-        self,
-        authors: str,
-        category: str,
-        description: str,
-        name: str,
-        source: str,
-        type: str,
-        uuid: str,
-        version: int,
-    ):
-        self.authors = authors
-        self.category = category
-        self.description = description
-        self.name = name
-        self.source = source
-        self.type = type
-        self.uuid = uuid
-        self.version = version
-        self.values = []
+class Cluster:
+    authors: str
+    category: str
+    description: str
+    name: str
+    source: str
+    type: str
+    uuid: str
+    version: int
+    values: list = field(default_factory=list)

    def add_value(self, value: IntelAgency):
        self.values.append(value)

    def save_to_file(self, path: str):
        with open(path, "w") as file:
-            file.write(json.dumps(asdict(self), indent=4))
+            file.write(json.dumps(custom_asdict(self), indent=4, ensure_ascii=False))