diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bee8a64
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/clusters/target-information.json b/clusters/target-information.json
index 69521a3..2789f32 100644
--- a/clusters/target-information.json
+++ b/clusters/target-information.json
@@ -1493,7 +1493,7 @@
           "Zhōnghuá Rénmín Gònghéguó"
         ],
         "territory-type": [
-          ""
+          "Country"
         ]
       },
       "uuid": "53d3d205-db31-4ec9-86aa-c2bf11fd18e6",
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tools/chk_dup.py b/tools/chk_dup.py
index 2ed2f89..9df3000 100755
--- a/tools/chk_dup.py
+++ b/tools/chk_dup.py
@@ -8,9 +8,19 @@ import os
 import collections
 
 
-def loadjsons(path):
+def loadjsons(path, return_paths=False):
     """
-      Find all Jsons and load them in a dict
+    Find all Jsons and load them in a dict
+
+    Parameters:
+        path: string, directory that is searched for *.json files
+        return_paths: boolean, if the path of the file should be returned,
+            default: False
+
+    Returns:
+        List of parsed file contents.
+        If return_paths is True, then every list item is a tuple of the
+        file path (path joined with the file name) and the file content
     """
     files = []
     data = []
@@ -18,9 +28,17 @@ def loadjsons(path):
         if os.path.isfile(os.path.join(path, name)) and name.endswith('.json'):
             files.append(name)
     for jfile in files:
-        data.append(json.load(open("%s/%s" % (path, jfile))))
+        filepath = os.path.join(path, jfile)
+        # Parse exactly once and close the handle before moving on.
+        with open(filepath) as handle:
+            content = json.load(handle)
+        if return_paths:
+            data.append((filepath, content))
+        else:
+            data.append(content)
     return data
 
+
 if __name__ == '__main__':
     """
         Iterate all name + synonyms
@@ -33,19 +51,19 @@ if __name__ == '__main__':
         items = djson.get('values')
         for entry in items:
             name = entry.get('value').strip().lower()
-            counter[name]+=1
+            counter[name] += 1
             namespace.append([name, djson.get('name')])
             try:
                 for synonym in entry.get('meta').get('synonyms'):
                     name = synonym.strip().lower()
-                    counter[name]+=1
+                    counter[name] += 1
                     namespace.append([name, djson.get('name')])
             except (AttributeError, TypeError):
                 pass
     counter = dict(counter)
     for key, val in counter.items():
-        if val>1:
-            print ("Warning duplicate %s" % key)
+        if val > 1:
+            print("Warning duplicate %s" % key)
             for item in namespace:
-                if item[0]==key:
-                    print (item)
+                if item[0] == key:
+                    print(item)
diff --git a/tools/chk_empty_strings.py b/tools/chk_empty_strings.py
new file mode 100755
index 0000000..1ccac24
--- /dev/null
+++ b/tools/chk_empty_strings.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# coding=utf-8
+"""
+    Tools to find empty string entries in galaxies
+"""
+from .chk_dup import loadjsons
+import sys
+
+
+if __name__ == '__main__':
+    jsons = loadjsons("clusters", return_paths=True)
+    retval = 0
+    for clustername, djson in jsons:
+        items = djson.get('values')
+        for entry in items:
+            name = entry.get('value')
+            for key, value in entry.get('meta', {}).items():
+                if isinstance(value, list):
+                    if '' in value:
+                        retval = 1
+                        print("Empty string found in Cluster %r: values/%s/meta/%s"
+                              "" % (clustername, name, key),
+                              file=sys.stderr)
+    sys.exit(retval)
diff --git a/validate_all.sh b/validate_all.sh
index 7d1a842..f797c55 100755
--- a/validate_all.sh
+++ b/validate_all.sh
@@ -84,3 +84,6 @@ do
   fi
   echo ''
 done
+
+# check for empty strings in clusters
+python3 -m tools.chk_empty_strings