From 65b9a0164409568e912517f7c35869423cf52240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Wed, 17 Sep 2014 17:19:03 +0200 Subject: [PATCH] Add config file for DomainClassifier, proper reporting --- bin/DomClassifier.py | 16 ++++++++++++---- bin/packages/config.cfg.sample | 4 ++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/bin/DomClassifier.py b/bin/DomClassifier.py index c2b9f4ef..f1f59f72 100755 --- a/bin/DomClassifier.py +++ b/bin/DomClassifier.py @@ -28,6 +28,10 @@ def main(): publisher.info("""ZMQ DomainClassifier is Running""") c = DomainClassifier.domainclassifier.Extract(rawtext="") + + cc = p.config.get("DomClassifier", "cc") + cc_tld = p.config.get("DomClassifier", "cc_tld") + while True: try: message = p.get_from_set() @@ -44,12 +48,16 @@ def main(): c.text(rawtext=paste) c.potentialdomain() c.validdomain(rtype=['A'], extended=True) - localizeddomains = c.include(expression=r'\.lu$') + localizeddomains = c.include(expression=cc_tld) if localizeddomains: - print (localizeddomains) - localizeddomains = c.localizedomain(cc='LU') + print(localizeddomains) + publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format( + PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld)) + localizeddomains = c.localizedomain(cc=cc) if localizeddomains: - print (localizeddomains) + print(localizeddomains) + publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format( + PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc)) except IOError: print "CRC Checksum Failed on :", PST.p_path publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 6ddd5f20..ac8488f8 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -34,6 +34,10 @@ db = 1 [Url] cc_critical = DE +[DomClassifier] +cc = DE +cc_tld = r'\.de$' + # Indexer configuration [Indexer] type = whoosh