Add config file for DomainClassifier, proper reporting

This commit is contained in:
Raphaël Vinot 2014-09-17 17:19:03 +02:00
parent f017680365
commit 65b9a01644
2 changed files with 16 additions and 4 deletions

View file

@ -28,6 +28,10 @@ def main():
publisher.info("""ZMQ DomainClassifier is Running""")
c = DomainClassifier.domainclassifier.Extract(rawtext="")
cc = p.config.get("DomClassifier", "cc")
cc_tld = p.config.get("DomClassifier", "cc_tld")
while True:
try:
message = p.get_from_set()
@ -44,12 +48,16 @@ def main():
c.text(rawtext=paste)
c.potentialdomain()
c.validdomain(rtype=['A'], extended=True)
localizeddomains = c.include(expression=r'\.lu$')
localizeddomains = c.include(expression=cc_tld)
if localizeddomains:
print(localizeddomains)
localizeddomains = c.localizedomain(cc='LU')
publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld))
localizeddomains = c.localizedomain(cc=cc)
if localizeddomains:
print(localizeddomains)
publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc))
except IOError:
print "CRC Checksum Failed on :", PST.p_path
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(

View file

@ -34,6 +34,10 @@ db = 1
[Url]
cc_critical = DE
[DomClassifier]
# Country code handed to the classifier's localizedomain(cc=...) call.
cc = DE
# Regular expression handed to the classifier's include(expression=...) call
# to select domains under the country TLD. Keep it bare: the value is read as
# a plain string through config.get(), so Python raw-string syntax (r'...')
# would end up inside the pattern itself and stop it from matching.
cc_tld = \.de$
# Indexer configuration
[Indexer]
type = whoosh