2022-06-03 13:30:48 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
"""
|
|
|
|
The Zerobins Module
|
|
|
|
======================
|
|
|
|
This module spots zerobins-like services for further processing
|
|
|
|
"""
|
|
|
|
|
|
|
|
##################################
|
|
|
|
# Import External packages
|
|
|
|
##################################
|
|
|
|
import os
|
|
|
|
import re
|
2023-02-21 11:22:49 +00:00
|
|
|
import sys
|
2022-10-25 14:25:19 +00:00
|
|
|
|
2022-06-03 13:30:48 +00:00
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
|
|
|
##################################
|
|
|
|
# Import Project packages
|
|
|
|
##################################
|
|
|
|
from modules.abstract_module import AbstractModule
|
|
|
|
from lib import crawlers
|
|
|
|
|
|
|
|
|
|
|
|
class Zerobins(AbstractModule):
    """
    Zerobins module for AIL framework

    Matches URLs whose host starts with ``zerobin.`` or ``privatebin.`` and
    creates a crawler task for each matching URL.
    """

    def __init__(self):
        super().__init__()

        # URL patterns of known zerobin-like services.
        # The group is non-capturing on purpose: with a capturing group,
        # re.findall-style helpers return only the captured word instead of
        # the full URL. The former empty alternative ("zerobin||privatebin")
        # was also removed, since it let "https://.<anything>" match.
        binz = [
            r'^https://(?:zerobin|privatebin)\..*$',  # historical ones
        ]

        # Single compiled regex combining every pattern above
        self.regex = re.compile('|'.join(binz))

        # Pending time between two computations (computeNone), in seconds
        self.pending_seconds = 10

        # Send module state to logs
        self.redis_logger.info(f'Module {self.module_name} initialized')

    def computeNone(self):
        """
        Compute when no message in queue

        Only emits a debug log entry.
        """
        self.redis_logger.debug("No message in queue")

    def compute(self, message):
        """
        Compute a message in queue

        :param message: string made of exactly two whitespace-separated
                        tokens: the URL to check and the item id
        :raises ValueError: if message does not split into exactly two tokens
        """
        url, item_id = message.split()

        # Extract zerobins addresses
        # NOTE(review): regex_findall is inherited and not visible here;
        # presumably it returns the list of matches of self.regex in url.
        matching_binz = self.regex_findall(self.regex, item_id, url)

        # Iterating an empty result is a no-op, so no explicit length check
        for bin_url in matching_binz:
            print(f'send {bin_url} to crawler')
            # TODO Change priority ???
            crawlers.create_task(bin_url, depth=0, har=False, screenshot=False, proxy='force_tor',
                                 parent='manual', priority=60)

        self.redis_logger.debug("Compute message in queue")
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: build the Zerobins module, then call its run() method
    zerobins_module = Zerobins()
    zerobins_module.run()
|