mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-26 15:57:16 +00:00
chg: [Crawler] add domains blacklist
This commit is contained in:
parent
b3a6dc8487
commit
6328cc22b7
4 changed files with 17 additions and 1 deletions
|
@ -130,6 +130,16 @@ if __name__ == '__main__':
|
|||
db=p.config.getint("ARDB_Onion", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
# load domains blacklist
|
||||
try:
|
||||
with open(os.environ['AIL_BIN']+'/torcrawler/blacklist_onion.txt', 'r') as f:
|
||||
r_onion.delete('blacklist_{}'.format(type_hidden_service))
|
||||
lines = f.read().splitlines()
|
||||
for line in lines:
|
||||
r_onion.sadd('blacklist_{}'.format(type_hidden_service), line)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
while True:
|
||||
|
||||
# Recovering the streamed message information.
|
||||
|
@ -160,7 +170,7 @@ if __name__ == '__main__':
|
|||
print('domain: {}'.format(domain))
|
||||
print('domain_url: {}'.format(domain_url))
|
||||
|
||||
if not r_onion.sismember('banned_{}'.format(type_hidden_service), domain):
|
||||
if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain):
|
||||
|
||||
date = datetime.datetime.now().strftime("%Y%m%d")
|
||||
date_month = datetime.datetime.now().strftime("%Y%m")
|
||||
|
|
1
bin/torcrawler/blacklist_onion.txt
Normal file
1
bin/torcrawler/blacklist_onion.txt
Normal file
|
@ -0,0 +1 @@
|
|||
www.facebookcorewwwi.onion
|
|
@ -75,6 +75,7 @@ def hiddenServices_page():
|
|||
statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
|
||||
statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
|
||||
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
|
||||
statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
|
||||
|
||||
for onion in last_onions:
|
||||
metadata_onion = {}
|
||||
|
|
|
@ -120,6 +120,10 @@
|
|||
<tr>
|
||||
<td>Crawled Domains</td>
|
||||
<td>{{ statDomains['total'] }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Domains in Queue</td>
|
||||
<td>{{ statDomains['domains_queue'] }}</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
|
Loading…
Reference in a new issue