mirror of
https://github.com/ail-project/ail-framework.git
synced 2024-11-10 08:38:28 +00:00
chg: [Crawler] add domains blacklist
This commit is contained in:
parent
b3a6dc8487
commit
6328cc22b7
4 changed files with 17 additions and 1 deletions
|
@ -130,6 +130,16 @@ if __name__ == '__main__':
|
||||||
db=p.config.getint("ARDB_Onion", "db"),
|
db=p.config.getint("ARDB_Onion", "db"),
|
||||||
decode_responses=True)
|
decode_responses=True)
|
||||||
|
|
||||||
|
# load domains blacklist
|
||||||
|
try:
|
||||||
|
with open(os.environ['AIL_BIN']+'/torcrawler/blacklist_onion.txt', 'r') as f:
|
||||||
|
r_onion.delete('blacklist_{}'.format(type_hidden_service))
|
||||||
|
lines = f.read().splitlines()
|
||||||
|
for line in lines:
|
||||||
|
r_onion.sadd('blacklist_{}'.format(type_hidden_service), line)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
# Recovering the streamed message information.
|
# Recovering the streamed message information.
|
||||||
|
@ -160,7 +170,7 @@ if __name__ == '__main__':
|
||||||
print('domain: {}'.format(domain))
|
print('domain: {}'.format(domain))
|
||||||
print('domain_url: {}'.format(domain_url))
|
print('domain_url: {}'.format(domain_url))
|
||||||
|
|
||||||
if not r_onion.sismember('banned_{}'.format(type_hidden_service), domain):
|
if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain):
|
||||||
|
|
||||||
date = datetime.datetime.now().strftime("%Y%m%d")
|
date = datetime.datetime.now().strftime("%Y%m%d")
|
||||||
date_month = datetime.datetime.now().strftime("%Y%m")
|
date_month = datetime.datetime.now().strftime("%Y%m")
|
||||||
|
|
1
bin/torcrawler/blacklist_onion.txt
Normal file
1
bin/torcrawler/blacklist_onion.txt
Normal file
|
@ -0,0 +1 @@
|
||||||
|
www.facebookcorewwwi.onion
|
|
@ -75,6 +75,7 @@ def hiddenServices_page():
|
||||||
statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
|
statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
|
||||||
statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
|
statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
|
||||||
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
|
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
|
||||||
|
statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
|
||||||
|
|
||||||
for onion in last_onions:
|
for onion in last_onions:
|
||||||
metadata_onion = {}
|
metadata_onion = {}
|
||||||
|
|
|
@ -120,6 +120,10 @@
|
||||||
<tr>
|
<tr>
|
||||||
<td>Crawled Domains</td>
|
<td>Crawled Domains</td>
|
||||||
<td>{{ statDomains['total'] }}</td>
|
<td>{{ statDomains['total'] }}</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Domains in Queue</td>
|
||||||
|
<td>{{ statDomains['domains_queue'] }}</td>
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
|
|
Loading…
Reference in a new issue