chg: [crawler] add monthly crawled domains stats

This commit is contained in:
terrtia 2024-02-27 14:56:48 +01:00
parent afe13185d9
commit 0d55725e28
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
4 changed files with 33 additions and 4 deletions

View file

@ -925,6 +925,12 @@ def get_crawlers_stats_by_day(date, domain_type):
'down': r_crawler.scard(f'{domain_type}_down:{date}'),
}
def get_crawlers_stats_by_month(domain_type, date=None):
    """Collect the per-day crawler stats for every day of one month.

    :param domain_type: domain type forwarded to get_crawlers_stats_by_day.
    :param date: forwarded unchanged to Date.get_month_dates to select the
        month; None is the default.
    :return: list with one get_crawlers_stats_by_day result per date
        yielded by Date.get_month_dates, in the same order.
    """
    # A distinct loop name keeps the `date` argument readable while iterating.
    return [
        get_crawlers_stats_by_day(day, domain_type)
        for day in Date.get_month_dates(date=date)
    ]
def get_crawlers_stats(domain_type=None):
stats = {}

View file

@ -1,6 +1,7 @@
#!/usr/bin/python3
import datetime
from calendar import monthrange
from dateutil.rrule import rrule, MONTHLY
from dateutil.relativedelta import relativedelta
@ -90,6 +91,14 @@ def get_current_week_day():
start = dt - datetime.timedelta(days=dt.weekday())
return start.strftime("%Y%m%d")
def get_month_dates(date=None):
    """Return every day of a month as a list of 'YYYYMMDD' strings.

    :param date: reference date; when truthy it is parsed with
        convert_date_str_to_datetime (presumably a 'YYYYMMDD' string --
        confirm against that helper). When falsy, today's date is used.
    :return: one formatted string per day, from the 1st to the last day of
        the reference date's month, in ascending order.
    """
    reference = convert_date_str_to_datetime(date) if date else datetime.date.today()
    year, month = reference.year, reference.month
    # monthrange() returns (weekday of the 1st, number of days in the month).
    _, last_day = monthrange(year, month)
    return [
        datetime.date(year, month, day).strftime("%Y%m%d")
        for day in range(1, last_day + 1)
    ]
def get_date_week_by_date(date):
dt = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8]))
start = dt - datetime.timedelta(days=dt.weekday())

View file

@ -306,6 +306,16 @@ def crawlers_last_domains_json():
stats.append(crawlers.get_crawlers_stats_by_day(date, domain_type))
return jsonify(stats)
@crawler_splash.route('/crawlers/last/domains/monthly/json')
@login_required
@login_read_only
def crawlers_last_domains_monthly_json():
    """Serve the monthly crawled-domain stats for one domain type as JSON.

    Reads the domain type from the 'type' query argument. Responds with the
    result of crawlers.get_crawlers_stats_by_month for that type, or with a
    JSON error body and HTTP 400 when the type is not one of
    crawlers.get_crawler_all_types().
    """
    domain_type = request.args.get('type')
    if domain_type in crawlers.get_crawler_all_types():
        return jsonify(crawlers.get_crawlers_stats_by_month(domain_type))
    # A missing 'type' argument yields None, which is checked the same way.
    return jsonify({'error': 'Invalid domain type'}), 400
#### Domains ####

View file

@ -102,6 +102,8 @@
</div>
</div>
<div id="barchart_type_month"></div>
</div>
</div>
@ -142,9 +144,11 @@ $(document).ready(function(){
$('#date-range-to-input').val(s2);
}
});
chart.stackBarChart =barchart_type_stack("{{ url_for('crawler_splash.crawlers_last_domains_json') }}?type={{type}}", 'id');
chart.onResize();
chart.stackBarChart = barchart_type_stack("{{ url_for('crawler_splash.crawlers_last_domains_json') }}?type={{type}}", 'barchart_type');
chart.stackBarChartMonth = barchart_type_stack("{{ url_for('crawler_splash.crawlers_last_domains_monthly_json') }}?type={{type}}", 'barchart_type_month');
chart.onResize()
$(window).on("resize", function() {
chart.onResize();
});
@ -174,7 +178,7 @@ function toggle_sidebar(){
<script>
var margin = {top: 20, right: 90, bottom: 55, left: 0},
width = parseInt(d3.select('#barchart_type').style('width'), 10);
width = parseInt(d3.select('#' + id).style('width'), 10);
width = 1000 - margin.left - margin.right,
height = 500 - margin.top - margin.bottom;
var x = d3.scaleBand().rangeRound([0, width]).padding(0.1);
@ -187,7 +191,7 @@ var yAxis = d3.axisLeft(y);
var color = d3.scaleOrdinal(d3.schemeSet3);
var svg = d3.select("#barchart_type").append("svg")
var svg = d3.select("#" + id).append("svg")
.attr("id", "thesvg")
.attr("viewBox", "0 0 "+width+" 500")
.attr("width", width + margin.left + margin.right)