-
Notifications
You must be signed in to change notification settings - Fork 358
Expand file tree
/
Copy pathpreprint_metrics.py
More file actions
73 lines (61 loc) · 2.51 KB
/
preprint_metrics.py
File metadata and controls
73 lines (61 loc) · 2.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from elasticsearch.exceptions import NotFoundError
import elasticsearch_metrics.imps.elastic6 as metrics
from .metric_mixin import MetricMixin
class BasePreprintMetric(MetricMixin, metrics.Metric):
    """Abstract base for per-preprint event metrics stored in Elasticsearch.

    Each record represents one or more occurrences of an event (e.g. a view
    or a download — see the concrete subclasses) for a given preprint and
    provider, optionally attributed to a user.
    """

    # How many events this single record represents; record_for_preprint
    # defaults it to 1 per call.
    count = metrics.Integer(doc_values=True, index=True, required=True)
    provider_id = metrics.Keyword(index=True, doc_values=True, required=True)
    # Anonymous events carry no user, so user_id is optional.
    user_id = metrics.Keyword(index=True, doc_values=True, required=False)
    preprint_id = metrics.Keyword(index=True, doc_values=True, required=True)
    version = metrics.Keyword(index=True, doc_values=True)
    path = metrics.Text(index=True)
    # TODO: locale
    class Index:
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 1,
            'refresh_interval': '1s',
        }
    class Meta:
        abstract = True
        source = metrics.MetaField(enabled=True)
    @classmethod
    def record_for_preprint(cls, preprint, user=None, **kwargs):
        """Persist one metric record for *preprint*.

        :param preprint: object exposing ``_id`` and ``provider._id``
        :param user: optional object with an ``_id``; None for anonymous events
        :param kwargs: extra fields forwarded to ``cls.record``; ``count``
            may be supplied here and defaults to 1
        :return: whatever ``cls.record`` returns (the saved metric record)
        """
        count = kwargs.pop('count', 1)
        return cls.record(
            count=count,
            preprint_id=preprint._id,
            user_id=getattr(user, '_id', None),
            provider_id=preprint.provider._id,
            **kwargs
        )
    @classmethod
    def get_count_for_preprint(cls, preprint, after=None, before=None, index=None) -> int:
        """Return the summed ``count`` across all records for *preprint*.

        :param preprint: the preprint whose events to total
        :param after: if given, only include records with timestamp >= after
        :param before: if given, only include records with timestamp < before
        :param index: optional explicit index (or index pattern) to search
        :return: the total as an int; 0 when no matching data is indexed
        """
        if preprint.version == 1:
            # NOTE(review): for version-1 preprints, records may exist under
            # either the base GUID or the preprint's own id, so match both —
            # presumably a legacy-data concern; confirm against the writer side.
            search = cls.search(index=index).filter('terms', preprint_id=[preprint.get_guid()._id, preprint._id])
        else:
            search = cls.search(index=index).filter('term', preprint_id=preprint._id)
        # Build the optional timestamp range filter (gte = inclusive lower
        # bound, lt = exclusive upper bound).
        timestamp = {}
        if after:
            timestamp['gte'] = after
        if before:
            timestamp['lt'] = before
        if timestamp:
            search = search.filter('range', timestamp=timestamp)
        search.aggs.metric('sum_count', 'sum', field='count')
        # Optimization: set size to 0 so that hits aren't returned (we only care about the aggregation)
        search = search.extra(size=0)
        try:
            response = search.execute()
        except NotFoundError:
            # _get_relevant_indices returned one or more indices that don't
            # exist. Fall back to the unoptimized query: clear the index list
            # with the no-arg .index() call, then target the default index.
            search = search.index().index(cls._default_index())
            response = search.execute()
        # No indexed data: the aggregation is absent from the response.
        if not hasattr(response.aggregations, 'sum_count'):
            return 0
        return int(response.aggregations.sum_count.value)
class PreprintView(BasePreprintMetric):
    """Concrete metric recording preprint view events."""
    pass
class PreprintDownload(BasePreprintMetric):
    """Concrete metric recording preprint download events."""
    pass