Supported backends #4

Open · wants to merge 14 commits into base: master
33 changes: 32 additions & 1 deletion README.rst
@@ -12,9 +12,9 @@ examples:

::

    downloader/request_bytes -> downloader.request_bytes
    downloader/request_method_count/GET -> downloader.request_method_count.GET
    finish_reason -> finish_reason
    robotstxt/exception_count/<class 'PermissionError'> -> robotstxt.exception_count.class_PermissionError

Installation
@@ -102,11 +102,41 @@ You can also specify prefixes to ignore the same way using

    STATSD_IGNORE = []

Handlers
--------

This extension currently supports sending stats to three collectors: Graphite,
Telegraf, and Datadog. To enable one of them, set ``STATSD_HANDLER``
accordingly.

Please note that tags are not currently supported by the Graphite handler. To
select it:

::

    STATSD_HANDLER = "scrapy_statsd_extension.handlers.graphite.GraphiteHandler"

For Telegraf, you will need the statsd input plugin (``[[inputs.statsd]]``)
enabled in your Telegraf configuration.

::

    STATSD_HANDLER = "scrapy_statsd_extension.handlers.telegraf.TelegrafHandler"

For Datadog, you will need to set your API key via ``DATADOG_API_KEY``, either
in the Scrapy settings or as an environment variable.

::

    STATSD_HANDLER = "scrapy_statsd_extension.handlers.datadog.DatadogHandler"
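
Putting it together, a minimal ``settings.py`` sketch for the Datadog backend
(the extension priority of 500 is an arbitrary choice and the API key is a
placeholder):

::

    EXTENSIONS = {
        "scrapy_statsd_extension.StatsdExtension": 500,
    }

    STATSD_ENABLED = True
    STATSD_HANDLER = "scrapy_statsd_extension.handlers.datadog.DatadogHandler"
    DATADOG_API_KEY = "your-api-key"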


Tags
----

Certain platforms, such as Datadog and InfluxDB, offer tagging options.

To enable tagging, set ``STATSD_TAGGING`` to ``True``; it is disabled by
default:
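
::

    STATSD_TAGGING = True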

@@ -127,3 +157,4 @@ as a tag on all metrics:
You can also set custom tags by setting the ``statsd_tags`` attribute on each
spider. This must be a dictionary mapping tag names to tag values, as in the
sketch below.
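
A minimal sketch (the spider name and tag values here are illustrative; only
the ``statsd_tags`` attribute comes from the extension):

::

    import scrapy

    class ExampleSpider(scrapy.Spider):
        name = "example"
        statsd_tags = {"environment": "staging", "team": "crawling"}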

7 changes: 4 additions & 3 deletions requirements.txt
@@ -1,3 +1,4 @@
Twisted
Scrapy
statsd-telegraf
scrapy
statsd
graphyte
datadog
21 changes: 11 additions & 10 deletions scrapy_statsd_extension/__init__.py
100644 → 100755
@@ -7,6 +7,15 @@


class StatsdExtension(object):
    @classmethod
    def from_crawler(cls, crawler):
        ext = cls(crawler)

        crawler.signals.connect(ext.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(ext.spider_closed, signal=signals.spider_closed)

        return ext

    def __init__(self, crawler):
        if not crawler.settings.getbool("STATSD_ENABLED", defaults.STATSD_ENABLED):
            raise NotConfigured
@@ -18,18 +27,10 @@ def __init__(self, crawler):
"STATSD_LOG_EVERY", defaults.STATSD_LOG_EVERY
)

self.handler = load_object(defaults.STATSD_HANDLER).from_crawler(crawler)
handler_class = crawler.settings.get("STATSD_HANDLER", defaults.STATSD_HANDLER)
self.handler = load_object(handler_class).from_crawler(crawler)
self.stats = crawler.stats

    @classmethod
    def from_crawler(cls, crawler):
        ext = cls(crawler)

        crawler.signals.connect(ext.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(ext.spider_closed, signal=signals.spider_closed)

        return ext

    def spider_opened(self, spider):
        if self.log_periodic:
            self.log_task = LoopingCall(self.log_stats, spider)
5 changes: 3 additions & 2 deletions scrapy_statsd_extension/defaults.py
100644 → 100755
@@ -3,8 +3,9 @@
STATSD_PORT = 8125
STATSD_LOG_PERIODIC = True
STATSD_LOG_EVERY = 5
STATSD_HANDLER = "scrapy_statsd_extension.handlers.StatsdBase"
STATSD_PREFIX = "scrapy"
STATSD_HANDLER = "scrapy_statsd_extension.handlers.graphite.GraphiteHandler"
STATSD_LOG_ONLY = []
STATSD_TAGGING = False
STATSD_IGNORE = []
STATSD_TAGGING = True
STATSD_TAGS = {"spider_name": True}
128 changes: 64 additions & 64 deletions scrapy_statsd_extension/handlers/__init__.py
@@ -1,64 +1,64 @@
import statsd

from scrapy_statsd_extension import utils, defaults


class StatsdBase(object):
    def __init__(self, crawler_settings):
        host = crawler_settings.get("STATSD_HOST", defaults.STATSD_HOST)
        port = crawler_settings.get("STATSD_PORT", defaults.STATSD_PORT)
        prefix = crawler_settings.get("STATSD_PREFIX", defaults.STATSD_PREFIX)
        self.client = statsd.StatsClient(host, port, prefix)

        self.prefixes_to_log = crawler_settings.get(
            "STATSD_LOG_ONLY", defaults.STATSD_LOG_ONLY
        )
        self.log_all_fields = bool(log_only) == False

        self.ignored_prefixes = (
            crawler_settings.get("STATSD_IGNORE", defaults.STATSD_IGNORE) or []
        )

        self.tagging_enabled = crawler_settings.get(
            "STATSD_TAGGING", defaults.STATSD_TAGGING
        )
        self.tags = crawler_settings.get("STATSD_TAGS", defaults.STATSD_TAGS)

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler.settings)

    def not_ignored_field(self, key):
        for prefix in self.ignored_prefixes:
            if key.startswith(prefix):
                return False

        return True

    def has_valid_prefix(self, key):
        if self.log_all_fields is True:
            return True

        for prefix in self.prefixes_to_log:
            if key.startswith(prefix):
                return True

        return False

    def get_tags(self, spider):
        if not self.tagging_enabled:
            return

        tags = {}

        if self.tags["spider_name_tag"]:
            tags["spider_name_tag"] = spider.name

        if hasattr("spider", "statsd_tags"):
            tags.extend(spider.statsd_tags)

        return tags

    def increment(self, key, value, spider):
        if self.not_ignored_field(key) and self.has_valid_prefix(key):
            self.client.incr(key, value, tags=self.get_tags(spider))
from scrapy_statsd_extension import utils, defaults


class StatsdBaseHandler:
    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler.settings)

    def __init__(self, crawler_settings):
        host = crawler_settings.get("STATSD_HOST", defaults.STATSD_HOST)
        port = crawler_settings.get("STATSD_PORT", defaults.STATSD_PORT)
        prefix = crawler_settings.get("STATSD_PREFIX", defaults.STATSD_PREFIX)
        self.client = self.create_client(host, port, prefix)

        self.prefixes_to_log = crawler_settings.get(
            "STATSD_LOG_ONLY", defaults.STATSD_LOG_ONLY
        )
        self.log_all_fields = not self.prefixes_to_log

        self.ignored_prefixes = (
            crawler_settings.get("STATSD_IGNORE", defaults.STATSD_IGNORE) or []
        )

        self.tagging_enabled = crawler_settings.get(
            "STATSD_TAGGING", defaults.STATSD_TAGGING
        )
        self.tag_settings = crawler_settings.get("STATSD_TAGS", defaults.STATSD_TAGS)

    def not_ignored_field(self, key):
        for prefix in self.ignored_prefixes:
            if key.startswith(prefix):
                return False

        return True

    def has_valid_prefix(self, key):
        if self.log_all_fields:
            return True

        for prefix in self.prefixes_to_log:
            if key.startswith(prefix):
                return True

        return False

    def get_tags(self, spider):
        # Return an empty dict (not None) so subclasses can iterate safely.
        if not self.tagging_enabled:
            return {}

        tags = {}

        if self.tag_settings.get("spider_name", False):
            tags["spider_name"] = spider.name

        if hasattr(spider, "statsd_tags"):
            # Dicts are merged with update(), not extend().
            tags.update(spider.statsd_tags)

        return tags

    def create_client(self, host, port, prefix):
        raise NotImplementedError("create_client not implemented in handler!")

    def increment(self, key, value, spider):
        raise NotImplementedError("increment not implemented in handler!")
28 changes: 28 additions & 0 deletions scrapy_statsd_extension/handlers/datadog.py
@@ -0,0 +1,28 @@
from os import environ

from datadog import initialize, statsd

from scrapy_statsd_extension import handlers, defaults


class DatadogHandler(handlers.StatsdBaseHandler):
    def __init__(self, crawler_settings):
        super().__init__(crawler_settings)

        api_key = crawler_settings.get(
            "DATADOG_API_KEY", environ.get("DATADOG_API_KEY")
        )
        namespace = crawler_settings.get("STATSD_PREFIX", defaults.STATSD_PREFIX)

        initialize(api_key=api_key, statsd_namespace=namespace)

    def create_client(self, host, port, prefix):
        # DogStatsD exposes a module-level client.
        return statsd

    def get_tags(self, spider):
        tags = super().get_tags(spider)
        return [f"{key}:{value}" for key, value in tags.items()]

    def increment(self, key, value, spider):
        if self.not_ignored_field(key) and self.has_valid_prefix(key):
            self.client.increment(key, value, tags=self.get_tags(spider))
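
With tagging enabled, Datadog tags are emitted in ``key:value`` form, e.g.
``["spider_name:example"]`` for a spider named ``example``.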
20 changes: 20 additions & 0 deletions scrapy_statsd_extension/handlers/graphite.py
@@ -0,0 +1,20 @@
import graphyte

from scrapy_statsd_extension.handlers import StatsdBaseHandler


class GraphiteHandler(StatsdBaseHandler):
    def create_client(self, host, port, prefix):
        graphyte.init(host, port=port, prefix=prefix)
        return graphyte

    def increment(self, key, value, spider):
        if self.not_ignored_field(key) and self.has_valid_prefix(key):
            self.client.send(key, value, tags=self.get_tags(spider))
18 changes: 18 additions & 0 deletions scrapy_statsd_extension/handlers/telegraf.py
@@ -0,0 +1,18 @@
import statsd

from scrapy_statsd_extension.handlers import StatsdBaseHandler


class TelegrafHandler(StatsdBaseHandler):
    def create_client(self, host, port, prefix):
        return statsd.StatsClient(host, port, prefix)

    def get_formatted_tags(self, spider):
        tags = self.get_tags(spider)
        return [f"{key}={value}" for key, value in tags.items()]

    def increment(self, key, value, spider):
        formatted_tags = self.get_formatted_tags(spider)

        if self.not_ignored_field(key) and self.has_valid_prefix(key):
            self.client.incr(",".join([key, *formatted_tags]), value)
9 changes: 7 additions & 2 deletions setup.py
100644 → 100755
@@ -7,7 +7,7 @@
    version="0.1.0",
    url="https://github.com/scrapy-plugins/scrapy-statsd",
    description="Scrapy extension to log stats to statsd",
    long_description=(pathlib.Path(__file__).parent / "README.md").read_text(),
    long_description=(pathlib.Path(__file__).parent / "README.rst").read_text(),
    long_description_content_type="text/x-rst",
    author="Scrapy developers",
    license="BSD",
@@ -19,5 +19,10 @@
"Programming Language :: Python :: 3.7",
],
packages=find_packages(exclude=("tests", "tests.*")),
install_requires=["Twisted", "Scrapy", "statsd-telegraf"],
install_requires=["scrapy"],
extras_require={
"datadog": ["datadog"],
"graphite": ["graphyte"],
"telegraf": ["statsd"],
},
)
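
With these extras, each backend's client library can be installed on demand,
e.g. ``pip install scrapy-statsd[datadog]`` (assuming the distribution name
matches the repository).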