Date

Sometime in 2012 I was tasked with hosting a statsd server. I used collectd at the time and wanted to make our system metrics more approachable. One of the first stumbling blocks most people hit is carbon-cache starting to drop metrics. We ran lower-frequency processors, which did not help. I originally looked at Heavywater's Graphite cookbook, but like the Logstash cookbook it uses attributes and has a lot of moving parts.

  • Solution

I wrote 2 cookbooks to act as a framework to help people build their own Graphite cookbooks. They are not a complete solution to hosting Graphite, as there is nothing in them for Apache or Nginx. The two cookbooks are:

  1. carbon
  2. graphite

Below is an example where an E5-2620 was used to host up to 3 million metrics. It makes use of the haproxy_lwrp cookbook.

# Dedicated "graphite" group and user; the user's home is /opt/graphite.
group 'graphite' do
  action :create
end

user 'graphite' do
  group 'graphite'
  home '/opt/graphite'
  shell '/bin/bash'
  # Let Chef manage the home directory together with the account.
  supports :manage_home => true
end

# /opt/graphite is used further down as the graphite-web home and as the
# virtualenv for pip installs, so make sure it exists and is owned by graphite.
directory '/opt/graphite' do
  owner 'graphite'
  group 'graphite'
  mode 0755
  recursive true
  action :create
end
# --- Starting values for per-instance counters -------------------------------
# Each carbon-cache instance declared below takes the current value and then
# bumps the counter by one.

# First CPU to pin to when CPU affinity is in use. If other services already
# occupy low-numbered CPUs, raise this accordingly.
cpu = 0
# First cache query port.
cache_query_port = 7000
# First line receiver port.
line_receiver_port = 3001
# First pickle receiver port.
pickle_receiver_port = 3101

# Relay receiver start ports.
# NOTE(review): neither variable is read in the visible recipe; the relay
# resources use literal port numbers instead.
relay_line_receiver_port = 2031
relay_pickle_receiver_port = 2041

# --- Accumulators -------------------------------------------------------------
# Destinations ("host:pickle_port"), filled in by the carbon-cache loop.
dst = []
# Destinations including the instance name ("host:pickle_port:instance").
dstplus = []
# Carbon cache query endpoints, later handed to graphite-web.
cqp = []
# NOTE(review): the three values below are not referenced in the visible recipe.
cluster = []
whisperpath = []
whisper = 1
# Install carbon/whisper through the carbon cookbook's :git action, pulling
# from the repositories under the base URI given here.
carbon_install 'stable' do
  carbon_stable_base_git_uri 'https://github.com/damm/'
  # NOTE(review): presumably a map of repository name => branch/ref to check
  # out; confirm against the carbon cookbook.
  carbon_stable_packages({ 'whisper' => '0.9.x', 'carbon-1' => 'json_logs' })
  action :git
end

# Declare twelve carbon-cache instances ("a" through "l"). Every instance gets
# its own line, pickle and cache-query port, and (outside chef-solo) its own
# CPU. The settings of each instance are collected in `carbons` and stored on
# the node under node['carbon']['caches'].
carbons = []
("a".."l").each do |ci|
  carbon_cache "carbon_cache-#{ci}" do
    action :create
    init_style "runit"
    # CPU pinning is skipped when running under chef-solo.
    cpu_affinity cpu unless Chef::Config[:solo]
    carbon_instance ci
    line_listner({ "line_receiver_interface" => "0.0.0.0", "line_receiver_port" => line_receiver_port })
    pickle_listner({ "pickle_receiver_interface" => "0.0.0.0", "pickle_receiver_port" => pickle_receiver_port })
    # UDP is passed with its enable flag set to "False"; it reuses the line
    # receiver port number.
    udp_listner({ "enable_udp_listner" => "False", "udp_receiver_interface" => "0.0.0.0", "udp_receiver_port" => line_receiver_port })
    cache_query({ "cache_query_interface" => "0.0.0.0", "cache_query_port" => cache_query_port })
    storage_schema({ :all => { :pattern => "(.*)", :retentions => "10s:14d, 60s:30d, 600s:1y" } })
    lock_writes("True")
    fallocate_create("True")
    max_updates_per_second(500)
    max_creates_per_minute(500)
    # The patterns are regular expressions. They are single-quoted on purpose:
    # in a double-quoted Ruby string "\." collapses to ".", which would turn
    # '\.min$' into '.min$' and match any character before "min".
    storage_aggregation({
      :min => { :pattern => '\.min$', :xfilesfactor => "0.1", :aggregationmethod => "min" },
      :max => { :pattern => '\.max$', :xfilesfactor => "0.1", :aggregationmethod => "max" },
      :sum => { :pattern => '\.count$', :xfilesfactor => "0", :aggregationmethod => "sum" },
      :default_average => { :pattern => ".*", :xfilesfactor => "0.5", :aggregationmethod => "average" }
    })

    # Record this instance's endpoints for the relays and graphite-web
    # declared later in the recipe.
    dst << ["127.0.0.1:#{pickle_receiver_port}"]
    cqp << ["\"127.0.0.1:#{cache_query_port}:#{ci}\""]
    dstplus << "127.0.0.1:#{pickle_receiver_port}:#{ci}"
    carbons << {
      "instance" => ci,
      "line_receiver_port" => line_receiver_port,
      "pickle_receiver_port" => pickle_receiver_port,
      "cache_query_port" => cache_query_port,
      "cpu" => cpu
    }

    # Advance every counter so the next instance gets fresh values.
    cpu += 1
    cache_query_port += 1
    line_receiver_port += 1
    pickle_receiver_port += 1
  end
end
node.set['carbon']['caches'] = carbons

# Declare three carbon-relay instances that all forward to the carbon-cache
# destinations gathered in `dstplus`. Each relay takes the next CPU after the
# ones already handed out, and its settings are stored on the node under
# node['carbon']['relays'].
nodeofrelays = []

# [instance, line receiver port, pickle receiver port]
relay_specs = [
  ["a", 2031, 2041],
  ["b", 2032, 2042],
  # NOTE(review): "c" does not continue the 203x/204x sequence; confirm that
  # 2013/2014 is intentional.
  ["c", 2013, 2014]
]

relay_specs.each do |relay_id, line_port, pickle_port|
  carbon_relay "carbon_relay-#{relay_id}" do
    relay_rules({ "default" => { "default" => "true", "destinations" => dstplus, "continue" => String.new, "pattern" => String.new } })
    line_listner({ "line_receiver_interface" => "0.0.0.0", "line_receiver_port" => line_port })
    pickle_listner({ "pickle_receiver_interface" => "0.0.0.0", "pickle_receiver_port" => pickle_port })
    destinations dstplus
    relay_instance relay_id
    # CPU pinning is skipped when running under chef-solo.
    cpu_affinity cpu unless Chef::Config[:solo]
    init_style "runit"
    nodeofrelays << {
      "instance" => relay_id,
      "relay_line_receiver_port" => line_port,
      "relay_pickle_receiver_port" => pickle_port,
      "destinations" => dstplus,
      "cpu" => cpu
    }
    cpu += 1
    action :create
  end
end
node.set['carbon']['relays'] = nodeofrelays


# Drop graphTemplates.conf into the Graphite conf directory, taking the file
# from the "graphite" cookbook.
cookbook_file '/opt/graphite/conf/graphTemplates.conf' do
  cookbook 'graphite'
  source 'graphTemplates.conf'
  owner 'graphite'
  group 'graphite'
end

# Run the graphite_web resource's :git action.
graphite_web 'graphite-web-stable' do
  action :git
end

# PostgreSQL client-side packages (presumably what psycopg2 builds against),
# followed by psycopg2 itself inside the /opt/graphite virtualenv.
%w[libpq-dev postgresql-client-common postgresql-client-9.1].each do |pkg|
  package pkg
end

python_pip 'psycopg2' do
  virtualenv '/opt/graphite'
  user 'graphite'
  action :install
end

# Decide which node provides PostgreSQL, then create the "graphite" database.
#
# chef-solo has no search, so in that mode the server is installed locally and
# this node acts as its own database server. Otherwise the server node is
# looked up via search.
if Chef::Config[:solo]
  Chef::Log.warn("This cookbook uses search in multiple areas, when using solo we assume things")
  # NOTE(review): hard-coded example password; replace before real use.
  node.set['postgresql']['password'] = "photonanddreams"
  include_recipe "postgresql::server"
  postgres_server = node
else
  # 'name:$fixme$' is a placeholder query that must be edited to match the
  # real database node.
  postgres_server = search(:node, 'name:$fixme$').first
  # Fail with a clear message instead of a NoMethodError on nil further down.
  if postgres_server.nil?
    raise "No PostgreSQL server node found via search; replace the 'name:$fixme$' placeholder query with one that matches your database node"
  end
end

postgresql_database "graphite" do
  # Solo talks to the local server; otherwise use the server node's eth0 address.
  pg_host = Chef::Config[:solo] ? "localhost" : postgres_server.network.ipaddress_eth0
  connection({ :host => pg_host, :port => 5432, :username => 'postgres', :password => postgres_server.postgresql.password })
  action :create
end

# Discover other Graphite hosts through search.
#   render_hosts       - nodes with the graphite_rendering role (skipped when
#                        this node has an "ec2" attribute)
#   graphite_federated - nodes with the graphite_statsd role
# Both lists stay empty under chef-solo, which has no search.
# Each entry is a one-element array holding a double-quoted "ip:8080" string.
graphite_federated = []
render_hosts = []

if Chef::Config[:solo]
  Chef::Log.warn 'this portion of this cookbook requires search'
else
  if node.has_key?("ec2")
    Chef::Log.warn("Not setting up remote rendering")
  else
    search(:node, 'roles:graphite_rendering').each do |renderer|
      render_hosts << ["\"#{renderer['network']['ipaddress_eth1']}:8080\""]
    end
  end

  search(:node, 'roles:graphite_statsd').each do |peer|
    # Nodes with an "ec2" attribute are addressed by their main ipaddress,
    # all others by their eth1 address.
    peer_ip = peer.has_key?("ec2") ? peer['ipaddress'] : peer['network']['ipaddress_eth1']
    graphite_federated << ["\"#{peer_ip}:8080\""]
  end
end

# Configure and create the graphite-web service: 24 workers listening on
# 0.0.0.0:8080 under runit, backed by the PostgreSQL "graphite" database and
# wired to the carbon cache query endpoints collected in `cqp`.
graphite_web "graphite-web" do
  action :create
  init_style "runit"
  user "graphite"
  group "graphite"
  graphite_home "/opt/graphite"
  listen_address "0.0.0.0"
  listen_port 8080
  workers 24
  backlog 65535
  carbonlink_hosts cqp
  syncdb false

  # Remote rendering and clustering are only configured when the search
  # above found federated hosts.
  if graphite_federated.empty?
    Chef::Log.warn("No federated hosts, not setting it up")
  else
    rendering_hosts render_hosts
    remote_rendering "True"
    cluster_servers graphite_federated
  end

  # NOTE(review): with the caches and relays above taking CPUs 0-14, this
  # range appears to overlap the relays; confirm the intended range.
  cpu_affinity "13-24" unless Chef::Config[:solo]
  debug "True"

  database_engine "postgresql_psycopg2"
  # Solo uses this node's own ipaddress; otherwise the DB server's eth0 address.
  db_host = Chef::Config[:solo] ? postgres_server.ipaddress : postgres_server.network.ipaddress_eth0
  database({ :name => 'graphite', :user => 'postgres', :password => postgres_server.postgresql.password, :host => db_host, :port => 5432 })
end

# Publish the graphite-web settings as node attributes.
node.set['graphite'] = {
  "port" => 8080,
  "workers" => 24,
  "backlog" => 65535,
  "standard_dirs" => "/opt/graphite/storage",
  "cpu" => "13-24"
}

# Apache front end: enable the proxy-related modules, disable the default
# site, and render and enable a "graphite" site whose template is given the
# local graphite-web listener (127.0.0.1:8080) as its balancer member.
%w[proxy proxy_http proxy_balancer headers rewrite].each do |mod|
  apache_module mod
end

apache_site 'default' do
  enable false
end

template '/etc/apache2/sites-available/graphite' do
  source 'apache2.graphite.conf.erb'
  variables({
    :balancermember => ['127.0.0.1:8080'],
    :timeout => 1500,
    :serveradmin => 'abuse@mopub.com',
    :maxattempts => 2
  })
end

apache_site 'graphite' do
  enable true
end

# Log the attributes this recipe stored on the node (at :info level), then
# persist the node unless running under chef-solo.
[
  ["graphite", node['graphite']],
  ["carbon relays", node['carbon']['relays']],
  ["carbon caches", node['carbon']['caches']]
].each do |label, value|
  log "#{label}: #{value}" do
    level :info
  end
end

node.save unless Chef::Config[:solo]