class Riemann::Tools::Rabbitmq

Public Instance Methods

base_url() click to toggle source
# File bin/riemann-rabbitmq, line 28
# Builds the root URL of the RabbitMQ management API from the monitor_* options.
#
# The scheme is "https" only when options[:monitor_use_tls] is exactly true;
# every other value yields "http". User name and password are placed verbatim
# in the userinfo part of the URL.
#
# Returns a String ending in "/api".
def base_url
  scheme      = options[:monitor_use_tls] == true ? "https" : "http"
  credentials = "#{options[:monitor_user]}:#{options[:monitor_pass]}"
  authority   = "#{options[:monitor_host]}:#{options[:monitor_port]}"

  "#{scheme}://#{credentials}@#{authority}/api"
end
check_node() click to toggle source
# File bin/riemann-rabbitmq, line 210
# Reports the health of every RabbitMQ node listed in opts[:node].
#
# For each node name the resource at node_url(name) is fetched and exactly one
# event is reported on service "rabbitmq.node.<name>":
#   * critical - the API answered 404 (node not in the cluster) or any other
#                non-200 status
#   * critical - the node's 'mem_alarm' or 'disk_free_alarm' flag is set
#                (the memory alarm takes precedence when both are set)
#   * ok       - otherwise
#
# An unhealthy node does not stop the remaining nodes from being checked.
def check_node
  opts[:node].each do |n|
    response = safe_get(URI(node_url(n)), event_host)

    # safe_get has already reported the failed request. Every node is queried
    # through the same base_url, so there is no point in retrying (and waiting
    # for another timeout) for the remaining nodes.
    return if response.nil?

    service = "rabbitmq.node.#{n}"

    if response.status != 200
      description = if response.status == 404
        "Node was not found in the cluster"
      else
        "HTTP error: #{response.status} - #{response.body}"
      end

      report(:host => event_host,
        :service => service,
        :state => "critical",
        :description => description
      )
      # Move on to the next node; a bare `return` here would silently skip
      # every node listed after this one.
      next
    end

    json = JSON.parse(response.body)

    alarm = if json['mem_alarm']
      "Memory alarm has triggered; job submission throttled"
    elsif json['disk_free_alarm']
      "Disk free alarm has triggered; job submission throttled"
    end

    if alarm
      report(:host => event_host,
        :service => service,
        :state => "critical",
        :description => alarm
      )
    else
      report(:host => event_host,
        :service => service,
        :state => "ok",
        :description => "Node looks OK to me"
      )
    end
  end
end
check_overview() click to toggle source
# File bin/riemann-rabbitmq, line 164
# Fetches overview_url and reports broker-wide statistics.
#
# Events reported:
#   * service "rabbitmq", state critical, when the API answers with a non-200
#     status (nothing else is reported in that case)
#   * service "rabbitmq monitoring", state ok, when the API answers 200
#   * one metric event per entry of the 'message_stats', 'queue_totals' and
#     'object_totals' sections, on service "rabbitmq.<section>.<key>". For
#     keys ending in "details" the metric is the entry's 'rate' value.
#
# Nothing is reported here when safe_get returns nil; safe_get has already
# reported the failed request.
def check_overview
  response = safe_get(URI(overview_url), event_host)
  return if response.nil?

  if response.status != 200
    # The body is deliberately not parsed on this path: an error reply is not
    # guaranteed to be JSON, and a parse error would hide the real failure.
    report(:host => event_host,
      :service => "rabbitmq",
      :state => "critical",
      :description => "HTTP connection error: #{response.status} - #{response.body}"
    )
    return
  end

  report(:host => event_host,
    :service => "rabbitmq monitoring",
    :state => "ok",
    :description => "HTTP connection ok"
  )

  json = JSON.parse(response.body)

  %w( message_stats queue_totals object_totals ).each do |stat|
    section = json[stat]
    # A section can be absent or blank (e.g. message stats on a brand new
    # server); there is nothing to report for it then.
    next if section.nil? || section.empty?

    section.each_pair do |k, v|
      # "*_details" entries are hashes; their 'rate' is the metric.
      metric = (k =~ /details$/ && v) ? v['rate'] : v

      # TODO: Set state via thresholds which can be configured

      report(:host => event_host,
             :service => "rabbitmq.#{stat}.#{k}",
             :metric => metric,
             :description => "RabbitMQ monitor"
            )
    end
  end
end
check_queues() click to toggle source
# File bin/riemann-rabbitmq, line 80
# Fetches queues_url and reports the state and statistics of every queue.
#
# Events reported:
#   * service "rabbitmq.queue", state critical, when the API answers with a
#     non-200 status (nothing else is reported in that case)
#   * service "rabbitmq.queue", state ok, when the API answers 200
#   * per queue, on service "rabbitmq.queue.<vhost>.<name>": critical when the
#     queue has ready messages but no consumers and/or more ready messages
#     than options[:max_queue_size]; ok otherwise. Queues whose name matches
#     the options[:ignore_max_size_queues] pattern are exempt from the size
#     check only.
#   * per queue, one metric event for every entry of its 'message_stats' plus
#     a fixed set of message/consumer/memory counters. For keys ending in
#     "details" the metric is the entry's 'rate' value.
#
# Nothing is reported here when safe_get returns nil; safe_get has already
# reported the failed request.
def check_queues
  response = safe_get(queues_url, event_host)
  return if response.nil?

  if response.status != 200
    # The body is deliberately not parsed on this path: an error reply is not
    # guaranteed to be JSON, and a parse error would hide the real failure.
    report(:host => event_host,
      :service => "rabbitmq.queue",
      :state => "critical",
      :description => "HTTP connection error to /api/queues: #{response.status} - #{response.body}"
    )
    return
  end

  report(:host => event_host,
    :service => "rabbitmq.queue",
    :state => "ok",
    :description => "HTTP connection ok"
  )

  # nil when no ignore pattern is configured.
  ignore_pattern = options[:ignore_max_size_queues]
  max_size_check_filter = ignore_pattern ? Regexp.new(ignore_pattern) : nil

  JSON.parse(response.body).each do |queue|
    svc = "rabbitmq.queue.#{queue['vhost']}.#{queue['name']}"
    ready = queue['messages_ready']
    errs = []

    if ready && ready > 0 && queue['consumers'] == 0
      errs << "Queue has jobs but no consumers"
    end

    size_checked = max_size_check_filter.nil? || queue['name'] !~ max_size_check_filter
    if size_checked && ready && ready > options[:max_queue_size]
      errs << "Queue has #{ready} jobs"
    end

    if errs.empty?
      report(:host    => event_host,
             :service => svc,
             :state   => "ok",
             :description => "Queue is looking good"
      )
    else
      report(:host    => event_host,
             :service => svc,
             :state   => "critical",
             :description => errs.join("; ")
      )
    end

    # The queue-level counters are merged over message_stats, so they win when
    # a key exists in both.
    stats = (queue['message_stats'] || {}).merge(
              'messages'                        => queue['messages'],
              'messages_details'                => queue['messages_details'],
              'messages_ready'                  => queue['messages_ready'],
              'messages_ready_details'          => queue['messages_ready_details'],
              'messages_unacknowledged'         => queue['messages_unacknowledged'],
              'messages_unacknowledged_details' => queue['messages_unacknowledged_details'],
              'consumers'                       => queue['consumers'],
              'memory'                          => queue['memory']
            )

    stats.each_pair do |k, v|
      # "*_details" entries are hashes; their 'rate' is the metric.
      metric = (k =~ /details$/ && v) ? v['rate'] : v

      # TODO: Set state via thresholds which can be configured

      report(:host => event_host,
             :service => "#{svc}.#{k}",
             :metric => metric,
             :description => "RabbitMQ monitor"
      )
    end
  end
end
event_host() click to toggle source
# File bin/riemann-rabbitmq, line 48
# Host name attached to reported events: options[:event_host] when it is set,
# otherwise options[:monitor_host].
def event_host
  options[:event_host] || options[:monitor_host]
end
node_url(n) click to toggle source
# File bin/riemann-rabbitmq, line 40
# URL of the management API resource for node +n+: "<base_url>/nodes/<n>".
def node_url(n)
  format("%s/nodes/%s", base_url, n)
end
overview_url() click to toggle source
# File bin/riemann-rabbitmq, line 36
# URL of the management API overview resource: "<base_url>/overview".
def overview_url
  base_url + "/overview"
end
queues_url() click to toggle source
# File bin/riemann-rabbitmq, line 44
# URL of the management API queue listing: "<base_url>/queues".
def queues_url
  format("%s/queues", base_url)
end
safe_get(uri, event_host) click to toggle source
# File bin/riemann-rabbitmq, line 56
# Performs an HTTP GET on +uri+ through Faraday without letting errors escape.
#
# The request uses options[:read_timeout] as its timeout and
# options[:open_timeout] as its open timeout. The outcome is reported on the
# "rabbitmq monitoring" service for +event_host+: "ok" once the request has
# completed, "critical" (with the exception class and message) when a
# StandardError is raised.
#
# uri        - address to fetch (handed to Faraday.new as is)
# event_host - host name to attach to the reported event
#
# Returns the Faraday response, or nil when the request itself failed.
def safe_get(uri, event_host)
  response = nil

  response = Faraday.new(uri).get do |request|
    request.options[:timeout]      = options[:read_timeout]
    request.options[:open_timeout] = options[:open_timeout]
  end

  report(:host => event_host,
    :service => "rabbitmq monitoring",
    :state   => 'ok',
    :description => "Monitoring operational"
  )

  response
rescue => error
  # Covers connection errors and timeouts raised while performing the request.
  report(:host => event_host,
    :service => "rabbitmq monitoring",
    :state => "critical",
    :description => "HTTP connection error: #{error.class} - #{error.message}"
  )

  # Still nil unless the request had already completed before the error.
  response
end
tick() click to toggle source
# File bin/riemann-rabbitmq, line 262
# Runs one monitoring pass: the overview check, then the node check (only when
# opts[:node] is set), then the queue check.
def tick
  check_overview

  if opts[:node]
    check_node
  end

  check_queues
end