Documentation
¶
Index ¶
- type Action
- type ActionRunner
- func (a *ActionRunner) Failure(message string, eventID string)
- func (a *ActionRunner) GetState() *ActionRunnerState
- func (a *ActionRunner) RestoreState(state *ActionRunnerState)
- func (a *ActionRunner) Success(message string, eventID string)
- func (a *ActionRunner) Timeout(message string, eventID string)
- func (a *ActionRunner) Warning(message string, eventID string)
- type ActionRunnerState
- type ActionState
- type Check
- type CheckConfig
- type CheckFunc
- type CheckNotification
- type CheckResponse
- type CheckState
- type CheckStatus
- type Component
- type EventTracker
- func (t *EventTracker) ActiveEvents() map[string]string
- func (t *EventTracker) ClearSequence(eventID string)
- func (t *EventTracker) GetEventID(checkName string) string
- func (t *EventTracker) GetMaintenanceEventID() string
- func (t *EventTracker) GetNextSequence(eventID string) int
- func (t *EventTracker) GetOrCreateEventID(checkName string, status Status) string
- func (t *EventTracker) GetState() *EventTrackerState
- func (t *EventTracker) IsMaintenanceActive() bool
- func (t *EventTracker) RestoreState(state *EventTrackerState)
- type EventTrackerState
- type Health
- func (h *Health) Handler() http.Handler
- func (h *Health) HandlerFunc(w http.ResponseWriter, r *http.Request)
- func (h *Health) NotificationsDisable(suppressTime time.Duration)
- func (h *Health) NotificationsEnable()
- func (h *Health) NotificationsEnabled() bool
- func (h *Health) Persister() StatePersister
- func (h *Health) Register(c CheckConfig) error
- func (h *Health) SaveState(ctx context.Context)
- func (h *Health) Status(ctx context.Context) Check
- func (h *Health) Subscribe() (c <-chan CheckNotification)
- type NoopPersister
- func (p *NoopPersister) Close() error
- func (p *NoopPersister) DeleteCheckState(_ context.Context, _ string) error
- func (p *NoopPersister) LoadAllCheckStates(_ context.Context) (map[string]*CheckState, error)
- func (p *NoopPersister) LoadCheckState(_ context.Context, _ string) (*CheckState, error)
- func (p *NoopPersister) LoadEventTrackerState(_ context.Context) (*EventTrackerState, error)
- func (p *NoopPersister) SaveCheckState(_ context.Context, _ string, _ *CheckState) error
- func (p *NoopPersister) SaveEventTrackerState(_ context.Context, _ *EventTrackerState) error
- type Notifications
- type Option
- type StatePersister
- type Status
- type StatusUpdater
- type System
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Action ¶
type Action struct {
Command string
UnlockAfterDuration time.Duration
UnlockOnlyAfterHealthy bool
SendCommandOutput bool
// Notifiers list of enabled notifiers
Notifiers []string
// contains filtered or unexported fields
}
Action contains configuration for running an action
func (Action) Run ¶
func (a Action) Run(message string) (notification CheckNotification)
Run executes the action command
type ActionRunner ¶
type ActionRunner struct {
// contains filtered or unexported fields
}
ActionRunner keeps track of a check's actions
func NewActionRunner ¶
func NewActionRunner(checkName string, successAction, warningAction, failureAction, timeoutAction *Action, notifications *Notifications, eventTracker *EventTracker) *ActionRunner
NewActionRunner returns a new ActionRunner
func (*ActionRunner) Failure ¶
func (a *ActionRunner) Failure(message string, eventID string)
Failure handles a failed check result
func (*ActionRunner) GetState ¶ added in v6.0.1
func (a *ActionRunner) GetState() *ActionRunnerState
GetState returns a snapshot of the ActionRunner state for persistence.
func (*ActionRunner) RestoreState ¶ added in v6.0.1
func (a *ActionRunner) RestoreState(state *ActionRunnerState)
RestoreState restores the ActionRunner state from a persisted snapshot.
func (*ActionRunner) Success ¶
func (a *ActionRunner) Success(message string, eventID string)
Success handles a successful check result
func (*ActionRunner) Timeout ¶
func (a *ActionRunner) Timeout(message string, eventID string)
Timeout handles a timed out check result
func (*ActionRunner) Warning ¶
func (a *ActionRunner) Warning(message string, eventID string)
Warning handles a warning check result
type ActionRunnerState ¶ added in v6.0.1
type ActionRunnerState struct {
// Status is the current action runner status
Status Status `json:"status"`
// Per-action state
SuccessAction *ActionState `json:"success_action,omitempty"`
WarningAction *ActionState `json:"warning_action,omitempty"`
FailureAction *ActionState `json:"failure_action,omitempty"`
TimeoutAction *ActionState `json:"timeout_action,omitempty"`
}
ActionRunnerState represents the persistable state of an ActionRunner.
type ActionState ¶ added in v6.0.1
type ActionState struct {
// LastRun is when the action was last executed
LastRun time.Time `json:"last_run"`
// CanRun indicates whether the action is eligible to run
CanRun bool `json:"can_run"`
}
ActionState represents the persistable state of an Action.
type Check ¶
type Check struct {
// Status is the status of the check.
Status Status `json:"check"`
// Timestamp is the time at which the check occurred.
Timestamp time.Time `json:"timestamp"`
// Failures holds the failed checks along with their messages.
Failures map[string]string `json:"failures,omitempty"`
// System holds information of the go process.
*System `json:"system,omitempty"`
// Component holds information on the component for which checks are made
Component `json:"component"`
}
Check represents the health check response.
type CheckConfig ¶
type CheckConfig struct {
// Name is the name of the resource to be checked.
Name string
// Interval is how often the health check should run
Interval time.Duration
// Timeout is the timeout defined for every check.
Timeout time.Duration
// SkipOnErr if set to true, it will retrieve StatusPassing providing the error message from the failed resource.
SkipOnErr bool
// Check is the func which executes the check.
Check CheckFunc
// Status
Status *StatusUpdater
// SuccessesBeforePassing number of passing checks before reporting as passing
SuccessesBeforePassing int
// FailuresBeforeWarning number of failing checks before reporting as warning
FailuresBeforeWarning int
// FailuresBeforeCritical number of failing checks before reporting as critical
FailuresBeforeCritical int
// SuccessAction configuration
SuccessAction *Action
// WarningAction configuration
WarningAction *Action
// FailureAction configuration
FailureAction *Action
// TimeoutAction configuration
TimeoutAction *Action
// Notifiers list of enabled notifiers
Notifiers []string
// contains filtered or unexported fields
}
CheckConfig carries the parameters to run the check.
type CheckFunc ¶
type CheckFunc func(ctx context.Context) CheckResponse
CheckFunc is the func which executes the check.
type CheckNotification ¶
type CheckNotification struct {
Name string
Message string
Attachment []byte
Tags []string
Notifiers []string
EventID string // Event ID for correlating related alerts during an incident
Sequence int // Sequence number within the event (1, 2, 3...)
}
CheckNotification represents a notification sent when check status changes.
type CheckResponse ¶
type CheckResponse struct {
// Error message
Error error
// IsWarning if set to true, marks the error as a warning rather than a critical failure.
IsWarning bool
// NoNotification disables a notification for this response
NoNotification bool
}
CheckResponse is returned by a check function.
type CheckState ¶ added in v6.0.1
type CheckState struct {
// StatusUpdater state
Successes int `json:"successes"`
Failures int `json:"failures"`
PendingEventID string `json:"pending_event_id"`
// CheckStatus state
Status Status `json:"status"`
ErrorMsg string `json:"error_msg"`
// ActionRunner state
ActionRunnerState *ActionRunnerState `json:"action_runner_state,omitempty"`
// UpdatedAt is when this state was last updated
UpdatedAt time.Time `json:"updated_at"`
}
CheckState represents the persistable state of a health check.
type CheckStatus ¶
type CheckStatus struct {
// Status is the status of the check.
Status Status
// Error informational message about the Status
Error error
// contains filtered or unexported fields
}
CheckStatus holds the current status of a check.
func (*CheckStatus) Get ¶
func (s *CheckStatus) Get() (status Status, err error)
Get returns the status and error of the running check
func (*CheckStatus) Update ¶
func (s *CheckStatus) Update(status Status, err error)
Update sets the status and error of the running check
type Component ¶
type Component struct {
// Name is the name of the component.
Name string `json:"name"`
// Version is the component version.
Version string `json:"version"`
}
Component holds descriptive values about the component for which checks are made
type EventTracker ¶
type EventTracker struct {
// contains filtered or unexported fields
}
EventTracker manages event IDs for health check incidents. An event starts when a check transitions from healthy to unhealthy, and ends when it returns to healthy.
Special handling for maintenance mode:
- When the maintenance check becomes unhealthy, a maintenance event_id is created
- All checks that fail during maintenance use the maintenance event_id
- When maintenance ends, checks that are still unhealthy keep the maintenance event_id
- Only when each individual check becomes healthy does it clear its event_id
func NewEventTracker ¶
func NewEventTracker() *EventTracker
NewEventTracker creates a new event tracker
func (*EventTracker) ActiveEvents ¶
func (t *EventTracker) ActiveEvents() map[string]string
ActiveEvents returns a copy of all active events
func (*EventTracker) ClearSequence ¶
func (t *EventTracker) ClearSequence(eventID string)
ClearSequence removes sequence tracking for an event (call when event ends)
func (*EventTracker) GetEventID ¶
func (t *EventTracker) GetEventID(checkName string) string
GetEventID returns the current event ID for a check without modifying state. Returns empty string if no active event.
func (*EventTracker) GetMaintenanceEventID ¶
func (t *EventTracker) GetMaintenanceEventID() string
GetMaintenanceEventID returns the current maintenance event ID
func (*EventTracker) GetNextSequence ¶
func (t *EventTracker) GetNextSequence(eventID string) int
GetNextSequence returns the next sequence number for an event and increments counter. Returns 0 if eventID is empty.
func (*EventTracker) GetOrCreateEventID ¶
func (t *EventTracker) GetOrCreateEventID(checkName string, status Status) string
GetOrCreateEventID returns the active event ID for a check, creating a new one if none exists (for failure/warning notifications). For success notifications, this returns the event_id that was active (so downstream systems know which event ended) and clears it from the tracker.
Special maintenance behavior:
- If this is the maintenance check becoming unhealthy, creates a maintenance event_id
- If maintenance is active and another check fails, uses the maintenance event_id
- If maintenance ends but a check is still unhealthy, keeps the maintenance event_id
func (*EventTracker) GetState ¶ added in v6.0.1
func (t *EventTracker) GetState() *EventTrackerState
GetState returns a snapshot of the EventTracker state for persistence.
func (*EventTracker) IsMaintenanceActive ¶
func (t *EventTracker) IsMaintenanceActive() bool
IsMaintenanceActive returns whether maintenance mode is currently active
func (*EventTracker) RestoreState ¶ added in v6.0.1
func (t *EventTracker) RestoreState(state *EventTrackerState)
RestoreState restores the EventTracker state from a persisted snapshot. This should be called before any checks are registered.
type EventTrackerState ¶ added in v6.0.1
type EventTrackerState struct {
// EventIDs maps check names to their active event IDs
EventIDs map[string]string `json:"event_ids"`
// Sequences maps event IDs to their current sequence numbers
Sequences map[string]int `json:"sequences"`
// MaintenanceEventID is the current maintenance event ID (empty if not in maintenance)
MaintenanceEventID string `json:"maintenance_event_id"`
// MaintenanceActive indicates whether maintenance mode is currently active
MaintenanceActive bool `json:"maintenance_active"`
// MaintenanceChecks tracks checks that started failing during maintenance
MaintenanceChecks map[string]bool `json:"maintenance_checks"`
// UpdatedAt is when this state was last updated
UpdatedAt time.Time `json:"updated_at"`
}
EventTrackerState represents the persistable state of an EventTracker.
type Health ¶
type Health struct {
NotificationsSender *Notifications
EventTracker *EventTracker
// contains filtered or unexported fields
}
Health is the health-checks container
func (*Health) HandlerFunc ¶
func (h *Health) HandlerFunc(w http.ResponseWriter, r *http.Request)
HandlerFunc is the HTTP handler function.
func (*Health) NotificationsDisable ¶
NotificationsDisable disables notifications for the given suppressTime duration
func (*Health) NotificationsEnable ¶
func (h *Health) NotificationsEnable()
NotificationsEnable enables notifications
func (*Health) NotificationsEnabled ¶
NotificationsEnabled reports whether notifications are enabled
func (*Health) Persister ¶ added in v6.0.1
func (h *Health) Persister() StatePersister
Persister returns the configured state persister. Returns nil if no persister is configured (using NoopPersister).
func (*Health) Register ¶
func (h *Health) Register(c CheckConfig) error
Register registers a check config to be performed.
func (*Health) SaveState ¶ added in v6.0.1
SaveState persists the current state of all checks and the event tracker. This can be called periodically or before shutdown to ensure state is saved. Errors are logged but not returned - persistence failures should not impact health checks.
func (*Health) Subscribe ¶
func (h *Health) Subscribe() (c <-chan CheckNotification)
Subscribe returns a channel for receiving health check notifications
type NoopPersister ¶ added in v6.0.1
type NoopPersister struct{}
NoopPersister is a no-op implementation of StatePersister. It performs no persistence and is the default when no persister is configured.
func NewNoopPersister ¶ added in v6.0.1
func NewNoopPersister() *NoopPersister
NewNoopPersister creates a new no-op persister.
func (*NoopPersister) Close ¶ added in v6.0.1
func (p *NoopPersister) Close() error
Close is a no-op.
func (*NoopPersister) DeleteCheckState ¶ added in v6.0.1
func (p *NoopPersister) DeleteCheckState(_ context.Context, _ string) error
DeleteCheckState is a no-op.
func (*NoopPersister) LoadAllCheckStates ¶ added in v6.0.1
func (p *NoopPersister) LoadAllCheckStates(_ context.Context) (map[string]*CheckState, error)
LoadAllCheckStates returns an empty map.
func (*NoopPersister) LoadCheckState ¶ added in v6.0.1
func (p *NoopPersister) LoadCheckState(_ context.Context, _ string) (*CheckState, error)
LoadCheckState returns nil (no persisted state).
func (*NoopPersister) LoadEventTrackerState ¶ added in v6.0.1
func (p *NoopPersister) LoadEventTrackerState(_ context.Context) (*EventTrackerState, error)
LoadEventTrackerState returns nil (no persisted state).
func (*NoopPersister) SaveCheckState ¶ added in v6.0.1
func (p *NoopPersister) SaveCheckState(_ context.Context, _ string, _ *CheckState) error
SaveCheckState is a no-op.
func (*NoopPersister) SaveEventTrackerState ¶ added in v6.0.1
func (p *NoopPersister) SaveEventTrackerState(_ context.Context, _ *EventTrackerState) error
SaveEventTrackerState is a no-op.
type Notifications ¶
type Notifications struct {
// contains filtered or unexported fields
}
Notifications manages publishing notifications
func NewNotificationSender ¶
func NewNotificationSender(channel chan CheckNotification) *Notifications
NewNotificationSender for sending notifications
func (*Notifications) Disable ¶
func (n *Notifications) Disable(disableDuration time.Duration)
Disable notifications
func (*Notifications) Enabled ¶
func (n *Notifications) Enabled() bool
Enabled reports whether notifications are enabled
func (*Notifications) Send ¶
func (n *Notifications) Send(notification CheckNotification)
Send notifications
type Option ¶
Option is the health-container options type
func WithChecks ¶
func WithChecks(checks ...CheckConfig) Option
WithChecks adds checks to newly instantiated health-container
func WithComponent ¶
WithComponent sets the description of the component to which this check refers
func WithMaxConcurrent ¶
WithMaxConcurrent sets the maximum number of concurrently running checks. Set to 1 to run all checks sequentially.
func WithStatePersister ¶ added in v6.0.1
func WithStatePersister(p StatePersister) Option
WithStatePersister sets the state persister for persisting health check state across process restarts. If not set, a no-op persister is used (no persistence).
func WithSystemInfo ¶
func WithSystemInfo() Option
WithSystemInfo enables the option to return system information about the go process.
func WithTracerProvider ¶
func WithTracerProvider(tp trace.TracerProvider, instrumentationName string) Option
WithTracerProvider sets the trace provider for the checks and the instrumentation name used to obtain a tracer from that provider.
type StatePersister ¶ added in v6.0.1
type StatePersister interface {
// SaveEventTrackerState persists the event tracker state.
SaveEventTrackerState(ctx context.Context, state *EventTrackerState) error
// LoadEventTrackerState loads the persisted event tracker state.
// Returns nil, nil if no state exists.
LoadEventTrackerState(ctx context.Context) (*EventTrackerState, error)
// SaveCheckState persists the state for a single check.
SaveCheckState(ctx context.Context, checkName string, state *CheckState) error
// LoadCheckState loads the persisted state for a single check.
// Returns nil, nil if no state exists for the check.
LoadCheckState(ctx context.Context, checkName string) (*CheckState, error)
// LoadAllCheckStates loads all persisted check states.
LoadAllCheckStates(ctx context.Context) (map[string]*CheckState, error)
// DeleteCheckState removes persisted state for a check.
DeleteCheckState(ctx context.Context, checkName string) error
// Close releases any resources held by the persister.
Close() error
}
StatePersister defines the interface for persisting health check state. Implementations should be safe for concurrent use.
type Status ¶
type Status string
Status represents the status of a health check
const (
// StatusPassing healthcheck is passing
StatusPassing Status = "passing"
// StatusWarning healthcheck is failing but should not fail the component
StatusWarning Status = "warning"
// StatusCritical healthcheck is failing and should fail the component
StatusCritical Status = "critical"
// StatusTimeout healthcheck timed out and should fail the component
StatusTimeout Status = "timeout"
// StatusInitializing healthcheck is starting up and has not met the passing threshold
StatusInitializing Status = "initializing"
// MinimumInterval is the minimum time between checks
// to prevent fork bombing a system
MinimumInterval = time.Second
)
type StatusUpdater ¶
type StatusUpdater struct {
// contains filtered or unexported fields
}
StatusUpdater keeps track of a check's status
func NewStatusUpdater ¶
func NewStatusUpdater(successesBeforePassing, failuresBeforeWarning, failuresBeforeCritical int, actionRunner *ActionRunner, notifications *Notifications, notifiers []string, eventTracker *EventTracker) *StatusUpdater
NewStatusUpdater returns a new StatusUpdater that is in critical condition. It sends an "initializing" notification to indicate the check is starting up. Use NewStatusUpdaterSilent when restoring from persisted state to avoid spurious notifications.
func NewStatusUpdaterSilent ¶ added in v6.0.2
func NewStatusUpdaterSilent(successesBeforePassing, failuresBeforeWarning, failuresBeforeCritical int, actionRunner *ActionRunner, notifications *Notifications, notifiers []string, eventTracker *EventTracker) *StatusUpdater
NewStatusUpdaterSilent returns a new StatusUpdater without sending the "initializing" notification. This should be used when restoring state from persistence to avoid sending spurious notifications that would incorrectly resolve or duplicate existing alerts.
func (*StatusUpdater) GetState ¶ added in v6.0.1
func (s *StatusUpdater) GetState() *CheckState
GetState returns a snapshot of the StatusUpdater state for persistence.
func (*StatusUpdater) RestoreState ¶ added in v6.0.1
func (s *StatusUpdater) RestoreState(state *CheckState)
RestoreState restores the StatusUpdater state from a persisted snapshot.
type System ¶
type System struct {
// Version is the go version.
Version string `json:"version"`
// GoroutinesCount is the number of the current goroutines.
GoroutinesCount int `json:"goroutines_count"`
// TotalAllocBytes is the total bytes allocated.
TotalAllocBytes int `json:"total_alloc_bytes"`
// HeapObjectsCount is the number of objects in the go heap.
HeapObjectsCount int `json:"heap_objects_count"`
// AllocBytes is the bytes allocated and not yet freed.
AllocBytes int `json:"alloc_bytes"`
}
System runtime variables about the go process.
Source Files
¶
Directories
¶
| Path | Synopsis |
|---|---|
| checks | |
| influxdb | Package influxdb implements a health check for InfluxDB instance. |
| maintenance | Package maintenance implements a file-based maintenance mode check. |
| persister | |
| sqlite | Package sqlite provides a SQLite-based implementation of the health.StatePersister interface. |